Diffstat (limited to 'tools')
178 files changed, 5327 insertions, 1202 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index af05f8e0903e..6ebd3e6a1fd1 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -181,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 3051f86a9b5f..c2860358ae3e 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 3603b6f51b11..4edbe4bb0e8b 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. 
*/ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -613,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 059e33e94260..328eeceec709 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -7,6 +7,8 @@ #define LOCK_PREFIX "\n\tlock; " +#include <asm/cmpxchg.h> + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. @@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v) GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/arch/x86/include/asm/cmpxchg.h b/tools/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 000000000000..f5253260f3cc --- /dev/null +++ b/tools/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,89 @@ +#ifndef TOOLS_ASM_X86_CMPXCHG_H +#define TOOLS_ASM_X86_CMPXCHG_H + +#include <linux/compiler.h> + +/* + * Non-existant functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error(). + */ +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size it set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteeed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef __x86_64__ +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. 
+ */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg(ptr, old, new, sizeof(*(ptr))) + + +#endif /* TOOLS_ASM_X86_CMPXCHG_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 293149a1c6a1..b04bb6dfed7f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,7 @@ * * Reuse free bits when adding new feature flags! 
*/ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ @@ -321,5 +322,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e3fb5ecbdcb6..523911f316ce 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \ lzma \ get_cpuid \ bpf \ + sched_getcpu \ sdt # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b564a2eea039..09c9626ea666 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -48,12 +48,13 @@ FILES= \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ - test-jvmti.bin + test-jvmti.bin \ + test-sched_getcpu.bin FILES := $(addprefix $(OUTPUT),$(FILES)) -CC := $(CROSS_COMPILE)gcc -MD -CXX := $(CROSS_COMPILE)g++ -MD +CC ?= $(CROSS_COMPILE)gcc -MD +CXX ?= $(CROSS_COMPILE)g++ -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config @@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin: $(OUTPUT)test-glibc.bin: $(BUILD) +$(OUTPUT)test-sched_getcpu.bin: + $(BUILD) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 699e43627397..cc6c7c01f4ca 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -117,6 +117,10 @@ # include "test-pthread-attr-setaffinity-np.c" #undef main +#define main main_test_sched_getcpu +# include "test-sched_getcpu.c" +#undef main + # if 0 /* * Disable libbabeltrace check for test-all, because the requested @@ -182,6 +186,7 @@ int main(int argc, char *argv[]) main_test_get_cpuid(); main_test_bpf(); main_test_libcrypto(); + main_test_sched_getcpu(); main_test_sdt(); return 0; diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c new file mode 100644 index 000000000000..c4a148dd7104 --- /dev/null +++ b/tools/build/feature/test-sched_getcpu.c @@ -0,0 +1,7 @@ +#define _GNU_SOURCE +#include <sched.h> + +int main(void) +{ + return sched_getcpu(); +} diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 2ba78c9f5701..5e9738f97bf3 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v) return __sync_sub_and_fetch(&v->counter, 1) == 0; 
} +#define cmpxchg(ptr, oldval, newval) \ + __sync_val_compare_and_swap(ptr, oldval, newval) + +static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) +{ + return cmpxchg(&(v)->counter, oldval, newval); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 4e3d3d18ebab..9f21fc2b092b 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -3,4 +3,10 @@ #include <asm/atomic.h> +/* atomic_cmpxchg_relaxed */ +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#endif /* atomic_cmpxchg_relaxed */ + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 48af2f10a42d..616935f1ff56 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -12,3 +12,7 @@ #if GCC_VERSION >= 70000 && !defined(__CHECKER__) # define __fallthrough __attribute__ ((fallthrough)) #endif + +#if GCC_VERSION >= 40300 +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* GCC_VERSION >= 40300 */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 8de163b17c0d..c9e65e8faacd 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -5,6 +5,10 @@ #include <linux/compiler-gcc.h> #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif + /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 28607db02bd3..adb4d0147755 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -5,6 +5,10 @@ #include <stddef.h> #include <assert.h> +#ifndef UINT_MAX +#define UINT_MAX (~0U) +#endif + #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index 41446668ccce..d5677d39c1e4 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -13,12 +13,6 @@ #define _TOOLS_LINUX_LOG2_H /* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - -/* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() @@ -78,7 +72,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -141,10 +135,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h new file mode 100644 index 000000000000..a0177c1f55b1 --- /dev/null +++ b/tools/include/linux/refcount.h @@ -0,0 +1,151 @@ +#ifndef _TOOLS_LINUX_REFCOUNT_H +#define _TOOLS_LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. 
+ * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include <linux/atomic.h> +#include <linux/kernel.h> + +#ifdef NDEBUG +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#else +#define REFCOUNT_WARN(cond, str) BUG_ON(cond) +#define __refcount_check __must_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. 
+ */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + + +#endif /* _ATOMIC_LINUX_REFCOUNT_H */ diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 000000000000..067427259820 --- /dev/null +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include <linux/types.h> +#include <linux/ptrace.h> + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c66a485a24ac..d09a9cd021b1 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -885,6 +915,7 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 11c8d9bc762e..5d19fdf80292 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val) /* Allow writing to any other BAR, or expansion ROM */ iowrite(portoff, val, mask, &d->config_words[reg]); return true; - /* We let them overide latency timer and cacheline size */ + /* We let them override latency timer and cacheline size */ } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { /* Only let them change 
the first two fields. */ if (mask == 0xFFFFFFFF) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 4b6bfc43cccf..809c7721cd24 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) return filename__read_str(path, buf, sizep); } +int sysfs__read_bool(const char *entry, bool *value) +{ + char *buf; + size_t size; + int ret; + + ret = sysfs__read_str(entry, &buf, &size); + if (ret < 0) + return ret; + + switch (buf[0]) { + case '1': + case 'y': + case 'Y': + *value = true; + break; + case '0': + case 'n': + case 'N': + *value = false; + break; + default: + ret = -1; + } + + free(buf); + + return ret; +} int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 6b332dc74498..956c21127d1e 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); +int sysfs__read_bool(const char *entry, bool *value); #endif /* __API_FS__ */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e2efddf10231..1f5300e56b44 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -132,7 +132,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 47076b15eebe..9b8555ea3459 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -135,7 +135,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 66342804161c..0c03538df74c 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -140,7 +140,7 @@ struct pevent_plugin_option { * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = { * { * .name = "option-name", - * .plugin_alias = "overide-file-name", (optional) + * .plugin_alias = "override-file-name", (optional) * .description = "description of option to show users", * }, * { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4cfdbb5b6967..066086dd59a8 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; + /* look for a relocation which references .rodata */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) - return find_rela_by_dest(file->rodata, - text_rela->addend); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. 
+ */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); } return NULL; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d7983ac63ef..d897702ce742 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + offset >= sym->offset && offset < sym->offset + sym->len) + return sym; + + return NULL; +} + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index aa1ff6596684..731973e1a3f5 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -79,6 +79,7 @@ struct elf { struct elf *elf_open(const char *name); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3db3db9278be..643cc4ba6872 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -31,3 +31,5 @@ config.mak.autogen .config-detected util/intel-pt-decoder/inat-tables.c arch/*/include/generated/ +pmu-events/pmu-events.c +pmu-events/jevents diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 2d96de6132a9..6e6a8b22c859 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -30,6 +30,24 @@ OPTIONS --verbose=:: Verbosity level. +-p:: +--pid=:: + Trace on existing process id (comma separated list). + +-a:: +--all-cpus:: + Force system-wide collection. Scripts run without a <command> + normally use -a by default, while scripts run with a <command> + normally don't - this option allows the latter to be run in + system-wide mode. + +-C:: +--cpu=:: + Only trace for the list of CPUs provided. Multiple CPUs can + be provided as a comma separated list with no space like: 0,1. + Ranges of CPUs are specified with -: 0-2. + Default is to trace on all online CPUs. + SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 41857cce5e86..143d98df2df9 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -24,6 +24,10 @@ Don't print descriptions. --long-desc:: Print longer event descriptions. +--details:: +Print how named events are resolved internally into perf events, and also +any extra expressions computed by perf stat. + [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b16003ec14a7..ea3789d05e5e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can displayed with the weight and local_weight sort keys. 
This currently works for TSX abort events and some memory events in precise mode on modern Intel CPUs. +--namespaces:: +Record events of type PERF_RECORD_NAMESPACES. + --transaction:: Record transaction flags for transaction related events. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c04cc0647c16..e9a61f5485eb 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -72,7 +72,8 @@ OPTIONS --sort=:: Sort histogram entries by given key(s) - multiple keys can be specified in CSV format. Following sort keys are available: - pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight. + pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, + local_weight, cgroup_id. Each key has following meaning: @@ -80,6 +81,7 @@ OPTIONS - pid: command and tid of the task - dso: name of library or module executed at the time of sample - symbol: name of function executed at the time of sample + - symbol_size: size of function executed at the time of sample - parent: name of function matched to the parent regex filter. Unmatched entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample @@ -91,6 +93,7 @@ OPTIONS - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. - local_weight: Local weight version of the weight above. + - cgroup_id: ID derived from cgroup namespace device and inode numbers. - transaction: Transaction abort flags. - overhead: Overhead percentage of sample - overhead_sys: Overhead percentage of sample running in system mode @@ -172,6 +175,9 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. + If the keys starts with a prefix '+', then it will append the specified + field(s) to the default field order. For example: perf report -F +period,sample. + -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index d33deddb0146..a092a2499e8f 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-n:: +--next:: + Show next task. + -I:: --idle-hist:: Show idle-related events only. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4ed5f239ba7d..cb0eda3925e6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -189,15 +189,20 @@ OPTIONS i.e., -F "" is not allowed. 
The brstack output includes branch related information with raw addresses using the - /v/v/v/v/ syntax in the following order: + /v/v/v/v/cycles syntax in the following order: FROM: branch source instruction TO : branch target instruction M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported X/- : X=branch inside a transactional region, -=not in transaction region or not supported A/- : A=TSX abort entry, -=not aborted region or not supported + cycles The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. + When brstackinsn is specified the full assembler sequences of branch sequences for each sample + is printed. This is the full execution path leading to the sample. This is only supported when the + sample was recorded with perf record -b or -j any. + -k:: --vmlinux=<file>:: vmlinux pathname @@ -248,6 +253,9 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-namespace-events + Display namespace events i.e. events of type PERF_RECORD_NAMESPACES. + --show-switch-events Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. @@ -299,6 +307,10 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. +--max-blocks:: + Set the maximum number of program blocks to print with brstackasm for + each sample. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index aecf2a87e7d6..bd0e4417f2be 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs in system-wide mode (-a). -This option is only valid in system-wide mode. +Do not aggregate counts across all monitored CPUs. -n:: --null:: @@ -237,6 +236,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. 
+ EXAMPLES -------- diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8672f835ae4e..28648c09dcd6 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h tools/arch/sparc/include/asm/barrier_64.h tools/arch/tile/include/asm/barrier.h tools/arch/x86/include/asm/barrier.h +tools/arch/x86/include/asm/cmpxchg.h tools/arch/x86/include/asm/cpufeatures.h tools/arch/x86/include/asm/disabled-features.h tools/arch/x86/include/asm/required-features.h @@ -78,6 +79,7 @@ tools/include/uapi/linux/perf_event.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h +tools/include/linux/refcount.h tools/include/linux/string.h tools/include/linux/stringify.h tools/include/linux/types.h diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 27c9fbca7bd9..2b656de99495 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -317,6 +317,10 @@ ifdef NO_DWARF NO_LIBDW_DWARF_UNWIND := 1 endif +ifeq ($(feature-sched_getcpu), 1) + CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 1030a6e504bb..39dbe512b9fc 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,6 +10,7 @@ #include "symbol.h" #include "map.h" #include "probe-event.h" +#include "probe-file.h" #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) @@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * However, if the user specifies an offset, we fall back to using the * GEP since all userspace applications (objdump/readelf) show function * disassembly with offsets from the GEP. - * - * In addition, we shouldn't specify an offset for kretprobes. 
*/ - if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || - !map || !sym) + if (pev->point.offset || !map || !sym) return; + /* For kretprobes, add an offset only if the kernel supports it */ + if (!pev->uprobes && pev->point.retprobe) { +#ifdef HAVE_LIBELF_SUPPORT + if (!kretprobe_offset_is_supported()) +#endif + return; + } + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c5db14f36cc7..d8a8dcf761f7 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,4 +1,7 @@ +#include <string.h> + #include "../../perf.h" +#include "../../util/util.h" #include "../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { @@ -26,3 +29,103 @@ const struct sample_reg sample_reg_masks[] = { #endif SMPL_REG_END }; + +struct sdt_name_reg { + const char *sdt_name; + const char *uprobe_name; +}; +#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} +#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} + +static const struct sdt_name_reg sdt_reg_renamings[] = { + SDT_NAME_REG(eax, ax), + SDT_NAME_REG(rax, ax), + SDT_NAME_REG(ebx, bx), + SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(ecx, cx), + SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(edx, dx), + SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(esi, si), + SDT_NAME_REG(rsi, si), + SDT_NAME_REG(sil, si), + SDT_NAME_REG(edi, di), + SDT_NAME_REG(rdi, di), + SDT_NAME_REG(dil, di), + SDT_NAME_REG(ebp, bp), + SDT_NAME_REG(rbp, bp), + SDT_NAME_REG(bpl, bp), + SDT_NAME_REG(rsp, sp), + SDT_NAME_REG(esp, sp), + SDT_NAME_REG(spl, sp), + + /* rNN registers */ + SDT_NAME_REG(r8b, r8), + SDT_NAME_REG(r8w, r8), + SDT_NAME_REG(r8d, r8), + SDT_NAME_REG(r9b, r9), + SDT_NAME_REG(r9w, r9), + SDT_NAME_REG(r9d, r9), + SDT_NAME_REG(r10b, r10), + SDT_NAME_REG(r10w, r10), + SDT_NAME_REG(r10d, r10), + SDT_NAME_REG(r11b, r11), + SDT_NAME_REG(r11w, r11), + SDT_NAME_REG(r11d, r11), + SDT_NAME_REG(r12b, r12), + SDT_NAME_REG(r12w, r12), + SDT_NAME_REG(r12d, r12), + SDT_NAME_REG(r13b, r13), + SDT_NAME_REG(r13w, r13), + SDT_NAME_REG(r13d, r13), + SDT_NAME_REG(r14b, r14), + SDT_NAME_REG(r14w, r14), + SDT_NAME_REG(r14d, r14), + SDT_NAME_REG(r15b, r15), + SDT_NAME_REG(r15w, r15), + SDT_NAME_REG(r15d, r15), + SDT_NAME_REG_END, +}; + +int sdt_rename_register(char **pdesc, char *old_name) +{ + const struct sdt_name_reg *rnames = sdt_reg_renamings; + char *new_desc, *old_desc = *pdesc; + size_t prefix_len, sdt_len, uprobe_len, old_desc_len, offset; + int ret = -1; + + while (ret != 0 && rnames->sdt_name != NULL) { + sdt_len = strlen(rnames->sdt_name); + ret = strncmp(old_name, rnames->sdt_name, sdt_len); + rnames += !!ret; + } + + if (rnames->sdt_name == NULL) + return 0; + + sdt_len = strlen(rnames->sdt_name); + uprobe_len = strlen(rnames->uprobe_name); + old_desc_len = strlen(old_desc) + 1; + + new_desc = zalloc(old_desc_len + uprobe_len - sdt_len); + if (new_desc == NULL) + return -1; + + /* Copy the chars before the register name (at least '%') */ + prefix_len = old_name - old_desc; + memcpy(new_desc, old_desc, prefix_len); + + /* Copy the new register name */ + memcpy(new_desc + prefix_len, rnames->uprobe_name, uprobe_len); + + /* Copy the chars after the register name (if need be) */ + offset = prefix_len + sdt_len; + if (offset < old_desc_len) + memcpy(new_desc + prefix_len + uprobe_len, + old_desc + offset, old_desc_len - offset); + + free(old_desc); + *pdesc = 
new_desc; + + return 0; +} diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index da04b8c5568a..2499e1b0c6fb 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include <string.h> #include <pthread.h> #include <errno.h> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 91877777ec6e..a20814d94af1 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -3,6 +3,7 @@ */ /* For the CLR_() macros */ +#include <string.h> #include <pthread.h> #include <signal.h> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2b9705a8734c..9fad1e4fcd3e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include <string.h> #include <pthread.h> #include <signal.h> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 2c8fa67ad537..40f5fcf1d120 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -8,6 +8,7 @@ */ /* For the CLR_() macros */ +#include <string.h> #include <pthread.h> #include <signal.h> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index e246b1b8388a..789490281ae3 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include <string.h> #include <pthread.h> #include <signal.h> diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b2e06d1190d0..e44fd3239530 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -88,13 +88,11 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP #include <pthread.h> -static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, - size_t cpusetsize, - cpu_set_t *cpuset) +#include <linux/compiler.h> +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused, + size_t cpusetsize __maybe_unused, + cpu_set_t *cpuset __maybe_unused) { - attr = attr; - cpusetsize = cpusetsize; - cpuset = cpuset; return 0; } #endif diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 3083fc36282b..6bd0581de298 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -187,7 +187,8 @@ static const struct option options[] = { OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), - OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), + OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " + "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4f52d85f5ebc..e54b1f9fe1ee 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) .comm = perf_event__process_comm, 
.exit = perf_event__process_exit, .fork = perf_event__process_fork, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e2b21723bbf8..5cd6d7a047b9 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2334,7 +2334,7 @@ out: static void perf_c2c_display(struct perf_session *session) { - if (c2c.use_stdio) + if (use_browser == 0) perf_c2c__hists_fprintf(stdout, session); else perf_c2c__hists_browse(&c2c.hists.hists); @@ -2536,7 +2536,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), #endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, - "Use the stdio interface"), + "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, "Display full length of symbols"), OPT_BOOLEAN(0, "no-source", &no_source, diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1b96a3122228..5e4803158672 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -364,6 +364,7 @@ static struct perf_tool tool = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }; diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index c3e643666c72..6087295f8827 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -11,11 +11,13 @@ #include <unistd.h> #include <signal.h> +#include <fcntl.h> #include "debug.h" #include <subcmd/parse-options.h> #include "evlist.h" #include "target.h" +#include "cpumap.h" #include "thread_map.h" #include "util/config.h" @@ -50,11 +52,12 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused, done = true; } -static int write_tracing_file(const char *name, const char *val) +static int __write_tracing_file(const char *name, const char *val, bool append) { char *file; int fd, ret = -1; ssize_t size = strlen(val); + int flags = O_WRONLY; file = get_tracing_file(name); if (!file) { @@ -62,7 +65,12 @@ static int write_tracing_file(const char *name, const char *val) return -1; } - fd = open(file, O_WRONLY); + if (append) + flags |= O_APPEND; + else + flags |= O_TRUNC; + + fd = open(file, flags); if (fd < 0) { pr_debug("cannot open tracing file: %s\n", name); goto out; @@ -79,6 +87,18 @@ out: return ret; } +static int write_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, val, false); +} + +static int append_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, val, true); +} + +static int reset_tracing_cpu(void); + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -90,14 +110,78 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (write_tracing_file("set_ftrace_pid", " ") < 0) return -1; + if (reset_tracing_cpu() < 0) + return -1; + + return 0; +} + +static int set_tracing_pid(struct perf_ftrace *ftrace) +{ + int i; + char buf[16]; + + if (target__has_cpu(&ftrace->target)) + return 0; + + for (i = 0; i < thread_map__nr(ftrace->evlist->threads); i++) { + scnprintf(buf, sizeof(buf), "%d", + ftrace->evlist->threads->map[i]); + if (append_tracing_file("set_ftrace_pid", buf) < 0) + return -1; + } return 0; } +static int 
set_tracing_cpumask(struct cpu_map *cpumap) +{ + char *cpumask; + size_t mask_size; + int ret; + int last_cpu; + + last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + mask_size = (last_cpu + 3) / 4 + 1; + mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ + + cpumask = malloc(mask_size); + if (cpumask == NULL) { + pr_debug("failed to allocate cpu mask\n"); + return -1; + } + + cpu_map__snprint_mask(cpumap, cpumask, mask_size); + + ret = write_tracing_file("tracing_cpumask", cpumask); + + free(cpumask); + return ret; +} + +static int set_tracing_cpu(struct perf_ftrace *ftrace) +{ + struct cpu_map *cpumap = ftrace->evlist->cpus; + + if (!target__has_cpu(&ftrace->target)) + return 0; + + return set_tracing_cpumask(cpumap); +} + +static int reset_tracing_cpu(void) +{ + struct cpu_map *cpumap = cpu_map__new(NULL); + int ret; + + ret = set_tracing_cpumask(cpumap); + cpu_map__put(cpumap); + return ret; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; int trace_fd; - char *trace_pid; char buf[4096]; struct pollfd pollfd = { .events = POLLIN, @@ -108,42 +192,43 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - if (argc < 1) - return -1; - signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); - reset_tracing_files(ftrace); + if (reset_tracing_files(ftrace) < 0) + goto out; /* reset ftrace buffer */ if (write_tracing_file("trace", "0") < 0) goto out; - if (perf_evlist__prepare_workload(ftrace->evlist, &ftrace->target, - argv, false, ftrace__workload_exec_failed_signal) < 0) + if (argc && perf_evlist__prepare_workload(ftrace->evlist, + &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { goto out; + } - if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { - pr_err("failed to set current_tracer to %s\n", ftrace->tracer); - goto out; + if (set_tracing_pid(ftrace) < 0) { + pr_err("failed to set ftrace pid\n"); + goto out_reset; } - if (asprintf(&trace_pid, "%d", thread_map__pid(ftrace->evlist->threads, 0)) < 0) { - pr_err("failed to allocate pid string\n"); - goto out; + if (set_tracing_cpu(ftrace) < 0) { + pr_err("failed to set tracing cpumask\n"); + goto out_reset; } - if (write_tracing_file("set_ftrace_pid", trace_pid) < 0) { - pr_err("failed to set pid: %s\n", trace_pid); - goto out_free_pid; + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { + pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + goto out_reset; } trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } trace_fd = open(trace_file, O_RDONLY); @@ -152,7 +237,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (trace_fd < 0) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } fcntl(trace_fd, F_SETFL, O_NONBLOCK); @@ -163,6 +248,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_close_fd; } + setup_pager(); + perf_evlist__start_workload(ftrace->evlist); while (!done) { @@ -191,11 +278,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) out_close_fd: close(trace_fd); -out_free_pid: - free(trace_pid); -out: +out_reset: reset_tracing_files(ftrace); - +out: return done ? 
0 : -1; } @@ -227,15 +312,21 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) .target = { .uid = UINT_MAX, }, }; const char * const ftrace_usage[] = { - "perf ftrace [<options>] <command>", + "perf ftrace [<options>] [<command>]", "perf ftrace [<options>] -- <command> [<options>]", NULL }; const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "tracer to use: function_graph(default) or function"), + OPT_STRING('p', "pid", &ftrace.target.pid, "pid", + "trace on existing process id"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", + "list of cpus to monitor"), OPT_END() }; @@ -245,9 +336,18 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (!argc && target__none(&ftrace.target)) usage_with_options(ftrace_usage, ftrace_options); + ret = target__validate(&ftrace.target); + if (ret) { + char errbuf[512]; + + target__strerror(&ftrace.target, ret, errbuf, 512); + pr_err("%s\n", errbuf); + return -EINVAL; + } + ftrace.evlist = perf_evlist__new(); if (ftrace.evlist == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9bc7e39833a..8d1d13b9bab6 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool, return err; } +static int perf_event__repipe_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err = perf_event__process_namespaces(tool, event, sample, machine); + + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject) session->itrace_synth_opts = &inject->itrace_synth_opts; inject->itrace_synth_opts.inject = true; inject->tool.comm = perf_event__repipe_comm; + inject->tool.namespaces = perf_event__repipe_namespaces; inject->tool.exit = perf_event__repipe_exit; inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 6da8d083e4e5..d509e74bc6e8 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = { .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 08fa88f62a24..18e6c38864bc 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { @@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, kvm->tool.exit = perf_event__process_exit; kvm->tool.fork = perf_event__process_fork; kvm->tool.lost = process_lost_event; + 
kvm->tool.namespaces = perf_event__process_namespaces; kvm->tool.ordered_events = true; perf_tool__fill_defaults(&kvm->tool); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3b9d98b5feef..be9195e95c78 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -18,6 +18,7 @@ #include <subcmd/parse-options.h> static bool desc_flag = true; +static bool details_flag; int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { @@ -30,6 +31,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) "Print extra event descriptions. --no-desc to not print."), OPT_BOOLEAN('v', "long-desc", &long_desc_flag, "Print longer event descriptions."), + OPT_BOOLEAN(0, "details", &details_flag, + "Print information on the perf event names and expressions used internally by events."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -50,7 +53,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag, long_desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag, + details_flag); return 0; } @@ -72,7 +76,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { @@ -80,7 +84,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) if (sep == NULL) { print_events(argv[i], raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); continue; } sep_idx = sep - argv[i]; @@ -103,7 +108,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); free(s); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index ce3bfb48b26f..e992e7206993 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -858,6 +858,7 @@ static int __cmd_report(bool display_info) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { @@ -947,27 +948,29 @@ static int __cmd_record(int argc, const char **argv) int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) { - const struct option info_options[] = { - OPT_BOOLEAN('t', "threads", &info_threads, - "dump thread list in perf.data"), - OPT_BOOLEAN('m', "map", &info_map, - "map of lock instances (address:name table)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_END() - }; const struct option lock_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; + + const struct option info_options[] = { + OPT_BOOLEAN('t', "threads", 
&info_threads, + "dump thread list in perf.data"), + OPT_BOOLEAN('m', "map", &info_map, + "map of lock instances (address:name table)"), + OPT_PARENT(lock_options) + }; + const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ - OPT_END() + OPT_PARENT(lock_options) }; + const char * const info_usage[] = { "perf lock info [<options>]", NULL diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 6114e07ca613..030a6cfdda59 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -342,6 +342,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__process_lost, .fork = perf_event__process_fork, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }, .input_name = "perf.data", diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1fcebc31a508..51cdc230f6ca 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -442,9 +442,9 @@ static int perf_del_probe_events(struct strfilter *filter) } if (ret == -ENOENT && ret2 == -ENOENT) - pr_debug("\"%s\" does not hit any event.\n", str); - /* Note that this is silently ignored */ - ret = 0; + pr_warning("\"%s\" does not hit any event.\n", str); + else + ret = 0; error: if (kfd >= 0) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bc84a375295d..04faef79a548 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -876,6 +876,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); + if (rec->opts.record_namespaces) + tool->namespace_events = true; + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -983,6 +986,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) */ if (forks) { union perf_event *event; + pid_t tgid; event = malloc(sizeof(event->comm) + machine->id_hdr_size); if (event == NULL) { @@ -996,10 +1000,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * cannot see a correct process name for those events. * Synthesize COMM event to prevent it. */ - perf_event__synthesize_comm(tool, event, - rec->evlist->workload.pid, - process_synthesized_event, - machine); + tgid = perf_event__synthesize_comm(tool, event, + rec->evlist->workload.pid, + process_synthesized_event, + machine); + free(event); + + if (tgid == -1) + goto out_child; + + event = malloc(sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (event == NULL) { + err = -ENOMEM; + goto out_child; + } + + /* + * Synthesize NAMESPACES event for the command specified. 
+ */ + perf_event__synthesize_namespaces(tool, event, + rec->evlist->workload.pid, + tgid, process_synthesized_event, + machine); free(event); perf_evlist__start_workload(rec->evlist); @@ -1497,6 +1521,7 @@ static struct record record = { .fork = perf_event__process_fork, .exit = perf_event__process_exit, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .ordered_events = true, @@ -1611,6 +1636,8 @@ static struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, + "Record namespaces events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a88670e56f3..5ab8117c3bfd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -394,8 +394,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == NULL && - parent_pattern == default_parent_pattern) + if (!quiet) fprintf(stdout, "#\n# (%s)\n#\n", help); if (rep->show_threads) { @@ -701,6 +700,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b94cf0de715a..b92c4d97192c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -221,6 +221,7 @@ struct perf_sched { unsigned int max_stack; bool show_cpu_visual; bool show_wakeups; + bool show_next; bool show_migrations; bool show_state; u64 skipped_samples; @@ -1897,14 +1898,18 @@ static char task_state_char(struct thread *thread, int state) } static void timehist_print_sample(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct addr_location *al, struct thread *thread, u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); + const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); + const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; + char nstr[30]; u64 wait_time; timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); @@ -1937,7 +1942,12 @@ static void timehist_print_sample(struct perf_sched *sched, if (sched->show_state) printf(" %5c ", task_state_char(thread, state)); - if (sched->show_wakeups) + if (sched->show_next) { + snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); + printf(" %-*s", comm_width, nstr); + } + + if (sched->show_wakeups && !sched->show_next) printf(" %-*s", comm_width, ""); if (thread->tid == 0) @@ -2531,7 +2541,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->summary_only) - timehist_print_sample(sched, sample, &al, thread, t, state); + timehist_print_sample(sched, evsel, sample, &al, thread, t, state); out: if (sched->hist_time.start == 0 && t >= ptime->start) @@ -3272,6 +3282,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) 
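The perf record hunk above extends the forked-workload workaround: perf_event__synthesize_comm() now returns the workload's tgid, and a PERF_RECORD_NAMESPACES event is then synthesized for the same pid/tgid, sized for one perf_ns_link_info per namespace plus the session's id header. A rough sketch of that size calculation, with stand-in types and an assumed NR_NAMESPACES value (the real constant and layouts come from perf's headers):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Stand-ins; the real layouts live in perf's event.h / uapi headers. */
struct ns_link_info { uint64_t dev, ino; };

struct namespaces_event {
	uint32_t pid, tid;
	uint64_t nr_namespaces;
	/* followed by nr_namespaces ns_link_info entries, then the id header */
};

#define NR_NAMESPACES 7	/* assumed here purely for illustration */

static size_t namespaces_event_size(size_t id_hdr_size)
{
	return sizeof(struct namespaces_event) +
	       NR_NAMESPACES * sizeof(struct ns_link_info) +
	       id_hdr_size;
}

int main(void)
{
	/* id_hdr_size depends on the session's sample_id layout */
	printf("allocation size: %zu bytes\n", namespaces_event_size(32));
	return 0;
}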
.tool = { .sample = perf_sched__process_tracepoint_sample, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .lost = perf_event__process_lost, .fork = perf_sched__process_fork_event, .ordered_events = true, @@ -3340,6 +3351,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('S', "with-summary", &sched.summary, "Show all syscalls and summary with statistics"), OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), + OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), @@ -3437,10 +3449,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(timehist_usage, timehist_options); } - if (sched.show_wakeups && sched.summary_only) { - pr_err(" Error: -s and -w are mutually exclusive.\n"); + if ((sched.show_wakeups || sched.show_next) && + sched.summary_only) { + pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); parse_options_usage(timehist_usage, timehist_options, "s", true); - parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_wakeups) + parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_next) + parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c0783b4f7b6c..c98e16689b57 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,6 +28,7 @@ #include <linux/time64.h> #include "asm/bug.h" #include "util/mem-events.h" +#include "util/dump-insn.h" static char const *script_name; static char const *generate_script_lang; @@ -42,6 +43,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; +static int max_blocks; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -69,6 +71,7 @@ enum perf_output_field { PERF_OUTPUT_CALLINDENT = 1U << 20, PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, + PERF_OUTPUT_BRSTACKINSN = 1U << 23, }; struct output_option { @@ -98,6 +101,7 @@ struct output_option { {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, + {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, }; /* default set to maintain compatibility with current format */ @@ -292,7 +296,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. 
Hence, no address to lookup the source line number.\n"); return -EINVAL; } - + if (PRINT_FIELD(BRSTACKINSN) && + !(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) { + pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" + "Hint: run 'perf record -b ...'\n"); + return -EINVAL; + } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -546,6 +556,233 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +#define MAXBB 16384UL + +static int grab_bb(u8 *buffer, u64 start, u64 end, + struct machine *machine, struct thread *thread, + bool *is64bit, u8 *cpumode, bool last) +{ + long offset, len; + struct addr_location al; + bool kernel; + + if (!start || !end) + return 0; + + kernel = machine__kernel_ip(machine, start); + if (kernel) + *cpumode = PERF_RECORD_MISC_KERNEL; + else + *cpumode = PERF_RECORD_MISC_USER; + + /* + * Block overlaps between kernel and user. + * This can happen due to ring filtering + * On Intel CPUs the entry into the kernel is filtered, + * but the exit is not. Let the caller patch it up. + */ + if (kernel != machine__kernel_ip(machine, end)) { + printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", + start, end); + return -ENXIO; + } + + memset(&al, 0, sizeof(al)); + if (end - start > MAXBB - MAXINSN) { + if (last) + printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); + else + printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); + return 0; + } + + thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); + if (!al.map || !al.map->dso) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map); + + offset = al.map->map_ip(al.map, start); + len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, + end - start + MAXINSN); + + *is64bit = al.map->dso->is_64_bit; + if (len <= 0) + printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", + start, end); + return len; +} + +static void print_jump(uint64_t ip, struct branch_entry *en, + struct perf_insn *x, u8 *inbuf, int len, + int insn) +{ + printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", + ip, + dump_insn(x, ip, inbuf, len, NULL), + en->flags.predicted ? " PRED" : "", + en->flags.mispred ? " MISPRED" : "", + en->flags.in_tx ? " INTX" : "", + en->flags.abort ? 
" ABORT" : ""); + if (en->flags.cycles) { + printf(" %d cycles", en->flags.cycles); + if (insn) + printf(" %.2f IPC", (float)insn / en->flags.cycles); + } + putchar('\n'); +} + +static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, + uint64_t addr, struct symbol **lastsym, + struct perf_event_attr *attr) +{ + struct addr_location al; + int off; + + memset(&al, 0, sizeof(al)); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); + if (!al.map) + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) + return; + + al.cpu = cpu; + al.sym = NULL; + if (al.map) + al.sym = map__find_symbol(al.map, al.addr); + + if (!al.sym) + return; + + if (al.addr < al.sym->end) + off = al.addr - al.sym->start; + else + off = al.addr - al.map->start - al.sym->start; + printf("\t%s", al.sym->name); + if (off) + printf("%+d", off); + putchar(':'); + if (PRINT_FIELD(SRCLINE)) + map__fprintf_srcline(al.map, al.addr, "\t", stdout); + putchar('\n'); + *lastsym = al.sym; +} + +static void print_sample_brstackinsn(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, + struct machine *machine) +{ + struct branch_stack *br = sample->branch_stack; + u64 start, end; + int i, insn, len, nr, ilen; + struct perf_insn x; + u8 buffer[MAXBB]; + unsigned off; + struct symbol *lastsym = NULL; + + if (!(br && br->nr)) + return; + nr = br->nr; + if (max_blocks && nr > max_blocks + 1) + nr = max_blocks + 1; + + x.thread = thread; + x.cpu = sample->cpu; + + putchar('\n'); + + /* Handle first from jump, of which we don't know the entry. */ + len = grab_bb(buffer, br->entries[nr-1].from, + br->entries[nr-1].from, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len > 0) { + print_ip_sym(thread, x.cpumode, x.cpu, + br->entries[nr - 1].from, &lastsym, attr); + print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + &x, buffer, len, 0); + } + + /* Print all blocks */ + for (i = nr - 2; i >= 0; i--) { + if (br->entries[i].from || br->entries[i].to) + pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, + br->entries[i].from, + br->entries[i].to); + start = br->entries[i + 1].to; + end = br->entries[i].from; + + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + /* Patch up missing kernel transfers due to ring filters */ + if (len == -ENXIO && i > 0) { + end = br->entries[--i].from; + pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + } + if (len <= 0) + continue; + + insn = 0; + for (off = 0;; off += ilen) { + uint64_t ip = start + off; + + print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); + if (ip == end) { + print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); + break; + } else { + printf("\t%016" PRIx64 "\t%s\n", ip, + dump_insn(&x, ip, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + insn++; + } + } + } + + /* + * Hit the branch? In this case we are already done, and the target + * has not been executed yet. 
+ */ + if (br->entries[0].from == sample->ip) + return; + if (br->entries[0].flags.abort) + return; + + /* + * Print final block upto sample + */ + start = br->entries[0].to; + end = sample->ip; + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); + print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); + if (len <= 0) { + /* Print at least last IP if basic block did not work */ + len = grab_bb(buffer, sample->ip, sample->ip, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len <= 0) + return; + + printf("\t%016" PRIx64 "\t%s\n", sample->ip, + dump_insn(&x, sample->ip, buffer, len, NULL)); + return; + } + for (off = 0; off <= end - start; off += ilen) { + printf("\t%016" PRIx64 "\t%s\n", start + off, + dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + } +} static void print_sample_addr(struct perf_sample *sample, struct thread *thread, @@ -632,7 +869,9 @@ static void print_sample_callindent(struct perf_sample *sample, } static void print_insn(struct perf_sample *sample, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread, + struct machine *machine) { if (PRINT_FIELD(INSNLEN)) printf(" ilen: %d", sample->insn_len); @@ -643,12 +882,15 @@ static void print_insn(struct perf_sample *sample, for (i = 0; i < sample->insn_len; i++) printf(" %02x", (unsigned char)sample->insn[i]); } + if (PRINT_FIELD(BRSTACKINSN)) + print_sample_brstackinsn(sample, thread, attr, machine); } static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; @@ -689,7 +931,7 @@ static void print_sample_bts(struct perf_sample *sample, if (print_srcline_last) map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -830,6 +1072,7 @@ struct perf_script { bool show_task_events; bool show_mmap_events; bool show_switch_events; + bool show_namespace_events; bool allocated; struct cpu_map *cpus; struct thread_map *threads; @@ -871,7 +1114,8 @@ static size_t data_src__printf(u64 data_src) static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; @@ -898,7 +1142,7 @@ static void process_event(struct perf_script *script, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al, machine); return; } @@ -936,7 +1180,7 @@ static void process_event(struct perf_script *script, if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -1046,7 +1290,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, sample, evsel, &al); + process_event(scr, sample, evsel, &al, machine); out_put: addr_location__put(&al); @@ -1118,6 +1362,41 @@ out: return ret; } +static int process_namespaces_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine 
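print_sample_brstackinsn() above reconstructs instruction flow from the sampled branch stack: entries are newest-first, so each intermediate basic block runs from entries[i + 1].to to entries[i].from, the final block runs from entries[0].to up to the sample IP, and the IPC printed for a block is its decoded instruction count divided by the cycle count carried on the closing branch. A simplified sketch of that walk over a mock branch stack (instruction counts are hard-coded here; the real code decodes each block with dump_insn()):

#include <stdio.h>
#include <stdint.h>

/* Mock of a branch stack entry: only the fields the walk needs. */
struct bentry { uint64_t from, to; unsigned cycles, insns; };

int main(void)
{
	/* newest entry first, as in PERF_SAMPLE_BRANCH_STACK */
	struct bentry br[] = {
		{ 0x4030, 0x4100, 12, 9 },	/* most recent branch */
		{ 0x2050, 0x4000, 20, 15 },
		{ 0x1010, 0x2000,  8, 4 },	/* oldest branch */
	};
	int nr = 3, i;
	uint64_t sample_ip = 0x4140;

	/* blocks between consecutive branches, printed oldest to newest */
	for (i = nr - 2; i >= 0; i--) {
		uint64_t start = br[i + 1].to, end = br[i].from;

		printf("block %#lx-%#lx", (unsigned long)start, (unsigned long)end);
		if (br[i].cycles)
			printf("  %.2f IPC", (double)br[i].insns / br[i].cycles);
		putchar('\n');
	}

	/* final block: from the last branch target up to the sampled IP */
	printf("block %#lx-%#lx (up to sample)\n",
	       (unsigned long)br[0].to, (unsigned long)sample_ip);
	return 0;
}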
*machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + int ret = -1; + + thread = machine__findnew_thread(machine, event->namespaces.pid, + event->namespaces.tid); + if (thread == NULL) { + pr_debug("problem processing NAMESPACES event, skipping it.\n"); + return -1; + } + + if (perf_event__process_namespaces(tool, event, sample, machine) < 0) + goto out; + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->namespaces.tid; + sample->pid = event->namespaces.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + ret = 0; +out: + thread__put(thread); + return ret; +} + static int process_fork_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1293,6 +1572,8 @@ static int __cmd_script(struct perf_script *script) } if (script->show_switch_events) script->tool.context_switch = process_switch_event; + if (script->show_namespace_events) + script->tool.namespaces = process_namespaces_event; ret = perf_session__process_events(script->session); @@ -2097,6 +2378,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, @@ -2152,7 +2434,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen", parse_output_fields), + "bpf-output,callindent,insn,insnlen,brstackinsn", + parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2180,7 +2463,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the mmap events"), OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), + OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, + "Show namespace events (if recorded)"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_INTEGER(0, "max-blocks", &max_blocks, + "Maximum number of code blocks to dump with brstackinsn"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 13b54999ad79..01b589e3c3a6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -140,12 +140,14 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; static const char *output_name; static int output_fd; +static int print_free_counters_hint; struct perf_stat { bool record; @@ -1109,6 +1111,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double 
uval, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); + if (counter->supported) + print_free_counters_hint = 1; + fprintf(stat_config.output, "%-*s%s", csv_output ? 0 : unit_width, counter->unit, csv_sep); @@ -1140,6 +1145,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.print_metric = pm; out.new_line = nl; out.ctx = &os; + out.force_header = false; if (csv_output && !metric_only) { print_noise(counter, noise); @@ -1178,11 +1184,81 @@ static void aggr_update_shadow(void) } } +static void collect_all_aliases(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->merged_stat = true; + cb(alias, data, false); + } +} + +static bool collect_data(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; + cb(counter, data, true); + if (!no_merge) + collect_all_aliases(counter, cb, data); + return true; +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, s, s2, id, nr; + int s, id, nr; double uval; u64 ena, run, val; bool first; @@ -1197,23 +1273,21 @@ static void print_aggr(char *prefix) * Without each counter has its own line. 
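The perf stat changes above route every printout through collect_data(): starting from one counter, collect_all_aliases() folds in the immediately following evsels that share the same name, unit, scale and cgroup (typically one uncore event instantiated once per PMU box), marking them merged_stat so they are not printed again, unless the new --no-merge option is given. A reduced sketch of that fold, with a plain array standing in for the evlist:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/* Stand-in for a perf evsel with per-counter totals. */
struct counter {
	const char *name;
	double val;
	bool merged;		/* like evsel->merged_stat */
};

typedef void (*collect_cb)(struct counter *c, void *data, bool first);

static void sum_cb(struct counter *c, void *data, bool first)
{
	(void)first;
	*(double *)data += c->val;
}

/* Fold counter i plus any directly following same-named aliases. */
static bool collect_data(struct counter *list, int nr, int i, bool no_merge,
			 collect_cb cb, void *data)
{
	int j;

	if (list[i].merged)
		return false;
	cb(&list[i], data, true);
	if (no_merge)
		return true;
	for (j = i + 1; j < nr && !strcmp(list[j].name, list[i].name); j++) {
		list[j].merged = true;
		cb(&list[j], data, false);
	}
	return true;
}

int main(void)
{
	/* e.g. one uncore event instantiated on two PMU instances */
	struct counter c[] = { { "unc_reads", 100 }, { "unc_reads", 140 } };
	int i;

	for (i = 0; i < 2; i++) {
		double total = 0;

		if (!collect_data(c, 2, i, false, sum_cb, &total))
			continue;	/* alias already folded into an earlier line */
		printf("%-10s %.0f\n", c[i].name, total);
	}
	return 0;
}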
*/ for (s = 0; s < aggr_map->nr; s++) { + struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - id = aggr_map->map[s]; + ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { - val = ena = run = 0; - nr = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - ena += perf_counts(counter->counts, cpu, 0)->ena; - run += perf_counts(counter->counts, cpu, 0)->run; - nr++; - } + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(counter, aggr_cb, &ad)) + continue; + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; if (first && metric_only) { first = false; aggr_printout(counter, id, nr); @@ -1257,6 +1331,21 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) } } +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->priv; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -1264,23 +1353,31 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { FILE *output = stat_config.output; - struct perf_stat_evsel *ps = counter->priv; - double avg = avg_stats(&ps->res_stats[0]); double uval; - double avg_enabled, avg_running; + struct caggr_data cd = { .avg = 0.0 }; - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); + if (!collect_data(counter, counter_aggr_cb, &cd)) + return; if (prefix && !metric_only) fprintf(output, "%s", prefix); - uval = avg * counter->scale; - printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + uval = cd.avg * counter->scale; + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); if (!metric_only) fprintf(output, "\n"); } +static void counter_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + /* * Print out the results of a single counter: * does not use aggregated count in system-wide @@ -1293,9 +1390,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + struct aggr_data ad = { .cpu = cpu }; + + if (!collect_data(counter, counter_cb, &ad)) + return; + val = ad.val; + ena = ad.ena; + run = ad.run; if (prefix) fprintf(output, "%s", prefix); @@ -1380,6 +1481,7 @@ static void print_metric_headers(const char *prefix, bool no_indent) out.ctx = &os; out.print_metric = print_metric_header; out.new_line = new_line_metric; + out.force_header = true; os.evsel = counter; perf_stat__print_shadow_stats(counter, 0, 0, @@ -1477,6 +1579,13 @@ static void print_footer(void) avg_stats(&walltime_nsecs_stats)); } fprintf(output, 
"\n\n"); + + if (print_free_counters_hint) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } static void print_counters(struct timespec *ts, int argc, const char **argv) @@ -1633,6 +1742,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", @@ -2339,6 +2449,35 @@ static int __cmd_report(int argc, const char **argv) return 0; } +static void setup_system_wide(int forks) +{ + /* + * Make system wide (-a) the default target if + * no target was specified and one of following + * conditions is met: + * + * - there's no workload specified + * - there is workload specified but all requested + * events are system wide events + */ + if (!target__none(&target)) + return; + + if (!forks) + target.system_wide = true; + else { + struct perf_evsel *counter; + + evlist__for_each_entry(evsel_list, counter) { + if (!counter->system_wide) + return; + } + + if (evsel_list->nr_entries) + target.system_wide = true; + } +} + int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const stat_usage[] = { @@ -2361,6 +2500,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); if (csv_sep) { @@ -2445,9 +2585,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - /* Make system wide (-a) the default target. */ - if (!argc && target__none(&target)) - target.system_wide = true; + setup_system_wide(argc); if (run_count < 0) { pr_err("Run count must be a positive number\n"); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e7eaa298d34a..fbd7c6c695b8 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1933,6 +1933,11 @@ int cmd_timechart(int argc, const char **argv, .merge_dist = 1000, }; const char *output_name = "output.svg"; + const struct option timechart_common_options[] = { + OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"), + OPT_END() + }; const struct option timechart_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_STRING('o', "output", &output_name, "file", "output file name"), @@ -1940,9 +1945,6 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "highlight", NULL, "duration or task name", "highlight tasks. Pass duration in ns or process name.", parse_highlight), - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_CALLBACK('p', "process", NULL, "process", "process selector. 
Pass a pid or process name.", parse_process), @@ -1962,22 +1964,18 @@ int cmd_timechart(int argc, const char **argv, "merge events that are merge-dist us apart", parse_time), OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_subcommands[] = { "record", NULL }; const char *timechart_usage[] = { "perf timechart [<options>] {record}", NULL }; - const struct option timechart_record_options[] = { - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_BOOLEAN('I', "io-only", &tchart.io_only, "record only IO data"), OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_record_usage[] = { "perf timechart record [<options>]", diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 256f1fac6f7e..912fedc5b42d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2415,8 +2415,9 @@ static int trace__replay(struct trace *trace) trace->tool.exit = perf_event__process_exit; trace->tool.fork = perf_event__process_fork; trace->tool.attr = perf_event__process_attr; - trace->tool.tracing_data = perf_event__process_tracing_data; + trace->tool.tracing_data = perf_event__process_tracing_data; trace->tool.build_id = perf_event__process_build_id; + trace->tool.namespaces = perf_event__process_namespaces; trace->tool.ordered_events = true; trace->tool.ordering_requires_timestamps = true; diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ac3efd396a72..2d0caf20ff3a 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-data mainporcelain common perf-diff mainporcelain common +perf-c2c mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common perf-ftrace mainporcelain common diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1c27d947c2fe..806c216a1078 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -50,6 +50,7 @@ struct record_opts { bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; + bool record_namespaces; bool record_switch_events; bool all_kernel; bool all_user; diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). 
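The setup_system_wide() helper added to perf stat above refines the old default: with no explicit target, counting now falls back to system-wide either when no workload command is given, or when a workload is given but every requested event is itself a system-wide (for example uncore) event. A compact sketch of that decision, with booleans standing in for the target and evsel state:

#include <stdio.h>
#include <stdbool.h>

static bool default_to_system_wide(bool have_target, int forks,
				   const bool *evsel_system_wide, int nr_evsels)
{
	int i;

	if (have_target)	/* user already chose -a/-p/-t/-C ... */
		return false;
	if (!forks)		/* no workload: measure the whole system */
		return true;
	if (!nr_evsels)
		return false;
	for (i = 0; i < nr_evsels; i++)
		if (!evsel_system_wide[i])
			return false;	/* a per-task event keeps the workload target */
	return true;		/* all requested events are system-wide anyway */
}

int main(void)
{
	bool uncore_only[] = { true, true };
	bool mixed[] = { true, false };

	printf("no workload:       %d\n", default_to_system_wide(false, 0, NULL, 0));
	printf("workload + uncore: %d\n", default_to_system_wide(false, 1, uncore_only, 2));
	printf("workload + mixed:  %d\n", default_to_system_wide(false, 1, mixed, 2));
	return 0;
}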
Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. 
Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json index d17dc235f734..fa09e12018ce 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,44 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", - "Counter": "0,1,2,3", - "EventName": "UNC_M_CLOCKTICKS", - "PerPkg": "1", - "Unit": "iMC" - }, - { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +59,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +68,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
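The JSON updates above (and the broadwellx/haswellx copies that follow) pair each MetricExpr with a MetricName so the derived value gets a readable label in perf stat output; the expressions all have the shape (EVENT / UNC_*_CLOCKTICKS) * 100., i.e. the share of uncore clock ticks spent in a given state, as a percentage. A trivial sketch of evaluating one such metric from two raw counts (the counts are made up):

#include <stdio.h>

/*
 * Percentage-of-clockticks metric, e.g.
 * power_channel_ppd % = (UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.
 */
static double pct_of_clockticks(double event_count, double clockticks)
{
	if (clockticks == 0.0)
		return 0.0;
	return event_count / clockticks * 100.0;
}

int main(void)
{
	double ppd = 1.2e9, clk = 4.8e9;

	printf("power_channel_ppd %%: %.1f\n", pct_of_clockticks(ppd, clk));
	return 0;
}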
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. 
Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json index 39387f7909b2..824961318c1e 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . 
Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json index d17dc235f734..66eed399724c 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
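Several raw uncore events above are also renamed to stable, user-facing names: UNC_M_CAS_COUNT.RD/WR become LLC_MISSES.MEM_READ/MEM_WRITE and UNC_Q_TxL_FLITS_G0.DATA/NON_DATA become QPI_DATA_BANDWIDTH_TX/QPI_CTL_BANDWIDTH_TX, each keeping its ScaleUnit (64 bytes per CAS, 8 bytes per flit) so the scaled count reads directly in bytes. A small sketch of turning such a scaled count over an interval into bandwidth (the numbers are made up):

#include <stdio.h>

/* Convert a raw event count with its ScaleUnit into bandwidth. */
static double bandwidth_mb_s(double count, double scale_unit_bytes, double seconds)
{
	return count * scale_unit_bytes / seconds / 1e6;
}

int main(void)
{
	/* 1e8 CAS reads at 64 bytes each over one second */
	printf("LLC_MISSES.MEM_READ:   %.1f MB/s\n", bandwidth_mb_s(1e8, 64, 1.0));
	/* 5e8 data flits at 8 bytes each over one second */
	printf("QPI_DATA_BANDWIDTH_TX: %.1f MB/s\n", bandwidth_mb_s(5e8, 8, 1.0));
	return 0;
}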
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json index 076459c51d4e..58ed6d33d1f4 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. 
Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json index 39387f7909b2..824961318c1e 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . 
Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json index d17dc235f734..66eed399724c 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. 
Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json index b44d43088bbb..dd1b95655d1d 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. 
Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json index 2efdc6772e0b..267410594833 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -237,7 +237,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory. Derived from unc_c_tor_occupancy.miss_local", + "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL", "PerPkg": "1", @@ -254,7 +254,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory. 
Derived from unc_c_tor_occupancy.miss_remote", + "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE", "PerPkg": "1", @@ -262,7 +262,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "Read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -271,7 +271,7 @@ "Unit": "HA" }, { - "BriefDescription": "Write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "Write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -280,7 +280,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -290,7 +290,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -300,7 +300,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -310,7 +310,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json index d7e2fda1d695..b798a860bc81 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. 
Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json index ac4ad4d6357b..df4b43294fa0 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates for reads and writes. Derived from unc_m_act_count.rd", + "BriefDescription": "Memory page activates for reads and writes", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT.RD", @@ -13,7 +13,7 @@ "BriefDescription": "Read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -23,48 +23,51 @@ "BriefDescription": "Write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. 
Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index dc2586db0dfc..d40498f2cb1e 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). 
One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,90 +86,100 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when thermal conditions are the upper limit on frequency. This is related to the THERMAL_THROTTLE CYCLES_ABOVE_TEMP event, which always counts cycles when we are above the thermal temperature. This event (STRONGEST_UPPER_LIMIT) is sampled at the output of the algorithm that determines the actual frequency, while THERMAL_THROTTLE looks at the input. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. 
Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the system is changing frequency. This can not be filtered by thread ID. One can also use it with the occupancy counter that monitors number of threads in C0 to estimate the performance impact that frequency transitions had on the system. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventCode": "0x60", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -173,6 +190,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -183,6 +201,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -193,6 +212,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -203,6 +223,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -213,6 +234,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -223,6 +245,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -233,6 +256,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -243,6 +267,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git 
a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json index 2f23cf0129e7..3fa61d962607 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -171,11 +171,12 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth. Derived from unc_c_tor_occupancy.miss_all", + "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_ALL", "Filter": "filter_opc=0x182", "MetricExpr": "(UNC_C_TOR_OCCUPANCY.MISS_ALL / UNC_C_CLOCKTICKS) * 100.", + "MetricName": "tor_occupancy.miss_all %", "PerPkg": "1", "UMask": "0xa", "Unit": "CBO" @@ -189,7 +190,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -198,7 +199,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json index 63351876eb57..1b53c0e609e3 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. 
Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json index e2cf6daa7b37..8551cebeba23 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates. Derived from unc_m_act_count", + "BriefDescription": "Memory page activates", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT", @@ -11,7 +11,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "UMask": "0x3", "Unit": "iMC" @@ -20,47 +20,50 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "UMask": "0xc", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. 
Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -69,7 +72,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Occupancy counter for memory read queue. Derived from unc_m_rpq_occupancy", + "BriefDescription": "Occupancy counter for memory read queue", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_M_RPQ_OCCUPANCY", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index bbe36d547386..16034bfd06dd 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). 
One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,89 +86,99 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. 
It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events . Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. 
Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -172,6 +189,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -182,6 +200,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -192,6 +211,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -202,6 +222,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -212,6 +233,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -222,6 +244,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -232,6 +255,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -242,6 +266,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 12181bb1da2a..d1a12e584c1b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -17,6 +17,7 @@ GenuineIntel-6-3A,v18,ivybridge,core GenuineIntel-6-3E,v19,ivytown,core 
GenuineIntel-6-2D,v20,jaketown,core GenuineIntel-6-57,v9,knightslanding,core +GenuineIntel-6-85,v9,knightslanding,core GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index eed09346a72a..81f2ef3b15cf 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -291,7 +291,9 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg) + char *pmu, char *unit, char *perpkg, + char *metric_expr, + char *metric_name) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -315,6 +317,10 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.unit = \"%s\",\n", unit); if (perpkg) fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); + if (metric_expr) + fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); + if (metric_name) + fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); fprintf(outfp, "},\n"); return 0; @@ -362,7 +368,9 @@ static char *real_event(const char *name, char *event) int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg), + char *pmu, char *unit, char *perpkg, + char *metric_expr, + char *metric_name), void *data) { int err = -EIO; @@ -388,6 +396,8 @@ int json_events(const char *fn, char *filter = NULL; char *perpkg = NULL; char *unit = NULL; + char *metric_expr = NULL; + char *metric_name = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -398,6 +408,7 @@ int json_events(const char *fn, for (j = 0; j < obj->size; j += 2) { jsmntok_t *field, *val; int nz; + char *s; field = tok + j; EXPECT(field->type == JSMN_STRING, tok + j, @@ -444,7 +455,6 @@ int json_events(const char *fn, NULL); } else if (json_streq(map, field, "Unit")) { const char *ppmu; - char *s; ppmu = field_to_perf(unit_to_pmu, map, val); if (ppmu) { @@ -464,6 +474,12 @@ int json_events(const char *fn, addfield(map, &unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &perpkg, "", "", val); + } else if (json_streq(map, field, "MetricName")) { + addfield(map, &metric_name, "", "", val); + } else if (json_streq(map, field, "MetricExpr")) { + addfield(map, &metric_expr, "", "", val); + for (s = metric_expr; *s; s++) + *s = tolower(*s); } /* ignore unknown fields */ } @@ -488,7 +504,7 @@ int json_events(const char *fn, fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg); + pmu, unit, perpkg, metric_expr, metric_name); free(event); free(desc); free(name); @@ -498,6 +514,8 @@ int json_events(const char *fn, free(filter); free(perpkg); free(unit); + free(metric_expr); + free(metric_name); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 71e13de31092..611fac01913d 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -5,7 +5,8 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg), + char *unit, char *perpkg, char *metric_expr, + char *metric_name), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h 
b/tools/perf/pmu-events/pmu-events.h index c669a3cdb9f0..569eab3688dd 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -13,6 +13,8 @@ struct pmu_event { const char *pmu; const char *unit; const char *perpkg; + const char *metric_expr; + const char *metric_name; }; /* diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1cb3d9b540e9..af58ebc243ef 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -38,6 +38,7 @@ perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o perf-y += event-times.o +perf-y += expr.o perf-y += backward-ring-buffer.o perf-y += sdt.o perf-y += is_printable_array.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 83c4669cbc5b..86822969e8a8 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -44,6 +44,10 @@ static struct test generic_tests[] = { .func = test__parse_events, }, { + .desc = "Simple expression parser", + .func = test__expr, + }, + { .desc = "PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, }, diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index f168a85992d0..4478773cdb97 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -66,7 +66,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 2); TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); - TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1); + TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c new file mode 100644 index 000000000000..6c6a3749aaf6 --- /dev/null +++ b/tools/perf/tests/expr.c @@ -0,0 +1,56 @@ +#include "util/debug.h" +#include "util/expr.h" +#include "tests.h" +#include <stdlib.h> + +static int test(struct parse_ctx *ctx, const char *e, double val2) +{ + double val; + + if (expr__parse(&val, ctx, &e)) + TEST_ASSERT_VAL("parse test failed", 0); + TEST_ASSERT_VAL("unexpected value", val == val2); + return 0; +} + +int test__expr(int subtest __maybe_unused) +{ + const char *p; + const char **other; + double val; + int ret; + struct parse_ctx ctx; + int num_other; + + expr__ctx_init(&ctx); + expr__add_id(&ctx, "FOO", 1); + expr__add_id(&ctx, "BAR", 2); + + ret = test(&ctx, "1+1", 2); + ret |= test(&ctx, "FOO+BAR", 3); + ret |= test(&ctx, "(BAR/2)%2", 1); + ret |= test(&ctx, "1 - -4", 5); + ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + + if (ret) + return ret; + + p = "FOO/0"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("division by zero", ret == 1); + + p = "BAR/"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("missing operand", ret == 1); + + TEST_ASSERT_VAL("find other", + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); + TEST_ASSERT_VAL("find other", num_other == 3); + TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); + TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); + TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); + TEST_ASSERT_VAL("find other", other[3] == NULL); + free((void *)other); + + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 1fa9b9d83aa5..631859629403 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -62,6 +62,7 @@ int test__sample_parsing(int subtest); int test__keep_tracking(int subtest); int 
test__parse_no_sample_id_all(int subtest); int test__dwarf_unwind(int subtest); +int test__expr(int subtest); int test__hists_filter(int subtest); int test__mmap_thread_lookup(int subtest); int test__thread_mg_share(int subtest); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index f2d2e542d0ee..a63d6945807b 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -29,7 +29,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -44,7 +44,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); return 0; } @@ -71,7 +71,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, thread_map__comm(threads, 0) && !strcmp(thread_map__comm(threads, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&threads->refcnt) == 1); + refcount_read(&threads->refcnt) == 1); thread_map__put(threads); return 0; } diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index 188b63140fc8..76686dd6f5ec 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -43,7 +43,7 @@ int test__thread_mg_share(int subtest __maybe_unused) leader && t1 && t2 && t3 && other); mg = leader->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4); /* test the map groups pointer is shared */ TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); @@ -71,25 +71,25 @@ int test__thread_mg_share(int subtest __maybe_unused) machine__remove_thread(machine, other_leader); other_mg = other->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2); TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); /* release thread group */ thread__put(leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3); thread__put(t1); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 2); thread__put(t2); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1); thread__put(t3); /* release other group */ thread__put(other_leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1); thread__put(other); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fc4fb669ceee..2dc82bec10c0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2308,7 +2308,7 @@ static int switch_data_file(void) return ret; memset(options, 0, sizeof(options)); - memset(options, 0, sizeof(abs_path)); + memset(abs_path, 0, sizeof(abs_path)); while ((dent = readdir(pwd_dir))) { char path[PATH_MAX]; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5da376bc1afc..2ae92da613dd 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -42,6 +42,7 @@ libperf-y 
+= pstack.o libperf-y += session.o libperf-$(CONFIG_AUDIT) += syscalltbl.o libperf-y += ordered-events.o +libperf-y += namespaces.o libperf-y += comm.o libperf-y += thread.o libperf-y += thread_map.o @@ -81,6 +82,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += dump-insn.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o @@ -88,6 +90,7 @@ libperf-y += mem-events.o libperf-y += vsprintf.o libperf-y += drv_configs.o libperf-y += time-utils.o +libperf-y += expr-bison.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -140,6 +143,10 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-bison.c: util/expr.y + $(call rule_mkdir) + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l @@ -152,6 +159,7 @@ CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 273f21fa32b5..22cd1dbe724b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1435,7 +1435,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand", + " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", @@ -1482,6 +1482,12 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na nline = 0; while (!feof(file)) { + /* + * The source code line number (lineno) needs to be kept in + * accross calls to symbol__parse_objdump_line(), so that it + * can associate it with the instructions till the next one. + * See disasm_line__new() and struct disasm_line::line_nr. 
+ */ if (symbol__parse_objdump_line(sym, map, arch, file, privsize, &lineno) < 0) break; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index eafbf11442b2..86399eda3684 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -127,19 +127,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (atomic_read(&cgrp->refcnt) == 0) + if (refcount_read(&cgrp->refcnt) == 0) free(cgrp); return -1; found: - atomic_inc(&cgrp->refcnt); + refcount_inc(&cgrp->refcnt); counter->cgrp = cgrp; return 0; } void close_cgroup(struct cgroup_sel *cgrp) { - if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) { + if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) { close(cgrp->fd); zfree(&cgrp->name); free(cgrp); diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 31f8dcdbd7ef..d91966b97cbd 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -1,14 +1,14 @@ #ifndef __CGROUP_H__ #define __CGROUP_H__ -#include <linux/atomic.h> +#include <linux/refcount.h> struct option; struct cgroup_sel { char *name; int fd; - atomic_t refcnt; + refcount_t refcnt; }; diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index d0d465953d36..94a5a7d829d5 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,10 +3,4 @@ unsigned long perf_event_open_cloexec_flag(void); -#ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -int sched_getcpu(void) __THROW; -#endif -#endif - #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 21b7ff382c3f..32837b6f7879 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -2,12 +2,12 @@ #include "util.h" #include <stdlib.h> #include <stdio.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct comm_str { char *str; struct rb_node rb_node; - atomic_t refcnt; + refcount_t refcnt; }; /* Should perhaps be moved to struct machine */ @@ -16,13 +16,13 @@ static struct rb_root comm_str_root; static struct comm_str *comm_str__get(struct comm_str *cs) { if (cs) - atomic_inc(&cs->refcnt); + refcount_inc(&cs->refcnt); return cs; } static void comm_str__put(struct comm_str *cs) { - if (cs && atomic_dec_and_test(&cs->refcnt)) { + if (cs && refcount_dec_and_test(&cs->refcnt)) { rb_erase(&cs->rb_node, &comm_str_root); zfree(&cs->str); free(cs); @@ -43,7 +43,7 @@ static struct comm_str *comm_str__alloc(const char *str) return NULL; } - atomic_set(&cs->refcnt, 0); + refcount_set(&cs->refcnt, 1); return cs; } @@ -61,7 +61,7 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) cmp = strcmp(str, iter->str); if (!cmp) - return iter; + return comm_str__get(iter); if (cmp < 0) p = &(*p)->rb_left; @@ -95,8 +95,6 @@ struct comm *comm__new(const char *str, u64 timestamp, bool exec) return NULL; } - comm_str__get(comm->comm_str); - return comm; } @@ -108,7 +106,6 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) if (!new) return -ENOMEM; - comm_str__get(new); comm_str__put(old); comm->comm_str = new; comm->start = timestamp; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8c7504939113..061018b42393 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -29,7 +29,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -43,7 +43,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, 
int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -252,7 +252,7 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -269,7 +269,7 @@ struct cpu_map *cpu_map__empty_new(int nr) for (i = 0; i < nr; i++) cpus->map[i] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -278,7 +278,7 @@ struct cpu_map *cpu_map__empty_new(int nr) static void cpu_map__delete(struct cpu_map *map) { if (map) { - WARN_ONCE(atomic_read(&map->refcnt) != 0, + WARN_ONCE(refcount_read(&map->refcnt) != 0, "cpu_map refcnt unbalanced\n"); free(map); } @@ -287,13 +287,13 @@ static void cpu_map__delete(struct cpu_map *map) struct cpu_map *cpu_map__get(struct cpu_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void cpu_map__put(struct cpu_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) cpu_map__delete(map); } @@ -357,7 +357,7 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - atomic_set(&c->refcnt, 1); + refcount_set(&c->refcnt, 1); *res = c; return 0; } @@ -673,3 +673,49 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) pr_debug("cpumask list: %s\n", buf); return ret; } + +static char hex_char(unsigned char val) +{ + if (val < 10) + return val + '0'; + if (val < 16) + return val - 10 + 'a'; + return '?'; +} + +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) +{ + int i, cpu; + char *ptr = buf; + unsigned char *bitmap; + int last_cpu = cpu_map__cpu(map, map->nr - 1); + + bitmap = zalloc((last_cpu + 7) / 8); + if (bitmap == NULL) { + buf[0] = '\0'; + return 0; + } + + for (i = 0; i < map->nr; i++) { + cpu = cpu_map__cpu(map, i); + bitmap[cpu / 8] |= 1 << (cpu % 8); + } + + for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + unsigned char bits = bitmap[cpu / 8]; + + if (cpu % 8) + bits >>= 4; + else + bits &= 0xf; + + *ptr++ = hex_char(bits); + if ((cpu % 32) == 0 && cpu > 0) + *ptr++ = ','; + } + *ptr = '\0'; + free(bitmap); + + buf[size - 1] = '\0'; + return ptr - buf; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1a0549af8f5c..6b8bff87481d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,13 +3,13 @@ #include <stdio.h> #include <stdbool.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include "perf.h" #include "util/debug.h" struct cpu_map { - atomic_t refcnt; + refcount_t refcnt; int nr; int map[]; }; @@ -20,6 +20,7 @@ struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__new_data(struct cpu_map_data *data); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size); +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 4e6cbc99f08e..89ece2445713 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1468,6 +1468,7 @@ int 
bt_convert__perf2ctf(const char *input, const char *path, .lost = perf_event__process_lost, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d38b62a700ca..42db00d78573 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1109,7 +1109,7 @@ struct dso *dso__new(const char *name) INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); - atomic_set(&dso->refcnt, 1); + refcount_set(&dso->refcnt, 1); } return dso; @@ -1147,13 +1147,13 @@ void dso__delete(struct dso *dso) struct dso *dso__get(struct dso *dso) { if (dso) - atomic_inc(&dso->refcnt); + refcount_inc(&dso->refcnt); return dso; } void dso__put(struct dso *dso) { - if (dso && atomic_dec_and_test(&dso->refcnt)) + if (dso && refcount_dec_and_test(&dso->refcnt)) dso__delete(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ecc4bbd3f82e..12350b171727 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,7 +1,7 @@ #ifndef __PERF_DSO #define __PERF_DSO -#include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/types.h> #include <linux/rbtree.h> #include <sys/types.h> @@ -187,7 +187,7 @@ struct dso { void *priv; u64 db_id; }; - atomic_t refcnt; + refcount_t refcnt; char name[0]; }; diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c new file mode 100644 index 000000000000..ffbdb19f05d0 --- /dev/null +++ b/tools/perf/util/dump-insn.c @@ -0,0 +1,14 @@ +#include <linux/compiler.h> +#include "dump-insn.h" + +/* Fallback code */ + +__weak +const char *dump_insn(struct perf_insn *x __maybe_unused, + u64 ip __maybe_unused, u8 *inbuf __maybe_unused, + int inlen __maybe_unused, int *lenp) +{ + if (lenp) + *lenp = 0; + return "?"; +} diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h new file mode 100644 index 000000000000..90fb115981cf --- /dev/null +++ b/tools/perf/util/dump-insn.h @@ -0,0 +1,22 @@ +#ifndef __PERF_DUMP_INSN_H +#define __PERF_DUMP_INSN_H 1 + +#define MAXINSN 15 + +#include <linux/types.h> + +struct thread; + +struct perf_insn { + /* Initialized by callers: */ + struct thread *thread; + u8 cpumode; + bool is64bit; + int cpu; + /* Temporary */ + char out[256]; +}; + +const char *dump_insn(struct perf_insn *x, u64 ip, + u8 *inbuf, int inlen, int *lenp); +#endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4ea7ce72ed9c..76b9c6bc8369 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -31,6 +31,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", [PERF_RECORD_SWITCH] = "SWITCH", [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", + [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -49,6 +50,16 @@ static const char *perf_event__names[] = { [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; +static const char *perf_ns__names[] = { + [NET_NS_INDEX] = "net", + [UTS_NS_INDEX] = "uts", + [IPC_NS_INDEX] = "ipc", + [PID_NS_INDEX] = "pid", + [USER_NS_INDEX] = "user", + [MNT_NS_INDEX] = "mnt", + [CGROUP_NS_INDEX] = "cgroup", +}; + const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) @@ -58,6 +69,13 @@ const char *perf_event__name(unsigned int id) 
return perf_event__names[id]; } +static const char *perf_ns__name(unsigned int id) +{ + if (id >= ARRAY_SIZE(perf_ns__names)) + return "UNKNOWN"; + return perf_ns__names[id]; +} + static int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -203,6 +221,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, return tgid; } +static void perf_event__get_ns_link_info(pid_t pid, const char *ns, + struct perf_ns_link_info *ns_link_info) +{ + struct stat64 st; + char proc_ns[128]; + + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); + if (stat64(proc_ns, &st) == 0) { + ns_link_info->dev = st.st_dev; + ns_link_info->ino = st.st_ino; + } +} + +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine) +{ + u32 idx; + struct perf_ns_link_info *ns_link_info; + + if (!tool || !tool->namespace_events) + return 0; + + memset(&event->namespaces, 0, (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size)); + + event->namespaces.pid = tgid; + event->namespaces.tid = pid; + + event->namespaces.nr_namespaces = NR_NAMESPACES; + + ns_link_info = event->namespaces.link_info; + + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) + perf_event__get_ns_link_info(pid, perf_ns__name(idx), + &ns_link_info[idx]); + + event->namespaces.header.type = PERF_RECORD_NAMESPACES; + + event->namespaces.header.size = (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + static int perf_event__synthesize_fork(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, pid_t ppid, @@ -255,8 +325,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/maps", - machine->root_dir, pid); + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); fp = fopen(filename, "r"); if (fp == NULL) { @@ -434,8 +504,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, union perf_event *fork_event, + union perf_event *namespaces_event, pid_t pid, int full, - perf_event__handler_t process, + perf_event__handler_t process, struct perf_tool *tool, struct machine *machine, bool mmap_data, @@ -455,6 +526,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (tgid == -1) return -1; + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, + tgid, process, machine) < 0) + return -1; + + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data, proc_map_timeout); @@ -488,6 +564,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, ppid, process, machine) < 0) break; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, + tgid, process, machine) < 0) + break; + /* * Send the prepared comm event */ @@ -516,6 +597,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, unsigned int proc_map_timeout) { union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1, 
thread, j; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -530,10 +612,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, thread_map__pid(threads, thread), 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -559,7 +647,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, /* if not, generate events for it */ if (need_leader && __event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -568,6 +656,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, } } } + free(namespaces_event); +out_free_fork: free(fork_event); out_free_mmap: free(mmap_event); @@ -587,6 +677,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, char proc_path[PATH_MAX]; struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1; if (machine__is_default_guest(machine)) @@ -604,11 +695,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); proc = opendir(proc_path); if (proc == NULL) - goto out_free_fork; + goto out_free_namespaces; while ((dirent = readdir(proc)) != NULL) { char *end; @@ -620,13 +717,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * We may race with exiting thread, so don't stop just because * one thread couldn't be synthesized. */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, - 1, process, tool, machine, mmap_data, + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, proc_map_timeout); } err = 0; closedir(proc); +out_free_namespaces: + free(namespaces_event); out_free_fork: free(fork_event); out_free_mmap: @@ -1008,6 +1108,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) +{ + size_t ret = 0; + struct perf_ns_link_info *ns_link_info; + u32 nr_namespaces, idx; + + ns_link_info = event->namespaces.link_info; + nr_namespaces = event->namespaces.nr_namespaces; + + ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[", + event->namespaces.pid, + event->namespaces.tid, + nr_namespaces); + + for (idx = 0; idx < nr_namespaces; idx++) { + if (idx && (idx % 4 == 0)) + ret += fprintf(fp, "\n\t\t "); + + ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx, + perf_ns__name(idx), (u64)ns_link_info[idx].dev, + (u64)ns_link_info[idx].ino, + ((idx + 1) != nr_namespaces) ? 
", " : "]\n"); + } + + return ret; +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1016,6 +1143,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused, return machine__process_comm_event(machine, event, sample); } +int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_namespaces_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1153,11 +1288,12 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n", + return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", - event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : ""); + event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) @@ -1196,6 +1332,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP: ret += perf_event__fprintf_mmap(event, fp); break; + case PERF_RECORD_NAMESPACES: + ret += perf_event__fprintf_namespaces(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c735c53a26f8..eb7a7b200737 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -39,6 +39,13 @@ struct comm_event { char comm[16]; }; +struct namespaces_event { + struct perf_event_header header; + u32 pid, tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[]; +}; + struct fork_event { struct perf_event_header header; u32 pid, ppid; @@ -269,6 +276,7 @@ struct events_stats { u64 total_lost; u64 total_lost_samples; u64 total_aux_lost; + u64 total_aux_partial; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; @@ -485,6 +493,7 @@ union perf_event { struct mmap_event mmap; struct mmap2_event mmap2; struct comm_event comm; + struct namespaces_event namespaces; struct fork_event fork; struct lost_event lost; struct lost_samples_event lost_samples; @@ -587,6 +596,10 @@ int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -636,6 +649,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine); + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -653,6 +672,7 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); size_t 
perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b601f2814a30..50420cd35446 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -777,7 +777,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messu /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -794,7 +794,7 @@ perf_mmap__read_backward(struct perf_mmap *md) /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -856,7 +856,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md) { u64 head; - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return; head = perf_mmap__read_head(md); @@ -875,14 +875,14 @@ static bool perf_mmap__empty(struct perf_mmap *md) static void perf_mmap__get(struct perf_mmap *map) { - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); } static void perf_mmap__put(struct perf_mmap *md) { - BUG_ON(md->base && atomic_read(&md->refcnt) == 0); + BUG_ON(md->base && refcount_read(&md->refcnt) == 0); - if (atomic_dec_and_test(&md->refcnt)) + if (refcount_dec_and_test(&md->refcnt)) perf_mmap__munmap(md); } @@ -894,7 +894,7 @@ void perf_mmap__consume(struct perf_mmap *md, bool overwrite) perf_mmap__write_tail(md, old); } - if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) + if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) perf_mmap__put(md); } @@ -937,7 +937,7 @@ static void perf_mmap__munmap(struct perf_mmap *map) munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; - atomic_set(&map->refcnt, 0); + refcount_set(&map->refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -974,8 +974,19 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) if (!map) return NULL; - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->nr_mmaps; i++) { map[i].fd = -1; + /* + * When the perf_mmap() call is made we grab one refcount, plus + * one extra to let perf_evlist__mmap_consume() get the last + * events after all real references (perf_mmap__get()) are + * dropped. + * + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and + * thus does perf_mmap__get() on it. + */ + refcount_set(&map[i].refcnt, 0); + } return map; } @@ -1001,7 +1012,7 @@ static int perf_mmap__mmap(struct perf_mmap *map, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). 
*/ - atomic_set(&map->refcnt, 2); + refcount_set(&map->refcnt, 2); map->prev = 0; map->mask = mp->mask; map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 389b9ccdf8c7..39942995f537 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,7 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 -#include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/list.h> #include <api/fd/array.h> #include <stdio.h> @@ -29,7 +29,7 @@ struct perf_mmap { void *base; int mask; int fd; - atomic_t refcnt; + refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac59710b79e0..9dc7e2d6e48a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -236,6 +236,10 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; + evsel->metric_expr = NULL; + evsel->metric_name = NULL; + evsel->metric_events = NULL; + evsel->collect_stat = false; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -932,6 +936,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_namespaces) + attr->namespaces = track; + if (opts->record_switch_events) attr->context_switch = track; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 06ef6f29efa1..d101695c482c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,11 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool merged_stat; + const char * metric_expr; + const char * metric_name; + struct perf_evsel **metric_events; + bool collect_stat; }; union u64_swap { diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h new file mode 100644 index 000000000000..9c2760a1a96e --- /dev/null +++ b/tools/perf/util/expr.h @@ -0,0 +1,25 @@ +#ifndef PARSE_CTX_H +#define PARSE_CTX_H 1 + +#define EXPR_MAX_OTHER 8 +#define MAX_PARSE_ID EXPR_MAX_OTHER + +struct parse_id { + const char *name; + double val; +}; + +struct parse_ctx { + int num_ids; + struct parse_id ids[MAX_PARSE_ID]; +}; + +void expr__ctx_init(struct parse_ctx *ctx); +void expr__add_id(struct parse_ctx *ctx, const char *id, double val); +#ifndef IN_EXPR_Y +int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); +#endif +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_other); + +#endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y new file mode 100644 index 000000000000..954556bea36e --- /dev/null +++ b/tools/perf/util/expr.y @@ -0,0 +1,173 @@ +/* Simple expression parser */ +%{ +#include "util.h" +#include "util/debug.h" +#define IN_EXPR_Y 1 +#include "expr.h" +#include <string.h> + +#define MAXIDLEN 256 +%} + +%pure-parser +%parse-param { double *final_val } +%parse-param { struct parse_ctx *ctx } +%parse-param { const char **pp } +%lex-param { const char **pp } + +%union { + double num; + char id[MAXIDLEN+1]; +} + +%token <num> NUMBER +%token <id> ID +%left '|' +%left '^' +%left '&' +%left '-' '+' +%left '*' '/' '%' +%left NEG NOT +%type <num> expr + +%{ +static int expr__lex(YYSTYPE *res, const char **pp); + +static void 
expr__error(double *final_val __maybe_unused, + struct parse_ctx *ctx __maybe_unused, + const char **pp __maybe_unused, + const char *s) +{ + pr_debug("%s\n", s); +} + +static int lookup_id(struct parse_ctx *ctx, char *id, double *val) +{ + int i; + + for (i = 0; i < ctx->num_ids; i++) { + if (!strcasecmp(ctx->ids[i].name, id)) { + *val = ctx->ids[i].val; + return 0; + } + } + return -1; +} + +%} +%% + +all_expr: expr { *final_val = $1; } + ; + +expr: NUMBER + | ID { if (lookup_id(ctx, $1, &$$) < 0) { + pr_debug("%s not found", $1); + YYABORT; + } + } + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } + | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } + | '-' expr %prec NEG { $$ = -$2; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) +{ + char *dst = res->id; + const char *s = p; + + while (isalnum(*p) || *p == '_' || *p == '.') { + if (p - s >= MAXIDLEN) + return -1; + *dst++ = *p++; + } + *dst = 0; + *pp = p; + return ID; +} + +static int expr__lex(YYSTYPE *res, const char **pp) +{ + int tok; + const char *s; + const char *p = *pp; + + while (isspace(*p)) + p++; + s = p; + switch (*p++) { + case 'a' ... 'z': + case 'A' ... 'Z': + return expr__symbol(res, p - 1, pp); + case '0' ... '9': case '.': + res->num = strtod(s, (char **)&p); + tok = NUMBER; + break; + default: + tok = *s; + break; + } + *pp = p; + return tok; +} + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} + +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_otherp) +{ + const char *orig = p; + int err = -1; + int num_other; + + *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); + if (!*other) + return -1; + + num_other = 0; + for (;;) { + YYSTYPE val; + int tok = expr__lex(&val, &p); + if (tok == 0) { + err = 0; + break; + } + if (tok == ID && strcasecmp(one, val.id)) { + if (num_other >= EXPR_MAX_OTHER - 1) { + pr_debug("Too many extra events in %s\n", orig); + break; + } + (*other)[num_other] = strdup(val.id); + if (!(*other)[num_other]) + return -1; + num_other++; + } + } + (*other)[num_other] = NULL; + *num_otherp = num_other; + if (err) { + *num_otherp = 0; + free(*other); + *other = NULL; + } + return err; +} diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index eaf72a938fb4..e3b38f629504 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -3,6 +3,7 @@ #include "hist.h" #include "map.h" #include "session.h" +#include "namespaces.h" #include "sort.h" #include "evlist.h" #include "evsel.h" @@ -169,6 +170,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CGROUP_ID, 20); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); @@ -574,9 +576,14 @@ __hists__add_entry(struct hists *hists, bool sample_self, struct hist_entry_ops *ops) { + struct namespaces *ns = thread__namespaces(al->thread); struct hist_entry entry = { .thread = al->thread, .comm = thread__comm(al->thread), + .cgroup_id = { + 
.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, + .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, + }, .ms = { .map = al->map, .sym = al->sym, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 28c216e3d5b7..ee3670a388df 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -30,6 +30,7 @@ enum hist_column { HISTC_DSO, HISTC_THREAD, HISTC_COMM, + HISTC_CGROUP_ID, HISTC_PARENT, HISTC_CPU, HISTC_SOCKET, @@ -57,6 +58,7 @@ enum hist_column { HISTC_SRCLINE_FROM, HISTC_SRCLINE_TO, HISTC_TRACE, + HISTC_SYM_SIZE, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..54818828023b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -26,12 +26,13 @@ #include "insn.c" #include "intel-pt-insn-decoder.h" +#include "dump-insn.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { @@ -39,6 +40,8 @@ static void intel_pt_insn_decoder(struct insn *insn, enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; int ext; + intel_pt_insn->rel = 0; + if (insn_is_avx(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; @@ -177,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } +const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, + u8 *inbuf, int inlen, int *lenp) +{ + struct insn insn; + int n, i; + int left; + + insn_init(&insn, inbuf, inlen, x->is64bit); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > inlen) + return "<bad>"; + if (lenp) + *lenp = insn.length; + left = sizeof(x->out); + n = snprintf(x->out, left, "insn: "); + left -= n; + for (i = 0; i < insn.length; i++) { + n += snprintf(x->out + n, left, "%02x ", inbuf[i]); + left -= n; + } + return x->out; +} + const char *branch_name[] = { [INTEL_PT_OP_OTHER] = "Other", [INTEL_PT_OP_CALL] = "Call", diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 71c9720d4973..dfc600446586 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -13,6 +13,7 @@ #include <symbol/kallsyms.h> #include "unwind.h" #include "linux/hash.h" +#include "asm/bug.h" static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -501,6 +502,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event return err; } +int machine__process_namespaces_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct thread *thread = machine__findnew_thread(machine, + event->namespaces.pid, + event->namespaces.tid); + int err = 0; + + WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES, + "\nWARNING: kernel seems to support more namespaces than perf" + " tool.\nTry updating the perf tool..\n\n"); + + WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES, + "\nWARNING: perf tool seems to support more namespaces than" + " the kernel.\nTry updating the kernel..\n\n"); + + if (dump_trace) + perf_event__fprintf_namespaces(event, stdout); + + if (thread == NULL || + 
thread__set_namespaces(thread, sample->time, &event->namespaces)) { + dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n"); + err = -1; + } + + thread__put(thread); + + return err; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -1439,7 +1471,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, if (machine->last_match == th) machine->last_match = NULL; - BUG_ON(atomic_read(&th->refcnt) == 0); + BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); @@ -1538,6 +1570,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_comm_event(machine, event, sample); break; case PERF_RECORD_MMAP: ret = machine__process_mmap_event(machine, event, sample); break; + case PERF_RECORD_NAMESPACES: + ret = machine__process_namespaces_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a28305029711..3cdb1340f917 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); +int machine__process_namespaces_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0a943e7b1ea7..1d9ebcf9e38e 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -141,7 +141,7 @@ void map__init(struct map *map, enum map_type type, RB_CLEAR_NODE(&map->rb_node); map->groups = NULL; map->erange_warned = false; - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); } struct map *map__new(struct machine *machine, u64 start, u64 len, @@ -255,7 +255,7 @@ void map__delete(struct map *map) void map__put(struct map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) map__delete(map); } @@ -354,7 +354,7 @@ struct map *map__clone(struct map *from) struct map *map = memdup(from, sizeof(*map)); if (map != NULL) { - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); RB_CLEAR_NODE(&map->rb_node); dso__get(map->dso); map->groups = NULL; @@ -485,7 +485,7 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) maps__init(&mg->maps[i]); } mg->machine = machine; - atomic_set(&mg->refcnt, 1); + refcount_set(&mg->refcnt, 1); } static void __maps__purge(struct maps *maps) @@ -547,7 +547,7 @@ void map_groups__delete(struct map_groups *mg) void map_groups__put(struct map_groups *mg) { - if (mg && atomic_dec_and_test(&mg->refcnt)) + if (mg && refcount_dec_and_test(&mg->refcnt)) map_groups__delete(mg); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index abdacf800c98..c8a5a644c0a9 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -1,7 +1,7 @@ #ifndef __PERF_MAP_H #define __PERF_MAP_H -#include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/compiler.h> #include 
<linux/list.h> #include <linux/rbtree.h> @@ -51,7 +51,7 @@ struct map { struct dso *dso; struct map_groups *groups; - atomic_t refcnt; + refcount_t refcnt; }; struct kmap { @@ -67,7 +67,7 @@ struct maps { struct map_groups { struct maps maps[MAP__NR_TYPES]; struct machine *machine; - atomic_t refcnt; + refcount_t refcnt; }; struct map_groups *map_groups__new(struct machine *machine); @@ -77,7 +77,7 @@ bool map_groups__empty(struct map_groups *mg); static inline struct map_groups *map_groups__get(struct map_groups *mg) { if (mg) - atomic_inc(&mg->refcnt); + refcount_inc(&mg->refcnt); return mg; } @@ -150,7 +150,7 @@ struct map *map__clone(struct map *map); static inline struct map *map__get(struct map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c new file mode 100644 index 000000000000..2de8da64d90c --- /dev/null +++ b/tools/perf/util/namespaces.c @@ -0,0 +1,36 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#include "namespaces.h" +#include "util.h" +#include "event.h" +#include <stdlib.h> +#include <stdio.h> + +struct namespaces *namespaces__new(struct namespaces_event *event) +{ + struct namespaces *namespaces; + u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) * + sizeof(struct perf_ns_link_info)); + + namespaces = zalloc(sizeof(struct namespaces) + link_info_size); + if (!namespaces) + return NULL; + + namespaces->end_time = -1; + + if (event) + memcpy(namespaces->link_info, event->link_info, link_info_size); + + return namespaces; +} + +void namespaces__free(struct namespaces *namespaces) +{ + free(namespaces); +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h new file mode 100644 index 000000000000..468f1e9a1484 --- /dev/null +++ b/tools/perf/util/namespaces.h @@ -0,0 +1,26 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#ifndef __PERF_NAMESPACES_H +#define __PERF_NAMESPACES_H + +#include "../perf.h" +#include <linux/list.h> + +struct namespaces_event; + +struct namespaces { + struct list_head list; + u64 end_time; + struct perf_ns_link_info link_info[]; +}; + +struct namespaces *namespaces__new(struct namespaces_event *event); +void namespaces__free(struct namespaces *namespaces); + +#endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 67a8aebc67ab..6b498aea9fde 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -316,8 +316,9 @@ __add_event(struct list_head *list, int *idx, return NULL; (*idx)++; - evsel->cpus = cpu_map__get(cpus); - evsel->own_cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); + evsel->system_wide = !!cpus; if (name) evsel->name = strdup(name); @@ -1254,11 +1255,59 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->metric_expr = info.metric_expr; + evsel->metric_name = info.metric_name; } return evsel ? 
0 : -ENOMEM; } +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, struct list_head **listp) +{ + struct list_head *head; + struct parse_events_term *term; + struct list_head *list; + struct perf_pmu *pmu = NULL; + int ok = 0; + + *listp = NULL; + /* Add it for all PMUs that support the alias */ + list = malloc(sizeof(struct list_head)); + if (!list) + return -1; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + struct perf_pmu_alias *alias; + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcasecmp(alias->name, str)) { + head = malloc(sizeof(struct list_head)); + if (!head) + return -1; + INIT_LIST_HEAD(head); + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + str, 1, false, &str, NULL) < 0) + return -1; + list_add_tail(&term->list, head); + + if (!parse_events_add_pmu(data, list, + pmu->name, head)) { + pr_debug("%s -> %s/%s/\n", str, + pmu->name, alias->str); + ok++; + } + + parse_events_terms__delete(head); + } + } + } + if (!ok) + return -1; + *listp = list; + return 0; +} + int parse_events__modifier_group(struct list_head *list, char *event_mod) { @@ -2276,7 +2325,7 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc) + bool long_desc, bool details_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -2286,7 +2335,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag, long_desc); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc, + details_flag); if (event_glob != NULL) return; @@ -2415,6 +2465,31 @@ int parse_events_term__clone(struct parse_events_term **new, return new_term(new, &temp, term->val.str, term->val.num); } +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new) +{ + struct parse_events_term *term, *n; + int ret; + + if (!old) { + *new = NULL; + return 0; + } + + *new = malloc(sizeof(struct list_head)); + if (!*new) + return -ENOMEM; + INIT_LIST_HEAD(*new); + + list_for_each_entry (term, old, list) { + ret = parse_events_term__clone(&n, term); + if (ret) + return ret; + list_add_tail(&n->list, *new); + } + return 0; +} + void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1af6a267c21b..a235f4d6d5e5 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -8,6 +8,7 @@ #include <stdbool.h> #include <linux/types.h> #include <linux/perf_event.h> +#include <string.h> struct list_head; struct perf_evsel; @@ -166,6 +167,14 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, int parse_events_add_pmu(struct parse_events_evlist *data, struct list_head *list, char *name, struct list_head *head_config); + +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, + struct list_head **listp); + +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new); + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); @@ -175,7 +184,7 @@ void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - 
bool long_desc); + bool long_desc, bool details_flag); struct event_symbol { const char *symbol; @@ -196,4 +205,23 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +#ifdef HAVE_LIBELF_SUPPORT +/* + * If the probe point starts with '%', + * or starts with "sdt_" and has a ':' but no '=', + * then it should be a SDT/cached probe point. + */ +static inline bool is_sdt_event(char *str) +{ + return (str[0] == '%' || + (!strncmp(str, "sdt_", 4) && + !!strchr(str, ':') && !strchr(str, '='))); +} +#else +static inline bool is_sdt_event(char *str __maybe_unused) +{ + return false; +} +#endif /* HAVE_LIBELF_SUPPORT */ + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 30f018ea1370..04fd8c9af9f9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -226,68 +226,55 @@ event_pmu: PE_NAME opt_event_config { struct parse_events_evlist *data = _data; - struct list_head *list; + struct list_head *list, *orig_terms, *terms; + + if (parse_events_copy_term_list($2, &orig_terms)) + YYABORT; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + if (parse_events_add_pmu(data, list, $1, $2)) { + struct perf_pmu *pmu = NULL; + int ok = 0; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + char *name = pmu->name; + + if (!strncmp(name, "uncore_", 7) && + strncmp($1, "uncore_", 7)) + name += 7; + if (!strncmp($1, name, strlen($1))) { + if (parse_events_copy_term_list(orig_terms, &terms)) + YYABORT; + if (!parse_events_add_pmu(data, list, pmu->name, terms)) + ok++; + parse_events_terms__delete(terms); + } + } + if (!ok) + YYABORT; + } parse_events_terms__delete($2); + parse_events_terms__delete(orig_terms); $$ = list; } | PE_KERNEL_PMU_EVENT sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; - struct perf_pmu *pmu = NULL; - int ok = 0; - - /* Add it for all PMUs that support the alias */ - ALLOC_LIST(list); - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - struct perf_pmu_alias *alias; - - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, $1)) { - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - if (!parse_events_add_pmu(data, list, - pmu->name, head)) { - pr_debug("%s -> %s/%s/\n", $1, - pmu->name, alias->str); - ok++; - } - parse_events_terms__delete(head); - } - } - } - if (!ok) + if (parse_events_multi_pmu_add(_data, $1, &list) < 0) YYABORT; $$ = list; } | PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; char pmu_name[128]; - snprintf(&pmu_name, 128, "%s-%s", $1, $3); - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - &pmu_name, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events_terms__delete(head); + snprintf(&pmu_name, 128, "%s-%s", $1, $3); + if (parse_events_multi_pmu_add(_data, pmu_name, &list) < 0) + YYABORT; $$ = list; } diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index c4023f22f287..a37e5934aa2a 100644 --- a/tools/perf/util/perf_regs.c +++ 
b/tools/perf/util/perf_regs.c @@ -6,6 +6,12 @@ const struct sample_reg __weak sample_reg_masks[] = { SMPL_REG_END }; +int __weak sdt_rename_register(char **pdesc __maybe_unused, + char *old_name __maybe_unused) +{ + return 0; +} + #ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 679d6e493962..7544a157e159 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,6 +15,12 @@ struct sample_reg { extern const struct sample_reg sample_reg_masks[]; +/* + * The table sdt_reg_renamings is used for adjusting gcc/gas-generated + * registers before filling the uprobe tracer interface. + */ +int sdt_rename_register(char **pdesc, char *old_name); + #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 12f84dd2ac5d..362051ea7f3d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -231,7 +231,9 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, - char *unit, char *perpkg) + char *unit, char *perpkg, + char *metric_expr, + char *metric_name) { struct perf_pmu_alias *alias; int ret; @@ -265,6 +267,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; + alias->metric_name = metric_name ? strdup(metric_name): NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; @@ -294,7 +298,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL); + NULL, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -564,7 +568,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, (char *)pe->long_desc, (char *)pe->topic, - (char *)pe->unit, (char *)pe->perpkg); + (char *)pe->unit, (char *)pe->perpkg, + (char *)pe->metric_expr, + (char *)pe->metric_name); } out: @@ -991,6 +997,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->metric_expr = NULL; + info->metric_name = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1006,6 +1014,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; + info->metric_expr = alias->metric_expr; + info->metric_name = alias->metric_name; list_del(&term->list); free(term); @@ -1100,6 +1110,8 @@ struct sevent { char *topic; char *str; char *pmu; + char *metric_expr; + char *metric_name; }; static int cmp_sevent(const void *a, const void *b) @@ -1142,7 +1154,7 @@ static void wordwrap(char *s, int start, int max, int corr) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc) + bool long_desc, bool details_flag) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1198,6 +1210,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].topic = alias->topic; aliases[j].str = 
alias->str; aliases[j].pmu = pmu->name; + aliases[j].metric_expr = alias->metric_expr; + aliases[j].metric_name = alias->metric_name; j++; } if (pmu->selectable && @@ -1232,8 +1246,14 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("%*s", 8, "["); wordwrap(aliases[j].desc, 8, columns, 0); printf("]\n"); - if (verbose > 0) - printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str); + if (details_flag) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); + if (aliases[j].metric_name) + printf(" MetricName: %s", aliases[j].metric_name); + if (aliases[j].metric_expr) + printf(" MetricExpr: %s", aliases[j].metric_expr); + putchar('\n'); + } } else printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 00852ddc7741..ea7f450dc609 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -31,6 +31,8 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; + const char *metric_expr; + const char *metric_name; double scale; bool per_pkg; bool snapshot; @@ -50,6 +52,8 @@ struct perf_pmu_alias { double scale; bool per_pkg; bool snapshot; + char *metric_expr; + char *metric_name; }; struct perf_pmu *perf_pmu__find(const char *name); @@ -76,7 +80,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc); + bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28fb62c32678..e4b889444447 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -47,7 +47,6 @@ #include "probe-file.h" #include "session.h" -#define MAX_CMDLEN 256 #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ @@ -757,7 +756,9 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (!tevs[i].point.address || tevs[i].point.retprobe) + if (!tevs[i].point.address) + continue; + if (tevs[i].point.retprobe && !kretprobe_offset_is_supported()) continue; /* If we found a wrong one, mark it by NULL symbol */ if (kprobe_warn_out_range(tevs[i].point.symbol, @@ -1339,14 +1340,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (!arg) return -EINVAL; - /* - * If the probe point starts with '%', - * or starts with "sdt_" and has a ':' but no '=', - * then it should be a SDT/cached probe point. 
- */ - if (arg[0] == '%' || - (!strncmp(arg, "sdt_", 4) && - !!strchr(arg, ':') && !strchr(arg, '='))) { + if (is_sdt_event(arg)) { pev->sdt = true; if (arg[0] == '%') arg++; @@ -1528,11 +1522,6 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -EINVAL; } - if (pp->retprobe && !pp->function) { - semantic_error("Return probe requires an entry function.\n"); - return -EINVAL; - } - if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { semantic_error("Offset/Line/Lazy pattern can't be used with " "return probe.\n"); @@ -2841,7 +2830,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Note that the symbols in the kmodule are not relocated */ - if (!pev->uprobes && !pp->retprobe && !pev->target) { + if (!pev->uprobes && !pev->target && + (!pp->retprobe || kretprobe_offset_is_supported())) { reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -3057,7 +3047,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, struct probe_trace_event *new_tevs; int ret = 0; - if (ntevs == 0) { + if (*ntevs == 0) { *tevs = *tevs2; *ntevs = ntevs2; *tevs2 = NULL; diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 436b64731f65..d741634cbfc0 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -27,8 +27,10 @@ #include "probe-event.h" #include "probe-file.h" #include "session.h" +#include "perf_regs.h" -#define MAX_CMDLEN 256 +/* 4096 - 2 ('\n' + '\0') */ +#define MAX_CMDLEN 4094 static void print_open_warning(int err, bool uprobe) { @@ -70,7 +72,7 @@ static void print_both_open_warning(int kerr, int uerr) } } -static int open_probe_events(const char *trace_file, bool readwrite) +int open_trace_file(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; int ret; @@ -92,12 +94,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("kprobe_events", readwrite); + return open_trace_file("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("uprobe_events", readwrite); + return open_trace_file("uprobe_events", readwrite); } int probe_file__open(int flag) @@ -687,6 +689,166 @@ static unsigned long long sdt_note__get_addr(struct sdt_note *note) : (unsigned long long)note->addr.a64[0]; } +static const char * const type_to_suffix[] = { + ":s64", "", "", "", ":s32", "", ":s16", ":s8", + "", ":u8", ":u16", "", ":u32", "", "", "", ":u64" +}; + +static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) +{ + char *tmp, *desc = strdup(arg); + const char *prefix = "", *suffix = ""; + int ret = -1; + + if (desc == NULL) { + pr_debug4("Allocation error\n"); + return ret; + } + + tmp = strchr(desc, '@'); + if (tmp) { + long type_idx; + /* + * Isolate the string number and convert it into a + * binary value; this will be an index to get suffix + * of the uprobe name (defining the type) + */ + tmp[0] = '\0'; + type_idx = strtol(desc, NULL, 10); + /* Check that the conversion went OK */ + if (type_idx == LONG_MIN || type_idx == LONG_MAX) { + pr_debug4("Failed to parse sdt type\n"); + goto error; + } + /* Check that the converted value is OK */ + if (type_idx < -8 || type_idx > 8) { + pr_debug4("Failed to get a valid sdt type\n"); + goto error; + } + suffix = type_to_suffix[type_idx + 8]; + /* Get rid of the sdt prefix which is now useless */ + tmp++; + 
memmove(desc, tmp, strlen(tmp) + 1); + } + + /* + * The uprobe tracer format does not support all the + * addressing modes (notably: in x86 the scaled mode); so, we + * detect ',' characters, if there is just one, there is no + * use converting the sdt arg into a uprobe one. + */ + if (strchr(desc, ',')) { + pr_debug4("Skipping unsupported SDT argument; %s\n", desc); + goto out; + } + + /* + * If the argument addressing mode is indirect, we must check + * a few things... + */ + tmp = strchr(desc, '('); + if (tmp) { + int j; + + /* + * ...if the addressing mode is indirect with a + * positive offset (ex.: "1608(%ax)"), we need to add + * a '+' prefix so as to be compliant with uprobe + * format. + */ + if (desc[0] != '+' && desc[0] != '-') + prefix = "+"; + + /* + * ...or if the addressing mode is indirect with a symbol + * as offset, the argument will not be supported by + * the uprobe tracer format; so, let's skip this one. + */ + for (j = 0; j < tmp - desc; j++) { + if (desc[j] != '+' && desc[j] != '-' && + !isdigit(desc[j])) { + pr_debug4("Skipping unsupported SDT argument; " + "%s\n", desc); + goto out; + } + } + } + + /* + * The uprobe tracer format does not support constants; if we + * find one in the current argument, let's skip the argument. + */ + if (strchr(desc, '$')) { + pr_debug4("Skipping unsupported SDT argument; %s\n", desc); + goto out; + } + + /* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax); + * the loop below looks for the register names (starting with + * a '%' and tries to perform the needed renamings. + */ + tmp = strchr(desc, '%'); + while (tmp) { + size_t offset = tmp - desc; + + ret = sdt_rename_register(&desc, desc + offset); + if (ret < 0) + goto error; + + /* + * The desc pointer might have changed; so, let's not + * try to reuse tmp for next lookup + */ + tmp = strchr(desc + offset + 1, '%'); + } + + if (strbuf_addf(buf, " arg%d=%s%s%s", i + 1, prefix, desc, suffix) < 0) + goto error; + +out: + ret = 0; +error: + free(desc); + return ret; +} + +static char *synthesize_sdt_probe_command(struct sdt_note *note, + const char *pathname, + const char *sdtgrp) +{ + struct strbuf buf; + char *ret = NULL, **args; + int i, args_count; + + if (strbuf_init(&buf, 32) < 0) + return NULL; + + if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)) < 0) + goto error; + + if (!note->args) + goto out; + + if (note->args) { + args = argv_split(note->args, &args_count); + + for (i = 0; i < args_count; ++i) { + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + goto error; + } + } + +out: + ret = strbuf_detach(&buf, NULL); +error: + strbuf_release(&buf); + return ret; +} + int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) { struct probe_cache_entry *entry = NULL; @@ -723,11 +885,12 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) entry->pev.group = strdup(sdtgrp); list_add_tail(&entry->node, &pcache->entries); } - ret = asprintf(&buf, "p:%s/%s %s:0x%llx", - sdtgrp, note->name, pathname, - sdt_note__get_addr(note)); - if (ret < 0) + buf = synthesize_sdt_probe_command(note, pathname, sdtgrp); + if (!buf) { + ret = -ENOMEM; break; + } + strlist__add(entry->tevlist, buf); free(buf); entry = NULL; @@ -877,59 +1040,72 @@ int probe_cache__show_all_caches(struct strfilter *filter) return 0; } +enum ftrace_readme { + FTRACE_README_PROBE_TYPE_X = 0, + FTRACE_README_KRETPROBE_OFFSET, + FTRACE_README_END, +}; + 
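As an aside (not part of the patch): the synthesize_sdt_probe_command()/synthesize_sdt_probe_arg() helpers above turn an SDT note into a uprobe_events command string. For a note with provider "libc", name "setjmp" and an argument string of "8@%rdi 4@16(%rsp)", the synthesized command would look roughly like the single line below, assuming the architecture's sdt_rename_register() maps %rdi to %di and %rsp to %sp; the library path and the 0x35981 address are made-up placeholders.

    p:sdt_libc/setjmp /usr/lib64/libc.so.6:0x35981 arg1=%di:u64 arg2=+16(%sp):u32

The ":u64" and ":u32" suffixes come from type_to_suffix[] (argument sizes 8 and 4), the "+" prefix is added because the second argument is an indirect reference with a positive offset, and unsupported forms (scaled addressing, constants, symbolic offsets) are skipped, per the checks in synthesize_sdt_probe_arg().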
static struct { const char *pattern; - bool avail; - bool checked; -} probe_type_table[] = { -#define DEFINE_TYPE(idx, pat, def_avail) \ - [idx] = {.pattern = pat, .avail = (def_avail)} - DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false), - DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true), - DEFINE_TYPE(PROBE_TYPE_BITFIELD, - "* b<bit-width>@<bit-offset>/<container-size>", true), + bool avail; +} ftrace_readme_table[] = { +#define DEFINE_TYPE(idx, pat) \ + [idx] = {.pattern = pat, .avail = false} + DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), + DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), }; -bool probe_type_is_available(enum probe_type type) +static bool scan_ftrace_readme(enum ftrace_readme type) { + int fd; FILE *fp; char *buf = NULL; size_t len = 0; - bool target_line = false; - bool ret = probe_type_table[type].avail; + bool ret = false; + static bool scanned = false; - if (type >= PROBE_TYPE_END) - return false; - /* We don't have to check the type which supported by default */ - if (ret || probe_type_table[type].checked) - return ret; + if (scanned) + goto result; - if (asprintf(&buf, "%s/README", tracing_path) < 0) + fd = open_trace_file("README", false); + if (fd < 0) return ret; - fp = fopen(buf, "r"); - if (!fp) - goto end; - - zfree(&buf); - while (getline(&buf, &len, fp) > 0 && !ret) { - if (!target_line) { - target_line = !!strstr(buf, " type: "); - if (!target_line) - continue; - } else if (strstr(buf, "\t ") != buf) - break; - ret = strglobmatch(buf, probe_type_table[type].pattern); + fp = fdopen(fd, "r"); + if (!fp) { + close(fd); + return ret; } - /* Cache the result */ - probe_type_table[type].checked = true; - probe_type_table[type].avail = ret; + + while (getline(&buf, &len, fp) > 0) + for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++) + if (!ftrace_readme_table[i].avail) + ftrace_readme_table[i].avail = + strglobmatch(buf, ftrace_readme_table[i].pattern); + scanned = true; fclose(fp); -end: free(buf); - return ret; +result: + if (type >= FTRACE_README_END) + return false; + + return ftrace_readme_table[type].avail; +} + +bool probe_type_is_available(enum probe_type type) +{ + if (type >= PROBE_TYPE_END) + return false; + else if (type == PROBE_TYPE_X) + return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X); + + return true; +} + +bool kretprobe_offset_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET); } diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index eba44c3e9dca..dbf95a00864a 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -35,6 +35,7 @@ enum probe_type { /* probe-file.c depends on libelf */ #ifdef HAVE_LIBELF_SUPPORT +int open_trace_file(const char *trace_file, bool readwrite); int probe_file__open(int flag); int probe_file__open_both(int *kfd, int *ufd, int flag); struct strlist *probe_file__get_namelist(int fd); @@ -64,6 +65,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, const char *group, const char *event); int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); +bool kretprobe_offset_is_supported(void); #else /* ! 
HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1dd617d116b5..24259bc2c598 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ #include <linux/kernel.h> #include <traceevent/event-parse.h> +#include <api/fs/fs.h> #include <byteswap.h> #include <unistd.h> @@ -1239,6 +1240,8 @@ static int machines__deliver_event(struct machines *machines, return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); + case PERF_RECORD_NAMESPACES: + return tool->namespaces(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: @@ -1258,9 +1261,12 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: - if (tool->aux == perf_event__process_aux && - (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) - evlist->stats.total_aux_lost += 1; + if (tool->aux == perf_event__process_aux) { + if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) + evlist->stats.total_aux_lost += 1; + if (event->aux.flags & PERF_AUX_FLAG_PARTIAL) + evlist->stats.total_aux_partial += 1; + } return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1494,6 +1500,11 @@ int perf_session__register_idle_thread(struct perf_session *session) err = -1; } + if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) { + pr_err("problem inserting idle task.\n"); + err = -1; + } + /* machine__findnew_thread() got the thread, so put it */ thread__put(thread); return err; @@ -1548,6 +1559,23 @@ static void perf_session__warn_about_errors(const struct perf_session *session) stats->nr_events[PERF_RECORD_AUX]); } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_partial != 0) { + bool vmm_exclusive = false; + + (void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive", + &vmm_exclusive); + + ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n" + "Are you running a KVM guest in the background?%s\n\n", + stats->total_aux_partial, + stats->nr_events[PERF_RECORD_AUX], + vmm_exclusive ? + "\nReloading kvm_intel module with vmm_exclusive=0\n" + "will reduce the gaps to only guest's timeslices." 
: + ""); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0ff622288d24..8b0d4e39f640 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -536,6 +536,46 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort cgroup_id */ + +static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) +{ + return (int64_t)(right_dev - left_dev); +} + +static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino) +{ + return (int64_t)(right_ino - left_ino); +} + +static int64_t +sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev); + if (ret != 0) + return ret; + + return _sort__cgroup_inode_cmp(right->cgroup_id.ino, + left->cgroup_id.ino); +} + +static int hist_entry__cgroup_id_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width __maybe_unused) +{ + return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev, + he->cgroup_id.ino); +} + +struct sort_entry sort_cgroup_id = { + .se_header = "cgroup id (dev/inode)", + .se_cmp = sort__cgroup_id_cmp, + .se_snprintf = hist_entry__cgroup_id_snprintf, + .se_width_idx = HISTC_CGROUP_ID, +}; + /* --sort socket */ static int64_t @@ -846,6 +886,9 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, static int64_t sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) { + if (!left->branch_info || !right->branch_info) + return cmp_null(left->branch_info, right->branch_info); + return left->branch_info->flags.cycles - right->branch_info->flags.cycles; } @@ -853,6 +896,8 @@ sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + if (!he->branch_info) + return scnprintf(bf, size, "%-.*s", width, "N/A"); if (he->branch_info->flags.cycles == 0) return repsep_snprintf(bf, size, "%-*s", width, "-"); return repsep_snprintf(bf, size, "%-*hd", width, @@ -1396,6 +1441,46 @@ struct sort_entry sort_transaction = { .se_width_idx = HISTC_TRANSACTION, }; +/* --sort symbol_size */ + +static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r) +{ + int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0; + int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0; + + return size_l < size_r ? -1 : + size_l == size_r ? 
0 : 1; +} + +static int64_t +sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__sym_size_cmp(right->ms.sym, left->ms.sym); +} + +static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf, + size_t bf_size, unsigned int width) +{ + if (sym) + return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym)); + + return repsep_snprintf(bf, bf_size, "%*s", width, "unknown"); +} + +static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width); +} + +struct sort_entry sort_sym_size = { + .se_header = "Symbol size", + .se_cmp = sort__sym_size_cmp, + .se_snprintf = hist_entry__sym_size_snprintf, + .se_width_idx = HISTC_SYM_SIZE, +}; + + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -1418,6 +1503,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), DIM(SORT_TRACE, "trace", sort_trace), + DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), + DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 796c847e2f00..baf20a399f34 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -54,6 +54,11 @@ struct he_stat { u32 nr_events; }; +struct namespace_id { + u64 dev; + u64 ino; +}; + struct hist_entry_diff { bool computed; union { @@ -91,6 +96,7 @@ struct hist_entry { struct map_symbol ms; struct thread *thread; struct comm *comm; + struct namespace_id cgroup_id; u64 ip; u64 transaction; s32 socket; @@ -211,6 +217,8 @@ enum sort_type { SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, SORT_TRACE, + SORT_SYM_SIZE, + SORT_CGROUP_ID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8a2bbd2a4d82..ac10cc675d39 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -3,6 +3,9 @@ #include "stat.h" #include "color.h" #include "pmu.h" +#include "rblist.h" +#include "evlist.h" +#include "expr.h" enum { CTX_BIT_USER = 1 << 0, @@ -41,13 +44,73 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +struct saved_value { + struct rb_node rb_node; + struct perf_evsel *evsel; + int cpu; + int ctx; + struct stats stats; +}; + +static int saved_value_cmp(struct rb_node *rb_node, const void *entry) +{ + struct saved_value *a = container_of(rb_node, + struct saved_value, + rb_node); + const struct saved_value *b = entry; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + if (a->cpu != b->cpu) + return a->cpu - b->cpu; + return a->evsel - b->evsel; +} + +static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct saved_value *nd = malloc(sizeof(struct saved_value)); + + if (!nd) + return NULL; + memcpy(nd, entry, sizeof(struct saved_value)); + return &nd->rb_node; +} + +static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, + int cpu, int ctx, + bool create) +{ + struct rb_node *nd; + struct saved_value dm = 
{ + .cpu = cpu, + .ctx = ctx, + .evsel = evsel, + }; + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + if (create) { + rblist__add_node(&runtime_saved_values, &dm); + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + } + return NULL; +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); + rblist__init(&runtime_saved_values); + runtime_saved_values.node_cmp = saved_value_cmp; + runtime_saved_values.node_new = saved_value_new; + /* No delete for now */ } static int evsel_context(struct perf_evsel *evsel) @@ -70,6 +133,8 @@ static int evsel_context(struct perf_evsel *evsel) void perf_stat__reset_shadow_stats(void) { + struct rb_node *pos, *next; + memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); @@ -92,6 +157,15 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + + next = rb_first(&runtime_saved_values.entries); + while (next) { + pos = next; + next = rb_next(pos); + memset(&container_of(pos, struct saved_value, rb_node)->stats, + 0, + sizeof(struct stats)); + } } /* @@ -143,6 +217,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + + if (counter->collect_stat) { + struct saved_value *v = saved_value_lookup(counter, cpu, ctx, + true); + update_stats(&v->stats, count[0]); + } } /* used for get_ratio_color() */ @@ -172,6 +252,95 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } +static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, + const char *name) +{ + struct perf_evsel *c2; + + evlist__for_each_entry (evsel_list, c2) { + if (!strcasecmp(c2->name, name)) + return c2; + } + return NULL; +} + +/* Mark MetricExpr target events and link events using them to them. 
*/ +void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) +{ + struct perf_evsel *counter, *leader, **metric_events, *oc; + bool found; + const char **metric_names; + int i; + int num_metric_names; + + evlist__for_each_entry(evsel_list, counter) { + bool invalid = false; + + leader = counter->leader; + if (!counter->metric_expr) + continue; + metric_events = counter->metric_events; + if (!metric_events) { + if (expr__find_other(counter->metric_expr, counter->name, + &metric_names, &num_metric_names) < 0) + continue; + + metric_events = calloc(sizeof(struct perf_evsel *), + num_metric_names + 1); + if (!metric_events) + return; + counter->metric_events = metric_events; + } + + for (i = 0; i < num_metric_names; i++) { + found = false; + if (leader) { + /* Search in group */ + for_each_group_member (oc, leader) { + if (!strcasecmp(oc->name, metric_names[i])) { + found = true; + break; + } + } + } + if (!found) { + /* Search ignoring groups */ + oc = perf_stat__find_event(evsel_list, metric_names[i]); + } + if (!oc) { + /* Deduping one is good enough to handle duplicated PMUs. */ + static char *printed; + + /* + * Adding events automatically would be difficult, because + * it would risk creating groups that are not schedulable. + * perf stat doesn't understand all the scheduling constraints + * of events. So we ask the user instead to add the missing + * events. + */ + if (!printed || strcasecmp(printed, metric_names[i])) { + fprintf(stderr, + "Add %s event to groups to get metric expression for %s\n", + metric_names[i], + counter->name); + printed = strdup(metric_names[i]); + } + invalid = true; + continue; + } + metric_events[i] = oc; + oc->collect_stat = true; + } + metric_events[i] = NULL; + free(metric_names); + if (invalid) { + free(metric_events); + counter->metric_events = NULL; + counter->metric_expr = NULL; + } + } +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) @@ -614,6 +783,34 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); + } else if (evsel->metric_expr) { + struct parse_ctx pctx; + int i; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, evsel->name, avg); + for (i = 0; evsel->metric_events[i]; i++) { + struct saved_value *v; + + v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); + if (!v) + break; + expr__add_id(&pctx, evsel->metric_events[i]->name, + avg_stats(&v->stats)); + } + if (!evsel->metric_events[i]) { + const char *p = evsel->metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + evsel->metric_name ? + evsel->metric_name : + out->force_header ? 
evsel->name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c29bb94c48a4..0a65ae23f495 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -85,11 +85,13 @@ struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; new_line_t new_line; + bool force_header; }; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out); +void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4e59ddeb4eda..0e660dba58ad 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1828,7 +1828,7 @@ void kcore_extract__delete(struct kcore_extract *kce) static int populate_sdt_note(Elf **elf, const char *data, size_t len, struct list_head *sdt_notes) { - const char *provider, *name; + const char *provider, *name, *args; struct sdt_note *tmp = NULL; GElf_Ehdr ehdr; GElf_Addr base_off = 0; @@ -1887,6 +1887,25 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, goto out_free_prov; } + args = memchr(name, '\0', data + len - name); + + /* + * There is no argument if: + * - We reached the end of the note; + * - There is not enough room to hold a potential string; + * - The argument string is empty or just contains ':'. + */ + if (args == NULL || data + len - args < 2 || + args[1] == ':' || args[1] == '\0') + tmp->args = NULL; + else { + tmp->args = strdup(++args); + if (!tmp->args) { + ret = -ENOMEM; + goto out_free_name; + } + } + if (gelf_getclass(*elf) == ELFCLASS32) { memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr)); tmp->bit32 = true; @@ -1898,7 +1917,7 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, if (!gelf_getehdr(*elf, &ehdr)) { pr_debug("%s : cannot get elf header.\n", __func__); ret = -EBADF; - goto out_free_name; + goto out_free_args; } /* Adjust the prelink effect : @@ -1923,6 +1942,8 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, list_add_tail(&tmp->note_list, sdt_notes); return 0; +out_free_args: + free(tmp->args); out_free_name: free(tmp->name); out_free_prov: diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 6c358b7ed336..9222c7e702f3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -351,6 +351,7 @@ int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb); struct sdt_note { char *name; /* name of the note*/ char *provider; /* provider name */ + char *args; bool bit32; /* whether the location is 32 bits? 
*/ union { /* location, base and semaphore addrs */ Elf64_Addr a64[3]; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f5af87f66663..dcdb87a5d0a1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -7,6 +7,7 @@ #include "thread-stack.h" #include "util.h" #include "debug.h" +#include "namespaces.h" #include "comm.h" #include "unwind.h" @@ -40,6 +41,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); comm_str = malloc(32); @@ -53,7 +55,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) goto err_thread; list_add(&comm->list, &thread->comm_list); - atomic_set(&thread->refcnt, 1); + refcount_set(&thread->refcnt, 1); RB_CLEAR_NODE(&thread->rb_node); } @@ -66,7 +68,8 @@ err_thread: void thread__delete(struct thread *thread) { - struct comm *comm, *tmp; + struct namespaces *namespaces, *tmp_namespaces; + struct comm *comm, *tmp_comm; BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); @@ -76,7 +79,12 @@ void thread__delete(struct thread *thread) map_groups__put(thread->mg); thread->mg = NULL; } - list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) { + list_for_each_entry_safe(namespaces, tmp_namespaces, + &thread->namespaces_list, list) { + list_del(&namespaces->list); + namespaces__free(namespaces); + } + list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); } @@ -88,13 +96,13 @@ void thread__delete(struct thread *thread) struct thread *thread__get(struct thread *thread) { if (thread) - atomic_inc(&thread->refcnt); + refcount_inc(&thread->refcnt); return thread; } void thread__put(struct thread *thread) { - if (thread && atomic_dec_and_test(&thread->refcnt)) { + if (thread && refcount_dec_and_test(&thread->refcnt)) { /* * Remove it from the dead_threads list, as last reference * is gone. @@ -104,6 +112,38 @@ void thread__put(struct thread *thread) } } +struct namespaces *thread__namespaces(const struct thread *thread) +{ + if (list_empty(&thread->namespaces_list)) + return NULL; + + return list_first_entry(&thread->namespaces_list, struct namespaces, list); +} + +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) +{ + struct namespaces *new, *curr = thread__namespaces(thread); + + new = namespaces__new(event); + if (!new) + return -ENOMEM; + + list_add(&new->list, &thread->namespaces_list); + + if (timestamp && curr) { + /* + * setns syscall must have changed few or all the namespaces + * of this thread. Update end time for the namespaces + * previously used. 
+ */ + curr = list_next_entry(new, list); + curr->end_time = timestamp; + } + + return 0; +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 99263cb6e6b6..4eb849e9098f 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,7 +1,7 @@ #ifndef __PERF_THREAD_H #define __PERF_THREAD_H -#include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/rbtree.h> #include <linux/list.h> #include <unistd.h> @@ -23,11 +23,12 @@ struct thread { pid_t tid; pid_t ppid; int cpu; - atomic_t refcnt; + refcount_t refcnt; char shortname[3]; bool comm_set; int comm_len; bool dead; /* if set thread has exited */ + struct list_head namespaces_list; struct list_head comm_list; u64 db_id; @@ -40,6 +41,7 @@ struct thread { }; struct machine; +struct namespaces; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); @@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread) thread->dead = true; } +struct namespaces *thread__namespaces(const struct thread *thread); +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event); + int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp, bool exec); static inline int thread__set_comm(struct thread *thread, const char *comm, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 7c3fcc538a70..9026408ea55b 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -66,7 +66,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) for (i = 0; i < items; i++) thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } for (i=0; i<items; i++) @@ -83,7 +83,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) if (threads != NULL) { thread_map__set_pid(threads, 0, tid); threads->nr = 1; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; @@ -105,7 +105,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); while ((dirent = readdir(proc)) != NULL) { char *end; @@ -235,7 +235,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -255,7 +255,7 @@ struct thread_map *thread_map__new_dummy(void) if (threads != NULL) { thread_map__set_pid(threads, 0, -1); threads->nr = 1; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; } @@ -300,7 +300,7 @@ struct thread_map *thread_map__new_by_tid_str(const char *tid_str) } out: if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -326,7 +326,7 @@ static void thread_map__delete(struct thread_map *threads) if (threads) { int i; - WARN_ONCE(atomic_read(&threads->refcnt) != 0, + WARN_ONCE(refcount_read(&threads->refcnt) != 0, "thread map refcnt unbalanced\n"); for (i = 0; i < threads->nr; i++) free(thread_map__comm(threads, i)); @@ -337,13 +337,13 @@ static void thread_map__delete(struct thread_map *threads) struct thread_map *thread_map__get(struct thread_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void 
thread_map__put(struct thread_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) thread_map__delete(map); } @@ -423,7 +423,7 @@ static void thread_map__copy_event(struct thread_map *threads, threads->map[i].comm = strndup(event->entries[i].comm, 16); } - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } struct thread_map *thread_map__new_event(struct thread_map_event *event) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index ea0ef08c6303..bd34d7a0b9fa 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,7 +3,7 @@ #include <sys/types.h> #include <stdio.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct thread_map_data { pid_t pid; @@ -11,7 +11,7 @@ struct thread_map_data { }; struct thread_map { - atomic_t refcnt; + refcount_t refcnt; int nr; struct thread_map_data map[]; }; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index ac2590a3de2d..829471a1c6d7 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -40,6 +40,7 @@ struct perf_tool { event_op mmap, mmap2, comm, + namespaces, fork, exit, lost, @@ -66,6 +67,7 @@ struct perf_tool { event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; + bool namespace_events; }; #endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c74708da8571..b2cfa47990dc 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -355,8 +355,8 @@ void print_binary(unsigned char *data, size_t len, size_t bytes_per_line, print_binary_t printer, void *extra); -#if !defined(__GLIBC__) && !defined(__ANDROID__) -extern int sched_getcpu(void); +#ifndef HAVE_SCHED_GETCPU_SUPPORT +int sched_getcpu(void); #endif int is_printable_array(char *p, unsigned int len); diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 03cb639b292e..fedca3285326 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -16,9 +16,9 @@ idle power-state statistics, temperature and power on X86 processors. There are two ways to invoke turbostat. The first method is to supply a \fBcommand\fP, which is forked and statistics are printed -upon its completion. +in one-shot upon its completion. The second method is to omit the command, -and turbostat displays statistics every 5 seconds. +and turbostat displays statistics every 5 seconds interval. The 5-second interval can be changed using the --interval option. .PP Some information is not available on older processors. @@ -28,9 +28,10 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 + /sys/path... is an absolute path to a sysfs attribute scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. @@ -45,12 +46,21 @@ name as necessary to disambiguate it from others is necessary. Note that option 'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. 
default: delta + + name: "name_string" + Any string that does not match a key-word above is used + as the column header. .fi .PP +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 +.PP +\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. +.PP +\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. +.PP \fB--Dump\fP displays the raw counter values. .PP -\fB--debug\fP displays additional system configuration information. Invoking this parameter -more than once may also enable internal turbostat debug information. +\fB--quiet\fP Do not decode and print the system configuration header information. .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP @@ -61,9 +71,7 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. -.PP -\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. +\fB--list\fP display column header names available for use by --show and --hide, then exit. .PP \fB--Summary\fP limits output to a 1-line System Summary for each interval. .PP @@ -74,24 +82,25 @@ The file is truncated if it already exists, and it is created if it does not exi The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH DEFAULT FIELD DESCRIPTIONS +.SH ROW DESCRIPTIONS +The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. +.SH COLUMN DESCRIPTIONS .nf +\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU\fP Linux CPU (logical processor) number. 
Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. -\fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. -\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). +\fBPackage\fP processor package number -- not present on systems with a single processor package. +\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed. +\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state. +\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. -.fi -.PP -.SH DEBUG FIELD DESCRIPTIONS -.nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. -Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. +\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary. +\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. -\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. +\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. \fBPkgWatt\fP Watts consumed by the whole package. \fBCorWatt\fP Watts consumed by the core part of the package. \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. @@ -99,51 +108,110 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 
.fi +.SH TOO MUCH INFORMATION EXAMPLE +By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. +This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. .PP -.SH PERIODIC EXAMPLE -Without any parameters, turbostat displays statistics ever 5 seconds. -Periodic output goes to stdout, by default, unless --out is used to specify an output file. -The 5-second interval can be changed with th "-i sec" option. -Or a command may be specified as in "FORK EXAMPLE" below. +When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list. +.PP +To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once: +.nf +sudo ./turbostat --show sysfs --quiet sleep 10 +10.003837 sec + C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% + 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93 + 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80 + 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95 + 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21 + 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86 + 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89 + 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96 + 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94 + 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84 +.fi +.PP +.SH ONE SHOT COMMAND EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered after the command exits. +In this case, turbostat output goes to stderr, by default. +Output can instead be saved to a file using the --out option. +In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry. + .nf -[root@hsw]# ./turbostat - CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - - 488 12.51 3898 3498 - 0 0 0.01 3885 3498 - 4 3897 99.99 3898 3498 - 1 0 0.00 3861 3498 - 5 0 0.00 3882 3498 - 2 1 0.02 3894 3498 - 6 2 0.06 3898 3498 - 3 0 0.00 3849 3498 - 7 0 0.00 3877 3498 +[root@hsw]# ./turbostat -o ts.out sleep 10 +[root@hsw]# +.fi +.SH PERIODIC INTERVAL EXAMPLE +Without a command to fork, turbostat displays statistics ever 5 seconds. +Periodic output goes to stdout, by default, unless --out is used to specify an output file. +The 5-second interval can be changed with the "-i sec" option. 
+.nf +sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98 + 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00 + 0 4 3897 99.99 3900 3498 0.01 + 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98 + 1 5 0 0.00 3861 3498 0.01 + 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95 + 2 6 0 0.00 3863 3498 0.05 + 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97 + 3 7 0 0.00 3878 3498 0.03 + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99 + 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00 + 0 4 3898 99.99 3900 3498 0.01 + 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99 + 1 5 0 0.00 3898 3498 0.01 + 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98 + 2 6 0 0.00 3889 3498 0.02 + 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99 + 3 7 0 0.00 3897 3498 0.01 .fi -.SH DEBUG EXAMPLE -The "--debug" option prints additional system information before measurements: +This example also shows the use of the --hide option to skip columns that are not wanted. +Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy. +Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only, +because its cpu4's activity on shared hardware keeps it from entering a deeper C-state. -The first row of statistics is a summary for the entire system. -For residency % columns, the summary is a weighted average. -For Temperature columns, the summary is the column maximum. -For Watts columns, the summary is a system total. -Subsequent rows show per-CPU statistics. +.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE + +By default, turbostat always dumps system configuration information +before taking measurements. In the example above, "--quiet" is used +to suppress that output. Here is an example of the configuration information: .nf -turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org> +turbostat version 2017.02.15 - Len Brown <lenb@kernel.org> CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) -CPUID(6): APERF, DTS, PTM, EPB +CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM +CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB +cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO) +CPUID(7): No-SGX +cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB) RAPL: 3121 sec. 
Joule Counter Range, at 84 Watts -cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 -8 * 100 = 800 MHz max efficiency -35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) -cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727 -37 * 100 = 3700 MHz max turbo 4 active cores -38 * 100 = 3800 MHz max turbo 3 active cores -39 * 100 = 3900 MHz max turbo 2 active cores -39 * 100 = 3900 MHz max turbo 1 active cores +cpu4: MSR_PLATFORM_INFO: 0x80838f3012300 +8 * 100.0 = 800.0 MHz max efficiency frequency +35 * 100.0 = 3500.0 MHz base frequency +cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100.0 = 3700.0 MHz max turbo 4 active cores +38 * 100.0 = 3800.0 MHz max turbo 3 active cores +39 * 100.0 = 3900.0 MHz max turbo 2 active cores +39 * 100.0 = 3900.0 MHz max turbo 1 active cores +cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35) +cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 () +cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 () +cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1) +cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0) +cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) +cpu4: POLL: CPUIDLE CORE POLL IDLE +cpu4: C1: MWAIT 0x00 +cpu4: C1E: MWAIT 0x01 +cpu4: C3: MWAIT 0x10 +cpu4: C6: MWAIT 0x20 +cpu4: C7s: MWAIT 0x32 +cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch) cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, ) cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) 
@@ -158,23 +226,14 @@ cpu0: MSR_PP1_POLICY: 0
cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
- Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
- - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
- 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
- 0 4 3897 99.98 3898 3498 0 0.02
- 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32
- 1 5 0 0.00 3885 3498 0 0.21
- 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32
- 2 6 2 0.06 3896 3498 0 0.80
- 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28
- 3 7 0 0.00 3879 3498 0 0.04
-^C
-
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns)
+cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns)
+cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns)
.fi
The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
available at the minimum package voltage. The \fBTSC frequency\fP is the base
@@ -184,42 +243,22 @@ should be sustainable on all CPUs indefinitely, given nominal power and
cooling. The remaining rows show what maximum turbo frequency is possible
depending on the number of idle cores.
Note that not all information is available on all processors.
-.PP
-The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds.
-See the field definitions above.
-.SH FORK EXAMPLE
-If turbostat is invoked with a command, it will fork that command
-and output the statistics gathered after the command exits.
-In this case, turbostat output goes to stderr, by default.
-Output can instead be saved to a file using the --out option.
-eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
-until ^C while the other CPUs are mostly idle:
-
+.SH ADD COUNTER EXAMPLE
+Here we limit turbostat to showing just the CPU number for cpu0 - cpu3.
+We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL),
+labeling it with the column header, "PRF_CTRL", and display it only once,
+after the conclusion of a 0.1 second sleep.
 .nf
-root@hsw: turbostat cat /dev/zero > /dev/null
-^C
- CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- - 482 12.51 3854 3498
- 0 0 0.01 1960 3498
- 4 0 0.00 2128 3498
- 1 0 0.00 3003 3498
- 5 3854 99.98 3855 3498
- 2 0 0.01 3504 3498
- 6 3 0.08 3884 3498
- 3 0 0.00 2553 3498
- 7 0 0.00 2126 3498
-10.783983 sec
+sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1
+0.101604 sec
+CPU PRF_CTRL
+- 0x00000000
+0 0x00000c00
+1 0x00000800
+2 0x00000a00
+3 0x00000800
.fi
-Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit.
-The first row shows the average MHz and Busy% across all the processors in the system.
-
-Note that the Avg_MHz column reflects the total number of cycles executed
-divided by the measurement interval.
If the Busy% column is 100%, -then the processor was running at that speed the entire interval. -The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- -which is the average frequency while the processor was executing -- -not including any non-busy idle time. .SH NOTES diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f13f61b065c6..828dccd3f01e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -49,17 +49,14 @@ FILE *outf; int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; +unsigned int quiet; +unsigned int sums_need_wide_columns; unsigned int rapl_joules; unsigned int summary_only; +unsigned int list_header_only; unsigned int dump_only; -unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_knl_cstates; -unsigned int do_pc2; -unsigned int do_pc3; -unsigned int do_pc6; -unsigned int do_pc7; -unsigned int do_c8_c9_c10; unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; @@ -71,25 +68,19 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; +unsigned int no_MSR_MISC_PWR_MGMT; unsigned int aperf_mperf_multiplier = 1; -int do_irq = 1; -int do_smi; double bclk; double base_hz; unsigned int has_base_hz; double tsc_tweak = 1.0; -unsigned int show_pkg; -unsigned int show_core; -unsigned int show_cpu; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; -unsigned int do_gfx_rc6_ms; unsigned long long gfx_cur_rc6_ms; -unsigned int do_gfx_mhz; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; @@ -109,6 +100,7 @@ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ +unsigned int has_misc_feature_control; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -148,34 +140,38 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters */ #define NAME_BYTES 20 +#define PATH_BYTES 128 int backwards_count; char *progname; -cpu_set_t *cpu_present_set, *cpu_affinity_set; -size_t cpu_present_setsize, cpu_affinity_setsize; +#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... 
*/ +cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; +#define MAX_ADDED_COUNTERS 16 struct thread_data { unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; unsigned long long c1; - unsigned int irq_count; + unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *thread_even, *thread_odd; struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; unsigned int core_id; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -200,7 +196,7 @@ struct pkg_data { unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -215,22 +211,27 @@ struct pkg_data { #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; -enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS}; +enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC}; enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; struct msr_counter { unsigned int msr_num; char name[NAME_BYTES]; + char path[PATH_BYTES]; unsigned int width; enum counter_type type; enum counter_format format; struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) }; struct sys_counters { - unsigned int thread_counter_bytes; - unsigned int core_counter_bytes; - unsigned int package_counter_bytes; + unsigned int added_thread_counters; + unsigned int added_core_counters; + unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; @@ -334,147 +335,333 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) - err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); + err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); return 0; } /* - * Example Format w/ field column widths: - * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt - * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 + * Each string in this array is compared in --show and --hide cmdline. + * Thus, strings that are proper sub-sets must follow their more specific peers. 
+ */ +struct msr_counter bic[] = { + { 0x0, "Package" }, + { 0x0, "Avg_MHz" }, + { 0x0, "Bzy_MHz" }, + { 0x0, "TSC_MHz" }, + { 0x0, "IRQ" }, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "Busy%" }, + { 0x0, "CPU%c1" }, + { 0x0, "CPU%c3" }, + { 0x0, "CPU%c6" }, + { 0x0, "CPU%c7" }, + { 0x0, "ThreadC" }, + { 0x0, "CoreTmp" }, + { 0x0, "CoreCnt" }, + { 0x0, "PkgTmp" }, + { 0x0, "GFX%rc6" }, + { 0x0, "GFXMHz" }, + { 0x0, "Pkg%pc2" }, + { 0x0, "Pkg%pc3" }, + { 0x0, "Pkg%pc6" }, + { 0x0, "Pkg%pc7" }, + { 0x0, "Pkg%pc8" }, + { 0x0, "Pkg%pc9" }, + { 0x0, "Pkg%pc10" }, + { 0x0, "PkgWatt" }, + { 0x0, "CorWatt" }, + { 0x0, "GFXWatt" }, + { 0x0, "PkgCnt" }, + { 0x0, "RAMWatt" }, + { 0x0, "PKG_%" }, + { 0x0, "RAM_%" }, + { 0x0, "Pkg_J" }, + { 0x0, "Cor_J" }, + { 0x0, "GFX_J" }, + { 0x0, "RAM_J" }, + { 0x0, "Core" }, + { 0x0, "CPU" }, + { 0x0, "Mod%c6" }, + { 0x0, "sysfs" }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_Package (1ULL << 0) +#define BIC_Avg_MHz (1ULL << 1) +#define BIC_Bzy_MHz (1ULL << 2) +#define BIC_TSC_MHz (1ULL << 3) +#define BIC_IRQ (1ULL << 4) +#define BIC_SMI (1ULL << 5) +#define BIC_Busy (1ULL << 6) +#define BIC_CPU_c1 (1ULL << 7) +#define BIC_CPU_c3 (1ULL << 8) +#define BIC_CPU_c6 (1ULL << 9) +#define BIC_CPU_c7 (1ULL << 10) +#define BIC_ThreadC (1ULL << 11) +#define BIC_CoreTmp (1ULL << 12) +#define BIC_CoreCnt (1ULL << 13) +#define BIC_PkgTmp (1ULL << 14) +#define BIC_GFX_rc6 (1ULL << 15) +#define BIC_GFXMHz (1ULL << 16) +#define BIC_Pkgpc2 (1ULL << 17) +#define BIC_Pkgpc3 (1ULL << 18) +#define BIC_Pkgpc6 (1ULL << 19) +#define BIC_Pkgpc7 (1ULL << 20) +#define BIC_Pkgpc8 (1ULL << 21) +#define BIC_Pkgpc9 (1ULL << 22) +#define BIC_Pkgpc10 (1ULL << 23) +#define BIC_PkgWatt (1ULL << 24) +#define BIC_CorWatt (1ULL << 25) +#define BIC_GFXWatt (1ULL << 26) +#define BIC_PkgCnt (1ULL << 27) +#define BIC_RAMWatt (1ULL << 28) +#define BIC_PKG__ (1ULL << 29) +#define BIC_RAM__ (1ULL << 30) +#define BIC_Pkg_J (1ULL << 31) +#define BIC_Cor_J (1ULL << 32) +#define BIC_GFX_J (1ULL << 33) +#define BIC_RAM_J (1ULL << 34) +#define BIC_Core (1ULL << 35) +#define BIC_CPU (1ULL << 36) +#define BIC_Mod_c6 (1ULL << 37) +#define BIC_sysfs (1ULL << 38) + +unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; +unsigned long long bic_present = BIC_sysfs; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) + +#define MAX_DEFERRED 16 +char *deferred_skip_names[MAX_DEFERRED]; +int deferred_skip_index; + +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; -void print_header(void) +void help(void) { - struct msr_counter *mp; + fprintf(outf, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + "--add add a counter\n" + " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" + "--cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + "--quiet skip decoding system configuration header\n" + "--interval sec Override default 5-second measurement interval\n" + "--help print this help message\n" + "--list list column headers only\n" + "--out file create or truncate \"file\" for all output\n" + "--version print version information\n" + "\n" + "For more help, run \"man turbostat\"\n"); +} - if (show_pkg) - outp += sprintf(outp, "\tPackage"); - if (show_core) - outp += sprintf(outp, "\tCore"); - if (show_cpu) - outp += sprintf(outp, "\tCPU"); - if (has_aperf) - outp += sprintf(outp, "\tAvg_MHz"); - if (has_aperf) - outp += sprintf(outp, "\tBusy%%"); - if (has_aperf) - outp += sprintf(outp, "\tBzy_MHz"); - outp += sprintf(outp, "\tTSC_MHz"); +/* + * bic_lookup + * for all the strings in comma separate name_list, + * set the approprate bit in return value. + */ +unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) +{ + int i; + unsigned long long retval = 0; - if (!debug) - goto done; + while (name_list) { + char *comma; - if (do_irq) - outp += sprintf(outp, "\tIRQ"); - if (do_smi) - outp += sprintf(outp, "\tSMI"); - - if (do_nhm_cstates) - outp += sprintf(outp, "\tCPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\tCPU%%c3"); - if (do_nhm_cstates) - outp += sprintf(outp, "\tCPU%%c6"); - if (do_snb_cstates) - outp += sprintf(outp, "\tCPU%%c7"); + comma = strchr(name_list, ','); + + if (comma) + *comma = '\0'; + + for (i = 0; i < MAX_BIC; ++i) { + if (!strcmp(name_list, bic[i].name)) { + retval |= (1ULL << i); + break; + } + } + if (i == MAX_BIC) { + if (mode == SHOW_LIST) { + fprintf(stderr, "Invalid counter name: %s\n", name_list); + exit(-1); + } + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } + } + + name_list = comma; + if (name_list) + name_list++; + + } + return retval; +} + + +void print_header(char *delim) +{ + struct msr_counter *mp; + int printed = 0; + + if (DO_BIC(BIC_Package)) + outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Core)) + outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); + if (DO_BIC(BIC_Avg_MHz)) + outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Busy)) + outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_Bzy_MHz)) + outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); + + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); + else + outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); + } + + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); for (mp = sys.tp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", (printed++ ? 
delim : ""), mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); } } - if (do_dts) - outp += sprintf(outp, "\tCoreTmp"); + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c6)) + outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c7)) + outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); + + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); + + if (DO_BIC(BIC_CoreTmp)) + outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } - if (do_ptm) - outp += sprintf(outp, "\tPkgTmp"); + if (DO_BIC(BIC_PkgTmp)) + outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); - if (do_gfx_rc6_ms) - outp += sprintf(outp, "\tGFX%%rc6"); + if (DO_BIC(BIC_GFX_rc6)) + outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); - if (do_gfx_mhz) - outp += sprintf(outp, "\tGFXMHz"); + if (DO_BIC(BIC_GFXMHz)) + outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); if (do_skl_residency) { - outp += sprintf(outp, "\tTotl%%C0"); - outp += sprintf(outp, "\tAny%%C0"); - outp += sprintf(outp, "\tGFX%%C0"); - outp += sprintf(outp, "\tCPUGFX%%"); - } - - if (do_pc2) - outp += sprintf(outp, "\tPkg%%pc2"); - if (do_pc3) - outp += sprintf(outp, "\tPkg%%pc3"); - if (do_pc6) - outp += sprintf(outp, "\tPkg%%pc6"); - if (do_pc7) - outp += sprintf(outp, "\tPkg%%pc7"); - if (do_c8_c9_c10) { - outp += sprintf(outp, "\tPkg%%pc8"); - outp += sprintf(outp, "\tPkg%%pc9"); - outp += sprintf(outp, "\tPk%%pc10"); + outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Pkgpc2)) + outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc3)) + outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc6)) + outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc7)) + outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc8)) + outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc9)) + outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc10)) + outp += sprintf(outp, "%sPk%%pc10", (printed++ ? 
delim : "")); + if (do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, "\tPkgWatt"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) - outp += sprintf(outp, "\tCorWatt"); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, "\tGFXWatt"); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, "\tRAMWatt"); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, "\tRAM_%%"); + if (DO_BIC(BIC_PkgWatt)) + outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_CorWatt)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXWatt)) + outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAMWatt)) + outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, "\tPkg_J"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) - outp += sprintf(outp, "\tCor_J"); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, "\tGFX_J"); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, "\tRAM_J"); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, "\tRAM_%%"); + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_Cor_J)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, "%sRAM_%%", (printed++ ? 
delim : "")); } for (mp = sys.pp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } -done: outp += sprintf(outp, "\n"); } @@ -494,10 +681,10 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); - if (do_irq) - outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); - if (do_smi) - outp += sprintf(outp, "SMI: %08X\n", t->smi_count); + if (DO_BIC(BIC_IRQ)) + outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "SMI: %d\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", @@ -516,6 +703,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); } + outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } if (p) { @@ -527,11 +715,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); outp += sprintf(outp, "pc2: %016llX\n", p->pc2); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "pc3: %016llX\n", p->pc3); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "pc6: %016llX\n", p->pc6); - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "pc7: %016llX\n", p->pc7); outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); @@ -563,10 +751,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - double interval_float; + double interval_float, tsc; char *fmt8; int i; struct msr_counter *mp; + char *delim = "\t"; + int printed = 0; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -576,106 +766,126 @@ int format_counters(struct thread_data *t, struct core_data *c, if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + /*if not summary line and --cpu is used */ + if ((t != &average.threads) && + (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) + return 0; + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + tsc = t->tsc * tsc_tweak; + /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { - if (show_pkg) - outp += sprintf(outp, "\t-"); - if (show_core) - outp += sprintf(outp, "\t-"); - if (show_cpu) - outp += sprintf(outp, "\t-"); + if (DO_BIC(BIC_Package)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Core)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } else { - if (show_pkg) { + if (DO_BIC(BIC_Package)) { if (p) - outp += sprintf(outp, "\t%d", p->package_id); + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->package_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } - if (show_core) { + if (DO_BIC(BIC_Core)) { if (c) - outp += sprintf(outp, "\t%d", c->core_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } - if (show_cpu) - outp += sprintf(outp, "\t%d", t->cpu_id); + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); } - /* Avg_MHz */ - if (has_aperf) - outp += sprintf(outp, "\t%.0f", + if (DO_BIC(BIC_Avg_MHz)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); - /* Busy% */ - if (has_aperf) - outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); + if (DO_BIC(BIC_Busy)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc); - /* Bzy_MHz */ - if (has_aperf) { + if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "\t%.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), + tsc / units * t->aperf / t->mperf / interval_float); } - /* TSC_MHz */ - outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); - - if (!debug) - goto done; + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); /* IRQ */ - if (do_irq) - outp += sprintf(outp, "\t%d", t->irq_count); + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); + } /* SMI */ - if (do_smi) - outp += sprintf(outp, "\t%d", t->smi_count); - - if (do_nhm_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); - - /* print per-core data only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - goto done; - - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); - if (do_nhm_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); - if (do_snb_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); + /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", t->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", t->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); + if (mp->type == COUNTER_USEC) + outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), t->counter[i]/interval_float/10000); + else + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc); } } + /* C1 */ + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc); - if (do_dts) - outp += sprintf(outp, "\t%d", c->core_temp_c); + + /* print per-core data only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); + if (DO_BIC(BIC_CPU_c6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); + if (DO_BIC(BIC_CPU_c7)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc); + + /* Mod%c6 */ + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); + + if (DO_BIC(BIC_CoreTmp)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", c->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", c->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc); } } @@ -684,95 +894,89 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; /* PkgTmp */ - if (do_ptm) - outp += sprintf(outp, "\t%d", p->pkg_temp_c); + if (DO_BIC(BIC_PkgTmp)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); /* GFXrc6 */ - if (do_gfx_rc6_ms) { + if (DO_BIC(BIC_GFX_rc6)) { if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ - outp += sprintf(outp, "\t**.**"); + outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "\t%.2f", + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } /* GFXMHz */ - if (do_gfx_mhz) - outp += sprintf(outp, "\t%d", p->gfx_mhz); + if (DO_BIC(BIC_GFXMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); - } - - if (do_pc2) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc); - if (do_pc3) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc); - if (do_pc6) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc); - if (do_pc7) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc); - if (do_c8_c9_c10) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); } + if (DO_BIC(BIC_Pkgpc2)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); + if (DO_BIC(BIC_Pkgpc3)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc); + if (DO_BIC(BIC_Pkgpc6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc); + if (DO_BIC(BIC_Pkgpc7)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc); + if (DO_BIC(BIC_Pkgpc8)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc); + if (DO_BIC(BIC_Pkgpc9)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc); + if (DO_BIC(BIC_Pkgpc10)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); + /* * If measurement interval exceeds minimum RAPL Joule Counter range, * indicate that results are suspect by printing "**" in fraction place. 
*/ if (interval_float < rapl_joule_counter_range) - fmt8 = "\t%.2f"; + fmt8 = "%s%.2f"; else fmt8 = "%6.0f**"; - if (do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, - p->energy_pkg * rapl_energy_units); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, fmt8, - p->energy_cores * rapl_energy_units); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, - p->energy_gfx * rapl_energy_units); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, - p->energy_dram * rapl_dram_energy_units); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } + if (DO_BIC(BIC_PkgWatt)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); + if (DO_BIC(BIC_CorWatt)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); + if (DO_BIC(BIC_GFXWatt)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); + if (DO_BIC(BIC_RAMWatt)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); + if (DO_BIC(BIC_Cor_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", p->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", p->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), p->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc); } } @@ -807,7 +1011,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ static int printed; if (!printed || !summary_only) - print_header(); + print_header("\t"); if (topo.num_cpus > 1) format_counters(&average.threads, &average.cores, @@ -841,11 +1045,11 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; } old->pc2 = new->pc2 - old->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) old->pc3 = new->pc3 - old->pc3; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) old->pc6 = new->pc6 - old->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) old->pc7 = new->pc7 - old->pc7; old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; @@ -887,6 +1091,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->mc6_us = new->mc6_us - old->mc6_us; for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -916,7 +1121,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if (has_aperf) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -941,7 +1146,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = old->tsc - old->mperf - core_delta->c3 + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } @@ -952,10 +1157,10 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } - if (do_irq) + if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; - if (do_smi) + if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -1008,6 +1213,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->mc6_us = 0; c->core_temp_c = 0; p->pkg_wtd_core_c0 = 0; @@ -1016,11 +1222,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pkg_both_core_gfxe_c0 = 0; p->pc2 = 0; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) p->pc3 = 0; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) p->pc6 = 0; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) p->pc7 = 0; p->pc8 = 0; p->pc9 = 0; @@ -1036,7 +1242,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; - for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -1073,6 +1278,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c3 += c->c3; average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); @@ -1094,11 +1300,11 @@ int sum_counters(struct thread_data *t, struct core_data *c, } average.packages.pc2 += p->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 += p->pc3; - if (do_pc6) 
+ if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 += p->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 += p->pc7; average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; @@ -1143,9 +1349,13 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + if (average.threads.irq_count > 9999999) + sums_need_wide_columns = 1; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + average.cores.mc6_us /= topo.num_cores; if (do_skl_residency) { average.packages.pkg_wtd_core_c0 /= topo.num_packages; @@ -1155,11 +1365,11 @@ void compute_average(struct thread_data *t, struct core_data *c, } average.packages.pc2 /= topo.num_packages; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 /= topo.num_packages; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 /= topo.num_packages; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 /= topo.num_packages; average.packages.pc8 /= topo.num_packages; @@ -1169,16 +1379,29 @@ void compute_average(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.threads.counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } average.threads.counter[i] /= topo.num_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.cores.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.cores.counter[i] /= topo.num_cores; } for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.packages.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.packages.counter[i] /= topo.num_packages; } } @@ -1193,6 +1416,60 @@ static unsigned long long rdtsc(void) } /* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, mode); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} +/* + * snapshot_sysfs_counter() + * + * return snapshot of given counter + */ +unsigned long long snapshot_sysfs_counter(char *path) +{ + FILE *fp; + int retval; + unsigned long long counter; + + fp = fopen_or_die(path, "r"); + + retval = fscanf(fp, "%lld", &counter); + if (retval != 1) + err(1, "snapshot_sysfs_counter(%s)", path); + + fclose(fp); + + return counter; +} + +int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) +{ + if (mp->msr_num != 0) { + if (get_msr(cpu, mp->msr_num, counterp)) + return -1; + } else { + char path[128]; + + if (mp->flags & SYSFS_PERCPU) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", + cpu, mp->path); + + *counterp = snapshot_sysfs_counter(path); + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } + } + + return 0; +} + +/* * get_counters(...) 
* migrate to cpu * acquire and record local counters for that cpu @@ -1213,7 +1490,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (has_aperf) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -1269,35 +1546,33 @@ retry: t->mperf = t->mperf * aperf_mperf_multiplier; } - if (do_irq) + if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (do_smi) { + if (DO_BIC(BIC_SMI)) { if (get_msr(cpu, MSR_SMI_COUNT, &msr)) return -5; t->smi_count = msr & 0xFFFFFFFF; } - - if (use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &t->counter[i])) + if (get_mp(cpu, mp, &t->counter[i])) return -10; } - /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; } else if (do_knl_cstates) { @@ -1305,18 +1580,22 @@ retry: return -7; } - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; - if (do_dts) { + if (DO_BIC(BIC_Mod_c6)) + if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) + return -8; + + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &c->counter[i])) + if (get_mp(cpu, mp, &c->counter[i])) return -10; } @@ -1334,26 +1613,35 @@ retry: if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) return -13; } - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) - if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) - return -10; - if (do_pc2) + if (DO_BIC(BIC_Pkgpc6)) { + if (do_slm_cstates) { + if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } else { + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + } + + if (DO_BIC(BIC_Pkgpc2)) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) return -13; + if (DO_BIC(BIC_Pkgpc9)) if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) return -13; + if (DO_BIC(BIC_Pkgpc10)) if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) return -13; - } + if (do_rapl & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; @@ -1384,20 +1672,20 @@ retry: return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } - if (do_ptm) { + if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) p->gfx_rc6_ms = gfx_cur_rc6_ms; - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; for (i = 0, mp = 
sys.pp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &p->counter[i])) + if (get_mp(cpu, mp, &p->counter[i])) return -10; } @@ -1433,8 +1721,8 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; +int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; @@ -1457,11 +1745,11 @@ dump_nhm_platform_info(void) fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); @@ -1483,12 +1771,12 @@ dump_hsw_turbo_ratio_limits(void) ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1505,99 +1793,175 @@ dump_ivt_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 
0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } +int has_turbo_ratio_group_limits(int family, int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} static void -dump_nhm_turbo_ratio_limits(void) +dump_turbo_ratio_limits(int family, int model) { - unsigned long long msr; - unsigned int ratio; + unsigned long long msr, core_counts; + unsigned int ratio, group_size; get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + if (has_turbo_ratio_group_limits(family, model)) { + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); + } else { + core_counts = 0x0807060504030201; + } + ratio = (msr >> 56) & 0xFF; + group_size = (core_counts >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 48) & 0xFF; + group_size = (core_counts >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 40) & 0xFF; + group_size = (core_counts >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 32) & 0xFF; + group_size = (core_counts >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 24) & 0xFF; + group_size = (core_counts >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 16) & 0xFF; + group_size = (core_counts >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * 
bclk, group_size); ratio = (msr >> 8) & 0xFF; + group_size = (core_counts >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 0) & 0xFF; + group_size = (core_counts >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); return; } static void +dump_atom_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", + ratio, bclk, ratio * bclk); + + get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 24) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", + ratio, bclk, ratio * bclk); +} + +static void dump_knl_turbo_ratio_limits(void) { const unsigned int buckets_no = 7; @@ -1652,7 +2016,7 @@ dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) fprintf(outf, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", + "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } @@ -1661,12 +2025,12 @@ dump_nhm_cst_cfg(void) { unsigned long long msr; - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", @@ -1810,16 +2174,6 @@ void free_all_buffers(void) free(irqs_per_cpu); } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, mode); - if (!filep) - err(1, "%s: open failed", path); - return filep; -} /* * Parse a file containing a single int. 
@@ -2148,13 +2502,14 @@ int snapshot_gfx_mhz(void) */ int snapshot_proc_sysfs_files(void) { - if (snapshot_proc_interrupts()) - return 1; + if (DO_BIC(BIC_IRQ)) + if (snapshot_proc_interrupts()) + return 1; - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) snapshot_gfx_rc6_ms(); - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); return 0; @@ -2283,7 +2638,9 @@ void check_permissions() * MSR_SMI_COUNT 0x00000034 * * MSR_PLATFORM_INFO 0x000000ce - * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 + * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 + * + * MSR_MISC_PWR_MGMT 0x000001aa * * MSR_PKG_C3_RESIDENCY 0x000003f8 * MSR_PKG_C6_RESIDENCY 0x000003f9 @@ -2291,7 +2648,8 @@ void check_permissions() * MSR_CORE_C6_RESIDENCY 0x000003fd * * Side effect: - * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL + * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL + * sets has_misc_feature_control */ int probe_nhm_msrs(unsigned int family, unsigned int model) { @@ -2322,6 +2680,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_IVYBRIDGE: /* IVB */ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ pkg_cstate_limits = snb_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSX */ @@ -2336,29 +2695,34 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ pkg_cstate_limits = hsw_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_SKYLAKE_X: /* SKX */ pkg_cstate_limits = skx_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + no_MSR_MISC_PWR_MGMT = 1; case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ pkg_cstate_limits = amt_pkg_cstate_limits; + no_MSR_MISC_PWR_MGMT = 1; break; case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ case INTEL_FAM6_XEON_PHI_KNM: pkg_cstate_limits = phi_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ pkg_cstate_limits = bxt_pkg_cstate_limits; break; default: return 0; } - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); @@ -2368,8 +2732,69 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) has_base_hz = 1; return 1; } -int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) +/* + * SLV client has support for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) { + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_MERRIFIELD: + case INTEL_FAM6_ATOM_MOOREFIELD: + return 1; + } + return 0; +} +int is_dnv(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} +int is_bdx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + return 1; + } + return 0; +} +int is_skx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + 
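Much of this patch replaces individual do_*/show_* globals with DO_BIC()/BIC_PRESENT() tests, as in the snapshot_proc_sysfs_files() hunk above. The actual bit definitions live earlier in turbostat.c and are not part of these hunks; the sketch below only illustrates the assumed shape of that plumbing, with illustrative (not real) bit assignments.

/* Hedged sketch of the built-in-counter (BIC) column masks assumed above. */
#define BIC_IRQ		(1ULL << 0)	/* illustrative bit positions only */
#define BIC_GFX_rc6	(1ULL << 1)
#define BIC_GFXMHz	(1ULL << 2)

unsigned long long bic_enabled = ~0ULL;	/* columns requested via --show/--hide */
unsigned long long bic_present;		/* columns this platform can produce */

#define BIC_PRESENT(COUNTER_BIT)	(bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT)	(bic_present &= ~(COUNTER_BIT))
#define DO_BIC(COUNTER_BIT)		(bic_enabled & bic_present & (COUNTER_BIT))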
case INTEL_FAM6_SKYLAKE_X: + return 1; + } + return 0; +} + +int has_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 0; + switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ @@ -2381,6 +2806,13 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) return 1; } } +int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 1; + + return 0; +} int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2429,6 +2861,22 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + return 1; + default: + return 0; + } +} int has_config_tdp(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2475,8 +2923,11 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) if (has_ivt_turbo_ratio_limit(family, model)) dump_ivt_turbo_ratio_limits(); - if (has_nhm_turbo_ratio_limit(family, model)) - dump_nhm_turbo_ratio_limits(); + if (has_turbo_ratio_limit(family, model)) + dump_turbo_ratio_limits(family, model); + + if (has_atom_turbo_ratio_limit(family, model)) + dump_atom_turbo_ratio_limits(); if (has_knl_turbo_ratio_limit(family, model)) dump_knl_turbo_ratio_limits(); @@ -2487,6 +2938,96 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) dump_nhm_cst_cfg(); } +static void +dump_sysfs_cstate_config(void) +{ + char path[64]; + char name_buf[16]; + char desc[64]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 0; state < 10; ++state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(desc, sizeof(desc), input); + + fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); + fclose(input); + } +} +static void +dump_sysfs_pstate_config(void) +{ + char path[64]; + char driver_buf[64]; + char governor_buf[64]; + FILE *input; + int turbo; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(driver_buf, sizeof(driver_buf), input); + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(governor_buf, sizeof(governor_buf), input); + fclose(input); + + fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); + fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); + + sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", 
&turbo); + fprintf(outf, "cpufreq boost: %d\n", turbo); + fclose(input); + } + + sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", &turbo); + fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); + fclose(input); + } +} + /* * print_epb() @@ -2790,15 +3331,40 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_GFXWatt); + } break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); break; case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ @@ -2807,17 +3373,55 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ case INTEL_FAM6_XEON_PHI_KNM: do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_SANDYBRIDGE_X: case INTEL_FAM6_IVYBRIDGE_X: do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ do_rapl = RAPL_PKG | RAPL_CORES; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } break; case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; default: return; @@ -2844,7 +3448,7 @@ void rapl_probe(unsigned int family, unsigned int model) tdp = get_tdp(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; - if (debug) + if (!quiet) fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; @@ -2969,11 +3573,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - if (debug) { - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " - "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, - rapl_power_units, rapl_energy_units, rapl_time_units); - } + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + rapl_power_units, rapl_energy_units, rapl_time_units); + if (do_rapl & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) @@ -2994,7 +3596,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -9; fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "": "UN"); + cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", @@ -3020,40 +3622,34 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } if (do_rapl & RAPL_CORE_POLICY) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POLICY, &msr)) - return -7; + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; - fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); - } + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } if (do_rapl & RAPL_CORES_POWER_LIMIT) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "Cores Limit"); - } + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); } if (do_rapl & RAPL_GFX) { - if (debug) { - if (get_msr(cpu, MSR_PP1_POLICY, &msr)) - return -8; + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; - fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); - if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "GFX Limit"); - } + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? 
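print_rapl() above reports MSR_RAPL_POWER_UNIT alongside the three derived unit values; the decode itself happens in rapl_probe(), in context this diff does not show. For reference, a hedged sketch of the conventional field layout (power units in bits 3:0, energy units in bits 12:8, time units in bits 19:16, each as 1/2^n) — verify against the actual turbostat code and the SDM; base_cpu, get_msr() and MSR_RAPL_POWER_UNIT are assumed from the surrounding file.

#include <sys/types.h>

extern int base_cpu;
extern double rapl_power_units, rapl_energy_units, rapl_time_units;
extern int get_msr(int cpu, off_t offset, unsigned long long *msr);	/* assumed signature */

static void rapl_decode_units(void)	/* sketch only */
{
	unsigned long long msr;

	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
		return;

	rapl_power_units  = 1.0 / (1 << (msr & 0xF));		/* Watts */
	rapl_energy_units = 1.0 / (1 << ((msr >> 8) & 0x1F));	/* Joules */
	rapl_time_units   = 1.0 / (1 << ((msr >> 16) & 0xF));	/* seconds */
}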
"" : "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } @@ -3090,6 +3686,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ return 1; } @@ -3121,6 +3718,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: return 1; } return 0; @@ -3149,8 +3747,6 @@ int has_skl_msrs(unsigned int family, unsigned int model) return 0; } - - int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3201,7 +3797,8 @@ double slm_bclk(void) } freq = slm_freq_table[i]; - fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + if (!quiet) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; } @@ -3264,7 +3861,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; - if (debug) + if (!quiet) fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); @@ -3299,13 +3896,30 @@ void decode_misc_enable_msr(void) unsigned long long msr; if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) - fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", base_cpu, msr, - msr & (1 << 3) ? "TCC" : "", - msr & (1 << 16) ? "EIST" : "", - msr & (1 << 18) ? "MONITOR" : ""); + msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } +void decode_misc_feature_control(void) +{ + unsigned long long msr; + + if (!has_misc_feature_control) + return; + + if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) + fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", + base_cpu, msr, + msr & (0 << 0) ? "No-" : "", + msr & (1 << 0) ? "No-" : "", + msr & (2 << 0) ? "No-" : "", + msr & (3 << 0) ? "No-" : ""); +} /* * Decode MSR_MISC_PWR_MGMT * @@ -3320,6 +3934,9 @@ void decode_misc_pwr_mgmt_msr(void) if (!do_nhm_platform_info) return; + if (no_MSR_MISC_PWR_MGMT) + return; + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", base_cpu, msr, @@ -3327,11 +3944,30 @@ void decode_misc_pwr_mgmt_msr(void) msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); } +/* + * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG + * + * This MSRs are present on Silvermont processors, + * Intel Atom processor E3000 series (Baytrail), and friends. + */ +void decode_c6_demotion_policy_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); + + if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? 
"EN" : "DIS"); +} void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; unsigned int fms, family, model, stepping; + unsigned int has_turbo; eax = ebx = ecx = edx = 0; @@ -3340,7 +3976,7 @@ void process_cpuid() if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; - if (debug) + if (!quiet) fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); @@ -3351,7 +3987,7 @@ void process_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (debug) { + if (!quiet) { fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", @@ -3394,8 +4030,18 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); + if (has_aperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + } do_dts = eax & (1 << 0); + if (do_dts) + BIC_PRESENT(BIC_CoreTmp); + has_turbo = eax & (1 << 1); do_ptm = eax & (1 << 6); + if (do_ptm) + BIC_PRESENT(BIC_PkgTmp); has_hwp = eax & (1 << 7); has_hwp_notify = eax & (1 << 8); has_hwp_activity_window = eax & (1 << 9); @@ -3403,10 +4049,11 @@ void process_cpuid() has_hwp_pkg = eax & (1 << 11); has_epb = ecx & (1 << 3); - if (debug) - fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + if (!quiet) + fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", + has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", @@ -3416,10 +4063,11 @@ void process_cpuid() has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); - if (debug) + if (!quiet) decode_misc_enable_msr(); - if (max_level >= 0x7 && debug) { + + if (max_level >= 0x7 && !quiet) { int has_sgx; ecx = 0; @@ -3445,7 +4093,7 @@ void process_cpuid() if (ebx_tsc != 0) { - if (debug && (ebx != 0)) + if (!quiet && (ebx != 0)) fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); @@ -3462,6 +4110,7 @@ void process_cpuid() crystal_hz = 25000000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: crystal_hz = 19200000; /* 19.2 MHz */ break; default: @@ -3470,7 +4119,7 @@ void process_cpuid() if (crystal_hz) { tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; - if (debug) + if (!quiet) fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } @@ -3485,7 +4134,7 @@ void process_cpuid() base_mhz = max_mhz = bus_mhz = edx = 0; __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); - if (debug) + if (!quiet) fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } @@ -3493,56 +4142,96 @@ void process_cpuid() if (has_aperf) aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); - do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); + BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_TSC_MHz); + + if (probe_nhm_msrs(family, model)) { + do_nhm_platform_info = 1; + BIC_PRESENT(BIC_CPU_c1); + BIC_PRESENT(BIC_CPU_c3); + BIC_PRESENT(BIC_CPU_c6); + BIC_PRESENT(BIC_SMI); + } do_snb_cstates = has_snb_msrs(family, model); + + if (do_snb_cstates) + BIC_PRESENT(BIC_CPU_c7); + do_irtl_snb = has_snb_msrs(family, model); - do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); - do_pc3 = 
(pkg_cstate_limit >= PCL__3); - do_pc6 = (pkg_cstate_limit >= PCL__6); - do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); - do_c8_c9_c10 = has_hsw_msrs(family, model); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + if (pkg_cstate_limit >= PCL__3) + BIC_PRESENT(BIC_Pkgpc3); + if (pkg_cstate_limit >= PCL__6) + BIC_PRESENT(BIC_Pkgpc6); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); + if (has_slv_msrs(family, model)) { + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + BIC_PRESENT(BIC_Mod_c6); + use_c1_residency_msr = 1; + } + if (is_dnv(family, model)) { + BIC_PRESENT(BIC_CPU_c1); + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + use_c1_residency_msr = 1; + } + if (is_skx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } + if (is_bdx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } + if (has_hsw_msrs(family, model)) { + BIC_PRESENT(BIC_Pkgpc8); + BIC_PRESENT(BIC_Pkgpc9); + BIC_PRESENT(BIC_Pkgpc10); + } do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - if (debug) + if (!quiet) decode_misc_pwr_mgmt_msr(); + if (!quiet && has_slv_msrs(family, model)) + decode_c6_demotion_policy_msr(); + rapl_probe(family, model); perf_limit_reasons_probe(family, model); - if (debug) + if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + dump_sysfs_cstate_config(); + if (!quiet) + dump_sysfs_pstate_config(); + if (has_skl_msrs(family, model)) calculate_tsc_tweak(); - do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); - do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); - return; -} + if (!quiet) + decode_misc_feature_control(); -void help() -{ - fprintf(outf, - "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" - "\n" - "Turbostat forks the specified COMMAND and prints statistics\n" - "when COMMAND completes.\n" - "If no COMMAND is specified, turbostat wakes every 5-seconds\n" - "to print statistics, until interrupted.\n" - "--add add a counter\n" - " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" - "--debug run in \"debug\" mode\n" - "--interval sec Override default 5-second measurement interval\n" - "--help print this help message\n" - "--out file create or truncate \"file\" for all output\n" - "--version print version information\n" - "\n" - "For more help, run \"man turbostat\"\n"); + return; } @@ -3579,7 +4268,7 @@ void topology_probe() topo.max_cpu_num = 0; for_all_proc_cpus(count_cpus); if (!summary_only && topo.num_cpus > 1) - show_cpu = 1; + BIC_PRESENT(BIC_CPU); if (debug > 1) fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); @@ -3599,6 +4288,15 @@ void topology_probe() for_all_proc_cpus(mark_cpu_present); /* + * Validate that all cpus in cpu_subset are also in cpu_present_set + */ + for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { + if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) + err(1, "cpu%d not present", i); + } + + /* * Allocate and initialize cpu_affinity_set */ cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); @@ -3639,15 +4337,15 @@ void topology_probe() if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (debug && !summary_only && topo.num_cores_per_pkg > 1) - show_core = 1; + if (!summary_only && topo.num_cores_per_pkg > 1) + BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (debug && !summary_only && topo.num_packages > 1) - show_pkg = 1; + if (!summary_only && topo.num_packages > 1) + BIC_PRESENT(BIC_Package); topo.num_threads_per_core = max_siblings; if (debug > 1) @@ -3662,7 +4360,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data int i; *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * - topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes); + topo.num_packages, sizeof(struct thread_data)); if (*t == NULL) goto error; @@ -3671,14 +4369,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data (*t)[i].cpu_id = -1; *c = calloc(topo.num_cores_per_pkg * topo.num_packages, - sizeof(struct core_data) + sys.core_counter_bytes); + sizeof(struct core_data)); if (*c == NULL) goto error; for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) (*c)[i].core_id = -1; - *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes); + *p = calloc(topo.num_packages, sizeof(struct pkg_data)); if (*p == NULL) goto error; @@ -3789,24 +4487,24 @@ void turbostat_init() process_cpuid(); - if (debug) + if (!quiet) for_all_cpus(print_hwp, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_epb, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_perf_limit, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_rapl, ODD_COUNTERS); for_all_cpus(set_temperature_target, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_thermal, ODD_COUNTERS); - if (debug && do_irtl_snb) + if (!quiet && do_irtl_snb) print_irtl(); } @@ -3815,6 +4513,7 @@ int fork_it(char **argv) pid_t child_pid; int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, EVEN_COUNTERS); if (status) exit(status); @@ -3826,6 +4525,7 @@ int fork_it(char **argv) if (!child_pid) { /* child */ execvp(argv[0], argv); + err(errno, "exec %s", argv[0]); } else { /* parent */ @@ -3841,6 +4541,7 @@ int fork_it(char **argv) * 
n.b. fork_it() does not check for errors from for_all_cpus() * because re-starting is problematic when forking */ + snapshot_proc_sysfs_files(); for_all_cpus(get_counters, ODD_COUNTERS); gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); @@ -3862,6 +4563,7 @@ int get_and_dump_counters(void) { int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, ODD_COUNTERS); if (status) return status; @@ -3876,13 +4578,13 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.16 24 Dec 2016" + fprintf(outf, "turbostat version 17.02.24" " - Len Brown <lenb@kernel.org>\n"); } -int add_counter(unsigned int msr_num, char *name, unsigned int width, - enum counter_scope scope, enum counter_type type, - enum counter_format format) +int add_counter(unsigned int msr_num, char *path, char *name, + unsigned int width, enum counter_scope scope, + enum counter_type type, enum counter_format format, int flags) { struct msr_counter *msrp; @@ -3894,31 +4596,46 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES); + if (path) + strncpy(msrp->path, path, PATH_BYTES); msrp->width = width; msrp->type = type; msrp->format = format; + msrp->flags = flags; switch (scope) { case SCOPE_CPU: - sys.thread_counter_bytes += 64; msrp->next = sys.tp; sys.tp = msrp; - sys.thread_counter_bytes += sizeof(unsigned long long); + sys.added_thread_counters++; + if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added thread counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_CORE: - sys.core_counter_bytes += 64; msrp->next = sys.cp; sys.cp = msrp; - sys.core_counter_bytes += sizeof(unsigned long long); + sys.added_core_counters++; + if (sys.added_core_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added core counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_PACKAGE: - sys.package_counter_bytes += 64; msrp->next = sys.pp; sys.pp = msrp; - sys.package_counter_bytes += sizeof(unsigned long long); + sys.added_package_counters++; + if (sys.added_package_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added package counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; } @@ -3928,7 +4645,8 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, void parse_add_command(char *add_command) { int msr_num = 0; - char name_buffer[NAME_BYTES]; + char *path = NULL; + char name_buffer[NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; @@ -3943,6 +4661,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + if (*add_command == '/') { + path = add_command; + goto next; + } + if (sscanf(add_command, "u%d", &width) == 1) { if ((width == 32) || (width == 64)) goto next; @@ -3968,6 +4691,10 @@ void parse_add_command(char *add_command) type = COUNTER_SECONDS; goto next; } + if (!strncmp(add_command, "usec", strlen("usec"))) { + type = COUNTER_USEC; + goto next; + } if (!strncmp(add_command, "raw", strlen("raw"))) { format = FORMAT_RAW; goto next; @@ -3992,36 +4719,26 @@ void parse_add_command(char *add_command) next: add_command = strchr(add_command, ','); - if (add_command) + if (add_command) { + *add_command = '\0'; add_command++; + } } - if (msr_num == 0) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n"); + if ((msr_num == 0) && (path == NULL)) 
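add_counter() above now bounds user-defined counters with MAX_ADDED_COUNTERS instead of growing sys.*_counter_bytes, and parse_add_command() accepts either an MSR spec (e.g. the help text's msr0x10,u64,cpu,delta,MY_TSC) or a sysfs path beginning with '/'. The fixed per-cpu storage this implies is sketched below; the struct shape and the value of MAX_ADDED_COUNTERS are assumptions for illustration, not taken from these hunks.

#define MAX_ADDED_COUNTERS 16	/* hypothetical limit; see the checks above */

struct thread_data {		/* illustrative subset of the real struct */
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	int cpu_id;
	unsigned long long counter[MAX_ADDED_COUNTERS];	/* fixed slots for --add */
};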
{ + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); fail++; } /* generate default column header */ if (*name_buffer == '\0') { - if (format == FORMAT_RAW) { - if (width == 32) - sprintf(name_buffer, "msr%d", msr_num); - else - sprintf(name_buffer, "MSR%d", msr_num); - } else if (format == FORMAT_DELTA) { - if (width == 32) - sprintf(name_buffer, "cnt%d", msr_num); - else - sprintf(name_buffer, "CNT%d", msr_num); - } else if (format == FORMAT_PERCENT) { - if (width == 32) - sprintf(name_buffer, "msr%d%%", msr_num); - else - sprintf(name_buffer, "MSR%d%%", msr_num); - } + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); } - if (add_counter(msr_num, name_buffer, width, scope, type, format)) + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) fail++; if (fail) { @@ -4029,20 +4746,214 @@ next: exit(1); } } + +int is_deferred_skip(char *name) +{ + int i; + + for (i = 0; i < deferred_skip_index; ++i) + if (!strcmp(name, deferred_skip_names[i])) + return 1; + return 0; +} + +void probe_sysfs(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '%'; + *(sp + 1) = '\0'; + + fclose(input); + + sprintf(path, "cpuidle/state%d/time", state); + + if (is_deferred_skip(name_buf)) + continue; + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, + FORMAT_PERCENT, SYSFS_PERCPU); + } + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + fclose(input); + + sprintf(path, "cpuidle/state%d/usage", state); + + if (is_deferred_skip(name_buf)) + continue; + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, + FORMAT_DELTA, SYSFS_PERCPU); + } + +} + + +/* + * parse cpuset with following syntax + * 1,2,4..6,8-10 and set bits in cpu_subset + */ +void parse_cpu_command(char *optarg) +{ + unsigned int start, end; + char *next; + + if (!strcmp(optarg, "core")) { + if (cpu_subset) + goto error; + show_core_only++; + return; + } + if (!strcmp(optarg, "package")) { + if (cpu_subset) + goto error; + show_pkg_only++; + return; + } + if (show_core_only || show_pkg_only) + goto error; + + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); + if (cpu_subset == NULL) + err(3, "CPU_ALLOC"); + cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); + + CPU_ZERO_S(cpu_subset_size, cpu_subset); + + next = optarg; + + while (next && *next) { + + if (*next == '-') /* no negative cpu numbers */ + goto error; + + start = strtoul(next, &next, 10); + + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + + if (*next == '\0') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { 
+ next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + goto error; + } + + end = strtoul(next, &next, 10); + if (end <= start) + goto error; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + } + + if (*next == ',') + next += 1; + else if (*next != '\0') + goto error; + } + + return; + +error: + fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); + help(); + exit(-1); +} + +int shown; +/* + * parse_show_hide() - process cmdline to set default counter action + */ +void parse_show_hide(char *optarg, enum show_hide_mode new_mode) +{ + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (new_mode == SHOW_LIST) { + if (shown == 0) + bic_enabled = bic_lookup(optarg, new_mode); + else + bic_enabled |= bic_lookup(optarg, new_mode); + shown = 1; + + return; + } + + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg, new_mode); + +} + void cmdline(int argc, char **argv) { int opt; int option_index = 0; static struct option long_options[] = { {"add", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, {"Dump", no_argument, 0, 'D'}, - {"debug", no_argument, 0, 'd'}, + {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, + {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, + {"list", no_argument, 0, 'l'}, {"out", required_argument, 0, 'o'}, - {"Package", no_argument, 0, 'p'}, - {"processor", no_argument, 0, 'p'}, + {"quiet", no_argument, 0, 'q'}, + {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, {"TCC", required_argument, 0, 'T'}, {"version", no_argument, 0, 'v' }, @@ -4051,18 +4962,24 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': parse_add_command(optarg); break; + case 'c': + parse_cpu_command(optarg); + break; case 'D': dump_only++; break; case 'd': debug++; break; + case 'H': + parse_show_hide(optarg, HIDE_LIST); + break; case 'h': default: help(); @@ -4084,14 +5001,18 @@ void cmdline(int argc, char **argv) case 'J': rapl_joules++; break; + case 'l': + list_header_only++; + quiet++; + break; case 'o': outf = fopen_or_die(optarg, "w"); break; - case 'P': - show_pkg_only++; + case 'q': + quiet = 1; break; - case 'p': - show_core_only++; + case 's': + parse_show_hide(optarg, SHOW_LIST); break; case 'S': summary_only++; @@ -4113,15 +5034,24 @@ int main(int argc, char **argv) cmdline(argc, argv); - if (debug) + if (!quiet) print_version(); + probe_sysfs(); + turbostat_init(); /* dump counters and exit */ if (dump_only) return get_and_dump_counters(); + /* list header and exit */ + if (list_header_only) { + print_header(","); + flush_output_stdout(); + return 0; + } + /* * if any params left, it must be a command to fork */ diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 621578aa12d6..fc74db62fef4 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -43,6 +43,15 @@ ifneq ($(CC), clang) EXTRA_WARNINGS += 
-Wstrict-aliasing=3 endif +# Hack to avoid type-punned warnings on old systems such as RHEL5: +# We should be changing CFLAGS and checking gcc version, but this +# will do for now and keep the above -Wstrict-aliasing=3 in place +# in newer systems. +# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h +ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3 +EXTRA_WARNINGS += -fno-strict-aliasing +endif + ifneq ($(findstring $(MAKEFLAGS), w),w) PRINT_DIR = --no-print-directory else diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6e4eb2fc2d1e..0c8b61f8398e 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1880,6 +1880,7 @@ sub get_grub_index { sub wait_for_input { my ($fp, $time) = @_; + my $start_time; my $rin; my $rout; my $nr; @@ -1895,17 +1896,22 @@ sub wait_for_input vec($rin, fileno($fp), 1) = 1; vec($rin, fileno(\*STDIN), 1) = 1; + $start_time = time; + while (1) { $nr = select($rout=$rin, undef, undef, $time); - if ($nr <= 0) { - return undef; - } + last if ($nr <= 0); # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { - sysread(\*STDIN, $buf, 1000); - syswrite($fp, $buf, 1000); + $nr = sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, $nr) if ($nr > 0); + } + + # The timeout is based on time waiting for the fp data + if (vec($rout, fileno($fp), 1) != 1) { + last if (defined($time) && (time - $start_time > $time)); next; } @@ -1917,12 +1923,11 @@ sub wait_for_input last if ($ch eq "\n"); } - if (!length($line)) { - return undef; - } + last if (!length($line)); return $line; } + return undef; } sub reboot_to { diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 45be8b55a663..798f17655433 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -887,7 +887,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 4+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0]; + memdev->region_offset = 1; memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -902,7 +902,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 5+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->region_offset = (1 << 8); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -917,7 +917,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 4+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1]; + memdev->region_offset = (1 << 16); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -932,7 +932,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 5+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->region_offset = (1 << 24); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -947,7 +947,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 6+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 32); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -962,7 +962,7 @@ static void 
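The wait_for_input() rewrite in the ktest.pl hunk above makes the timeout apply specifically to console output: the loop keeps its own $start_time, so keystrokes copied from STDIN no longer restart the wait, and it only gives up once $time seconds pass with no data from $fp. It also forwards only the number of bytes actually read from STDIN rather than a fixed 1000, avoiding writing stale buffer contents to the console.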
nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 7+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 40); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -1380,7 +1380,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 11+1; memdev->region_index = 9+1; memdev->region_size = SPA0_SIZE; - memdev->region_offset = t->spa_set_dma[2]; + memdev->region_offset = (1ULL << 48); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f11315bedefc..6a9480c03cbd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,6 +1,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -LDFLAGS += -lpthread -lurcu +LDFLAGS += -fsanitize=address +LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ @@ -10,23 +11,25 @@ ifndef SHIFT SHIFT=3 endif +ifeq ($(BUILD), 32) + CFLAGS += -m32 + LDFLAGS += -m32 +endif + targets: mapshift $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main idr-test: idr-test.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test multiorder: multiorder.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h vpath %.c ../../lib -$(OFILES): *.h */*.h generated/map-shift.h \ +$(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -41,7 +44,7 @@ idr.c: ../../../lib/idr.c .PHONY: mapshift mapshift: - @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + @if ! 
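In the radix-tree test Makefile hunk above, moving -lpthread and -lurcu from LDFLAGS to LDLIBS (and adding -fsanitize=address to LDFLAGS) is what allows the explicit link commands to be dropped: GNU make's built-in link rule, roughly $(CC) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@, places the libraries after the object files, which is where the linker needs them.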
grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 9b09ddfe462f..99c40f3ed133 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,6 +17,9 @@ #include <time.h> #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -57,27 +60,176 @@ again: return nsec; } +static void benchmark_insert(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + item_insert_order(root, index, order); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", + size, step, order, nsec); +} + +static void benchmark_tagging(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + radix_tree_tag_set(root, index, 0); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", + size, step, order, nsec); +} + +static void benchmark_delete(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index, i; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + for_each_index(i, index, order) + item_delete(root, i); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", + size, step, order, nsec); +} + static void benchmark_size(unsigned long size, unsigned long step, int order) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - unsigned long index; - for (index = 0 ; index < size ; index += step) { - item_insert_order(&tree, index, order); - radix_tree_tag_set(&tree, index, 0); - } + benchmark_insert(&tree, size, step, order); + benchmark_tagging(&tree, size, step, order); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", - size, step, order, tagged, normal); + printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", + size, step, order, tagged); + printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", + size, step, order, normal); + + benchmark_delete(&tree, size, step, order); item_kill_tree(&tree); rcu_barrier(); } +static long long __benchmark_split(unsigned long index, + int old_order, int new_order) +{ + struct timespec start, finish; + long long nsec; + RADIX_TREE(tree, GFP_ATOMIC); + + item_insert_order(&tree, index, old_order); + + clock_gettime(CLOCK_MONOTONIC, &start); + 
radix_tree_split(&tree, index, new_order); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + item_kill_tree(&tree); + + return nsec; + +} + +static void benchmark_split(unsigned long size, unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + + for (idx = 0; idx < size; idx += step) { + for (i = 3; i < 11; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_split(idx, i, j); + } + } + } + + printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", + size, step, nsec); + +} + +static long long __benchmark_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + struct timespec start, finish; + long long nsec; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_join(&tree, index + 1, order1, item2); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + loc = find_item(&tree, item); + if (loc == -1) + free(item); + + item_kill_tree(&tree); + + return nsec; +} + +static void benchmark_join(unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + for (idx = 0; idx < 1 << 10; idx += step) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_join(idx, i, j); + } + } + } + + printv(2, "Size %8d, step %8ld, join time %10lld ns\n", + 1 << 10, step, nsec); +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -95,4 +247,11 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_split(size[c], step[s]); + + for (s = 0; step[s]; s++) + benchmark_join(step[s]); } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a26098c6123d..30cd0b296f1a 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,6 +153,30 @@ void idr_nowait_test(void) idr_destroy(&idr); } +void idr_get_next_test(void) +{ + unsigned long i; + int nextid; + DEFINE_IDR(idr); + + int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; + + for(i = 0; indices[i]; i++) { + struct item *item = item_create(indices[i], 0); + assert(idr_alloc(&idr, item, indices[i], indices[i+1], + GFP_KERNEL) == indices[i]); + } + + for(i = 0, nextid = 0; indices[i]; i++) { + idr_get_next(&idr, &nextid); + assert(nextid == indices[i]); + nextid++; + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + void idr_checks(void) { unsigned long i; @@ -202,6 +226,7 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); + idr_get_next_test(); } /* @@ -338,7 +363,7 @@ void ida_check_random(void) { DEFINE_IDA(ida); DECLARE_BITMAP(bitmap, 2048); - int id; + int id, err; unsigned int i; time_t s = time(NULL); @@ -352,8 +377,11 @@ void ida_check_random(void) ida_remove(&ida, bit); } else { __set_bit(bit, bitmap); - ida_pre_get(&ida, GFP_KERNEL); - assert(!ida_get_new_above(&ida, bit, &id)); + do { + ida_pre_get(&ida, GFP_KERNEL); + err = ida_get_new_above(&ida, bit, &id); + } while (err == -ENOMEM); + assert(!err); assert(id == bit); } } @@ -362,6 +390,24 @@ void ida_check_random(void) goto repeat; } +void ida_simple_get_remove_test(void) +{ + DEFINE_IDA(ida); + 
unsigned long i; + + for (i = 0; i < 10000; i++) { + assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); + } + assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 10000; i++) { + ida_simple_remove(&ida, i); + } + assert(ida_is_empty(&ida)); + + ida_destroy(&ida); +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -428,15 +474,41 @@ void ida_checks(void) ida_check_max(); ida_check_conv(); ida_check_random(); + ida_simple_get_remove_test(); radix_tree_cpu_dead(1); } +static void *ida_random_fn(void *arg) +{ + rcu_register_thread(); + ida_check_random(); + rcu_unregister_thread(); + return NULL; +} + +void ida_thread_tests(void) +{ + pthread_t threads[10]; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); +} + int __weak main(void) { radix_tree_init(); idr_checks(); ida_checks(); + ida_thread_tests(); + radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b829127d5670..bc9a78449572 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -368,6 +368,7 @@ int main(int argc, char **argv) iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); + ida_thread_tests(); /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index d4ff00989245..36dcf7d6945d 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -330,6 +330,34 @@ static void single_check(void) item_kill_tree(&tree); } +void radix_tree_clear_tags_test(void) +{ + unsigned long index; + struct radix_tree_node *node; + struct radix_tree_iter iter; + void **slot; + + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + __radix_tree_lookup(&tree, 0, &node, &slot); + radix_tree_clear_tags(&tree, node, slot); + assert(item_tag_get(&tree, 0, 0) == 0); + + for (index = 0; index < 1000; index++) { + item_insert(&tree, index); + item_tag_set(&tree, index, 0); + } + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_clear_tags(&tree, iter.node, slot); + assert(item_tag_get(&tree, iter.index, 0) == 0); + } + + item_kill_tree(&tree); +} + void tag_check(void) { single_check(); @@ -347,4 +375,5 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); + radix_tree_clear_tags_test(); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index b30e11d9d271..0f8220cc6166 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration); void benchmark(void); void idr_checks(void); void ida_checks(void); +void ida_thread_tests(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index e8b79a7b50bd..d8593f1251ec 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -26,6 +26,7 @@ TARGETS += ptrace TARGETS += seccomp TARGETS += sigaltstack TARGETS += size +TARGETS += splice TARGETS += static_keys TARGETS += sync TARGETS += sysctl diff --git 
a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4b498265dae6..67531f47781b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,12 +1,14 @@ LIBDIR := ../../../lib BPFOBJ := $(LIBDIR)/bpf/bpf.o -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) +CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ) TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := test_kmod.sh +all: $(TEST_GEN_PROGS) + .PHONY: all clean force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1f5b9eea1e8..d1555e4240c0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8,6 +8,8 @@ * License as published by the Free Software Foundation. */ +#include <asm/types.h> +#include <linux/types.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -4583,10 +4585,12 @@ static bool is_admin(void) cap_flag_value_t sysadmin = CAP_CLEAR; const cap_value_t cap_val = CAP_SYS_ADMIN; +#ifdef CAP_IS_SUPPORTED if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) { perror("cap_get_flag"); return false; } +#endif caps = cap_get_proc(); if (!caps) { perror("cap_get_proc"); diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index ce96d80ad64f..775c589ac3c0 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -2,6 +2,10 @@ # Makefile can operate with or without the kbuild infrastructure. CC := $(CROSS_COMPILE)gcc +ifeq (0,$(MAKELEVEL)) +OUTPUT := $(shell pwd) +endif + TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h index d828bfb6ef2d..54064ced9e95 100644 --- a/tools/testing/selftests/powerpc/include/vsx_asm.h +++ b/tools/testing/selftests/powerpc/include/vsx_asm.h @@ -16,56 +16,56 @@ */ FUNC_START(load_vsx) li r5,0 - lxvx vs20,r5,r3 + lxvd2x vs20,r5,r3 addi r5,r5,16 - lxvx vs21,r5,r3 + lxvd2x vs21,r5,r3 addi r5,r5,16 - lxvx vs22,r5,r3 + lxvd2x vs22,r5,r3 addi r5,r5,16 - lxvx vs23,r5,r3 + lxvd2x vs23,r5,r3 addi r5,r5,16 - lxvx vs24,r5,r3 + lxvd2x vs24,r5,r3 addi r5,r5,16 - lxvx vs25,r5,r3 + lxvd2x vs25,r5,r3 addi r5,r5,16 - lxvx vs26,r5,r3 + lxvd2x vs26,r5,r3 addi r5,r5,16 - lxvx vs27,r5,r3 + lxvd2x vs27,r5,r3 addi r5,r5,16 - lxvx vs28,r5,r3 + lxvd2x vs28,r5,r3 addi r5,r5,16 - lxvx vs29,r5,r3 + lxvd2x vs29,r5,r3 addi r5,r5,16 - lxvx vs30,r5,r3 + lxvd2x vs30,r5,r3 addi r5,r5,16 - lxvx vs31,r5,r3 + lxvd2x vs31,r5,r3 blr FUNC_END(load_vsx) FUNC_START(store_vsx) li r5,0 - stxvx vs20,r5,r3 + stxvd2x vs20,r5,r3 addi r5,r5,16 - stxvx vs21,r5,r3 + stxvd2x vs21,r5,r3 addi r5,r5,16 - stxvx vs22,r5,r3 + stxvd2x vs22,r5,r3 addi r5,r5,16 - stxvx vs23,r5,r3 
+ stxvd2x vs23,r5,r3 addi r5,r5,16 - stxvx vs24,r5,r3 + stxvd2x vs24,r5,r3 addi r5,r5,16 - stxvx vs25,r5,r3 + stxvd2x vs25,r5,r3 addi r5,r5,16 - stxvx vs26,r5,r3 + stxvd2x vs26,r5,r3 addi r5,r5,16 - stxvx vs27,r5,r3 + stxvd2x vs27,r5,r3 addi r5,r5,16 - stxvx vs28,r5,r3 + stxvd2x vs28,r5,r3 addi r5,r5,16 - stxvx vs29,r5,r3 + stxvd2x vs29,r5,r3 addi r5,r5,16 - stxvx vs30,r5,r3 + stxvd2x vs30,r5,r3 addi r5,r5,16 - stxvx vs31,r5,r3 + stxvd2x vs31,r5,r3 blr FUNC_END(store_vsx) diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile new file mode 100644 index 000000000000..de51f439d4a6 --- /dev/null +++ b/tools/testing/selftests/splice/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := default_file_splice_read.sh +EXTRA := default_file_splice_read +all: $(TEST_PROGS) $(EXTRA) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) $(EXTRA) diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c new file mode 100644 index 000000000000..01dd6091554c --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include <fcntl.h> + +int main(int argc, char **argv) +{ + splice(0, 0, 1, 0, 1<<30, 0); + return 0; +} diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh new file mode 100755 index 000000000000..1ea2adeabc94 --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.sh @@ -0,0 +1,7 @@ +#!/bin/sh +n=`./default_file_splice_read </dev/null | wc -c` + +test "$n" = 0 && exit 0 + +echo "default_file_splice_read broken: leaked $n" +exit 1 diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4cff7e7ddcc4..41642ba5e318 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 3a5ebae5303e..38e0a9ca5d71 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ protection_keys test_vdso TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..b4967d875236 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c new file mode 100644 index 000000000000..b77313ba2ab1 --- /dev/null +++ b/tools/testing/selftests/x86/ioperm.c @@ -0,0 +1,170 @@ +/* + * ioperm.c - Test case for ioperm(2) + * Copyright (c) 
2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <sched.h> +#include <sys/io.h> + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static bool try_outb(unsigned short port) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("outb %%al, %w[port]" + : : [port] "Nd" (port), "a" (0)); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_ok(unsigned short port) +{ + if (!try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx failed\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx worked\n", port); +} + +static void expect_gp(unsigned short port) +{ + if (try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx worked\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx failed\n", port); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + expect_gp(0x80); + expect_gp(0xed); + + /* + * Probe for ioperm support. Note that clearing ioperm bits + * works even as nonroot. + */ + printf("[RUN]\tenable 0x80\n"); + if (ioperm(0x80, 1, 1) != 0) { + printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n", + errno); + return 0; + } + expect_ok(0x80); + expect_gp(0xed); + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + expect_gp(0x80); + expect_gp(0xed); + + /* Make sure that fork() preserves ioperm. */ + if (ioperm(0x80, 1, 1) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tchild: check that we inherited permissions\n"); + expect_ok(0x80); + expect_gp(0xed); + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + /* Test the capability checks. 
*/ + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + return 0; + } + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + printf("[OK]\tit worked\n"); + + printf("[RUN]\tenable 0x80 again\n"); + if (ioperm(0x80, 1, 1) == 0) { + printf("[FAIL]\tit succeeded but should have failed.\n"); + return 1; + } + printf("[OK]\tit failed\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf04..f6121612e769 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -588,7 +594,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +663,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +694,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +727,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +780,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int 
$0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* |