diff options
Diffstat (limited to 'tools')
491 files changed, 25263 insertions, 4582 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 5ef1c15e88ad..11e86739456d 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t) "RECLAIM %12s%15s%15s\n" " %15llu%15llu%15llums\n" "THRASHING%12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "COMPACT %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms(t->thrashing_delay_total, t->thrashing_count)); + average_ms(t->thrashing_delay_total, t->thrashing_count), + "count", "delay total", "delay average", + (unsigned long long)t->compact_count, + (unsigned long long)t->compact_delay_total, + average_ms(t->compact_delay_total, t->compact_count)); } static void task_context_switch_counts(struct taskstats *t) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..323e251ed37b 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d5b5f2ab87a0..65d147974f8d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -299,7 +299,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -315,6 +317,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index dc632b41f135..65c0d9ce1e29 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 01e2650b9585..a4a39c3e0f19 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -476,6 +476,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 @@ -486,6 +487,23 @@ #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +/* AMD Collaborative Processor Performance Control MSRs */ +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAP2 0xc00102b2 +#define MSR_AMD_CPPC_REQ 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) +#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) +#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) +#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) + +#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) +#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) +#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) +#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index b2d504f11937..aff774775c67 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -35,11 +35,7 @@ # define NEED_CMOV 0 #endif -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else # define NEED_3DNOW 0 -#endif #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) # define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 5a776a08f78c..bf6e96011dfe 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -373,9 +373,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[0]; }; #define KVM_MAX_XCRS 16 @@ -438,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 754a07856817..500b96e71f18 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -2,20 +2,22 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_GET_XCOMP_SUPP 0x1021 -#define ARCH_GET_XCOMP_PERM 0x1022 -#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1cc9da6e29c7..d0d7b9bc6cad 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -39,12 +38,13 @@ SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) + RET +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 9827ae267f96..fc9ffd3ff3b2 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -40,12 +39,13 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323..d12d1358f96d 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) @@ -822,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -846,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -871,23 +877,102 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) +EndTable + +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) EndTable GrpTable: Grp1 @@ -970,7 +1055,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -987,7 +1072,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index d9b420972934..f70702fcb224 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx) e.pid = task->tgid; e.id = get_obj_id(file->private_data, obj_type); - bpf_probe_read_kernel(&e.comm, sizeof(e.comm), - task->group_leader->comm); + bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm), + task->group_leader->comm); bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); return 0; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 9ddeca947635..320a88ac28c9 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -9,7 +9,11 @@ ifeq ($(V),1) msg = else Q = @ - msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + ifeq ($(silent),1) + msg = + else + msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + endif MAKEFLAGS=--no-print-directory endif diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index ab9353f2fd46..9a5c1f008fe6 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) */ struct task_struct *prev = (struct task_struct *)ctx[1]; struct task_struct *next = (struct task_struct *)ctx[2]; - struct event event = {}; + struct runq_event event = {}; u64 *tsp, delta_us; long state; u32 pid; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index 2414cc764461..d78f4148597f 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { - const struct event *e = data; + const struct runq_event *e = data; struct tm *tm; char ts[32]; time_t t; diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h index 9db225425e5f..4f70f07200c2 100644 --- a/tools/bpf/runqslower/runqslower.h +++ b/tools/bpf/runqslower/runqslower.h @@ -4,7 +4,7 @@ #define TASK_COMM_LEN 16 -struct event { +struct runq_event { char task[TASK_COMM_LEN]; __u64 delta_us; pid_t pid; diff --git a/tools/build/Build.include b/tools/build/Build.include index 2cf3b1bde86e..c2a95ab47379 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) # output directory for tests below TMPOUT = .tmp_$$$$ diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 1600b17dbb8a..1d3a90d93fe2 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -11,7 +11,7 @@ from drgn.helpers.linux import list_for_each_entry, list_empty from drgn.helpers.linux import for_each_page from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.percpu import per_cpu_ptr -from drgn import container_of, FaultError, Object +from drgn import container_of, FaultError, Object, cast DESC = """ @@ -69,15 +69,15 @@ def oo_objects(s): def count_partial(n, fn): - nr_pages = 0 - for page in list_for_each_entry('struct page', n.partial.address_of_(), - 'lru'): - nr_pages += fn(page) - return nr_pages + nr_objs = 0 + for slab in list_for_each_entry('struct slab', n.partial.address_of_(), + 'slab_list'): + nr_objs += fn(slab) + return nr_objs -def count_free(page): - return page.objects - page.inuse +def count_free(slab): + return slab.objects - slab.inuse def slub_get_slabinfo(s, cfg): @@ -145,14 +145,14 @@ def detect_kernel_config(): return cfg -def for_each_slab_page(prog): +def for_each_slab(prog): PGSlab = 1 << prog.constant('PG_slab') PGHead = 1 << prog.constant('PG_head') for page in for_each_page(prog): try: if page.flags.value_() & PGSlab: - yield page + yield cast('struct slab *', page) except FaultError: pass @@ -190,13 +190,13 @@ def main(): 'list'): obj_cgroups.add(ptr.value_()) - # look over all slab pages, belonging to non-root memcgs - # and look for objects belonging to the given memory cgroup - for page in for_each_slab_page(prog): - objcg_vec_raw = page.memcg_data.value_() + # look over all slab folios and look for objects belonging + # to the given memory cgroup + for slab in for_each_slab(prog): + objcg_vec_raw = slab.memcg_data.value_() if objcg_vec_raw == 0: continue - cache = page.slab_cache + cache = slab.slab_cache if not cache: continue addr = cache.value_() diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 0076437f6e3f..b94a16ba5c6c 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -279,6 +279,7 @@ static void print_event(struct iio_event_data *event) printf(", direction: %s", iio_ev_dir_text[dir]); printf("\n"); + fflush(stdout); } /* Enable or disable events in sysfs if the knob is available */ diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 5d2ab38965cc..9ab313e93555 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -18,7 +18,6 @@ #include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> -#include <asm-generic/bitops/find.h> #ifndef _TOOLS_LINUX_BITOPS_H_ #error only <linux/bitops.h> can be included directly diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 95611df1d26e..ea97804d04d4 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -1,9 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_BITOPS_H -#define _PERF_BITOPS_H +#ifndef _TOOLS_LINUX_BITMAP_H +#define _TOOLS_LINUX_BITMAP_H #include <string.h> #include <linux/bitops.h> +#include <linux/find.h> #include <stdlib.h> #include <linux/kernel.h> @@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -#endif /* _PERF_BITOPS_H */ +#endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/linux/find.h index 6481fd11012a..47e2bd6c5174 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/linux/find.h @@ -1,11 +1,19 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ -#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ +#ifndef _TOOLS_LINUX_FIND_H_ +#define _TOOLS_LINUX_FIND_H_ + +#ifndef _TOOLS_LINUX_BITMAP_H +#error tools: only <linux/bitmap.h> can be included directly +#endif + +#include <linux/bitops.h> extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -96,7 +104,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, #endif #ifndef find_first_bit - /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -116,11 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) return _find_first_bit(addr, size); } +#endif + +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); -#endif /* find_first_bit */ + return val ? __ffs(val) : size; + } -#ifndef find_first_zero_bit + return _find_first_and_bit(addr1, addr2, size); +} +#endif +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -142,4 +172,43 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) } #endif -#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. + */ +extern unsigned long find_next_clump8(unsigned long *clump, + const unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_clump8(clump, bits, size) \ + find_next_clump8((clump), (bits), (size), 0) + + +#endif /*__LINUX_FIND_H_ */ diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h index ad6fa21d977b..38edaa08f862 100644 --- a/tools/include/linux/hash.h +++ b/tools/include/linux/hash.h @@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val) return val * GOLDEN_RATIO_32; } -#ifndef HAVE_ARCH_HASH_32 -#define hash_32 hash_32_generic -#endif -static inline u32 hash_32_generic(u32 val, unsigned int bits) +static inline u32 hash_32(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 3430667b0d24..c1c285fe494a 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -265,12 +265,17 @@ struct stat { * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively * - the system call is performed by calling the syscall instruction * - syscall return comes in rax - * - rcx and r8..r11 may be clobbered, others are preserved. + * - rcx and r11 are clobbered, others are preserved. * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. * - the syscall number is always specified last in order to allow to force * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI + * */ #define my_syscall0(num) \ @@ -280,9 +285,9 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -295,10 +300,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -312,10 +317,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -330,10 +335,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -349,10 +354,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ "0"(_num) \ - : "rcx", "r8", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -369,10 +374,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "0"(_num) \ - : "rcx", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -390,7 +395,7 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "r"(_arg6), "0"(_num) \ : "rcx", "r11", "memory", "cc" \ @@ -399,17 +404,23 @@ struct stat { }) /* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits - "mov $60, %rax\n" // NR_exit == 60 + "mov %eax, %edi\n" // retrieve exit code (32 bit) + "mov $60, %eax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return ""); @@ -577,20 +588,28 @@ struct sys_stat_struct { }) /* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %eax\n" // argc (first arg, %eax) "mov %esp, %ebx\n" // argv[] (second arg, %ebx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) "push %ecx\n" // push all registers on the stack so that we "push %ebx\n" // support both regparm and plain stack modes "push %eax\n" "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now "hlt\n" // ensure it does not ""); @@ -774,7 +793,6 @@ asm(".section .text\n" "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits "movs r7, $1\n" // NR_exit == 1 "svc $0x00\n" ""); @@ -971,7 +989,6 @@ asm(".section .text\n" "add x2, x2, x1\n" // + argv "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits "mov x8, 93\n" // NR_exit == 93 "svc #0\n" ""); @@ -1176,7 +1193,7 @@ asm(".section .text\n" "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "jal main\n" // main() returns the status code, we'll exit with it. "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" @@ -1374,7 +1391,6 @@ asm(".section .text\n" "add a2,a2,a1\n" // + argv "andi sp,a1,-16\n" // sp must be 16-byte aligned "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits "li a7, 93\n" // NR_exit == 93 "ecall\n" ""); @@ -1556,6 +1572,12 @@ pid_t sys_getpid(void) } static __attribute__((unused)) +pid_t sys_gettid(void) +{ + return my_syscall0(__NR_gettid); +} + +static __attribute__((unused)) int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { return my_syscall2(__NR_gettimeofday, tv, tz); @@ -2014,6 +2036,18 @@ pid_t getpid(void) } static __attribute__((unused)) +pid_t gettid(void) +{ + pid_t ret = sys_gettid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) int gettimeofday(struct timeval *tv, struct timezone *tz) { int ret = sys_gettimeofday(tv, tz); diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 4557a8b6086f..1c48b0ae3ba3 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) #define __NR_futex_waitv 449 __SYSCALL(__NR_futex_waitv, sys_futex_waitv) +#define __NR_set_mempolicy_home_node 450 +__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #undef __NR_syscalls -#define __NR_syscalls 450 +#define __NR_syscalls 451 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 3b810b53ba8b..642808520d92 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1096,6 +1096,24 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Planes are valid until one has a + * zero handle -- this can be used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 1daa45268de2..507ee1f2aa96 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1131,6 +1131,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 +#define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING @@ -1162,11 +1166,20 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +struct kvm_irq_routing_xen_evtchn { + __u32 port; + __u32 vcpu; + __u32 priority; +}; + +#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1178,6 +1191,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; + struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1208,6 +1222,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) struct kvm_xen_hvm_config { __u32 flags; @@ -2031,4 +2046,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h deleted file mode 100644 index 45fcbf99d72e..000000000000 --- a/tools/include/uapi/linux/lirc.h +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson - */ - -#ifndef _LINUX_LIRC_H -#define _LINUX_LIRC_H - -#include <linux/types.h> -#include <linux/ioctl.h> - -#define PULSE_BIT 0x01000000 -#define PULSE_MASK 0x00FFFFFF - -#define LIRC_MODE2_SPACE 0x00000000 -#define LIRC_MODE2_PULSE 0x01000000 -#define LIRC_MODE2_FREQUENCY 0x02000000 -#define LIRC_MODE2_TIMEOUT 0x03000000 - -#define LIRC_VALUE_MASK 0x00FFFFFF -#define LIRC_MODE2_MASK 0xFF000000 - -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) - -#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) -#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) - -#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) -#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) -#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) -#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) - -/* used heavily by lirc userspace */ -#define lirc_t int - -/*** lirc compatible hardware features ***/ - -#define LIRC_MODE2SEND(x) (x) -#define LIRC_SEND2MODE(x) (x) -#define LIRC_MODE2REC(x) ((x) << 16) -#define LIRC_REC2MODE(x) ((x) >> 16) - -#define LIRC_MODE_RAW 0x00000001 -#define LIRC_MODE_PULSE 0x00000002 -#define LIRC_MODE_MODE2 0x00000004 -#define LIRC_MODE_SCANCODE 0x00000008 -#define LIRC_MODE_LIRCCODE 0x00000010 - - -#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) -#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) -#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_SEND_MASK 0x0000003f - -#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 -#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 -#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 - -#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) -#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) -#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) -#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) -#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) - -#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) - -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 -#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 -#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 - -#define LIRC_CAN_MEASURE_CARRIER 0x02000000 -#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 - -#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) -#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) - -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - -/*** IOCTL commands for lirc driver ***/ - -#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) - -#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) -#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) -#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) - -#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) -#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) - -/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ -#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) - -#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) -#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) -/* Note: these can reset the according pulse_width */ -#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) -#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) -#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) -#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) - -/* - * when a timeout != 0 is set the driver will send a - * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is - * never sent, timeout is disabled by default - */ -#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) - -/* 1 enables, 0 disables timeout reports in MODE2 */ -#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) - -/* - * if enabled from the next key press on the driver will send - * LIRC_MODE2_FREQUENCY packets - */ -#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) - -/* - * to set a range use LIRC_SET_REC_CARRIER_RANGE with the - * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound - */ -#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) - -#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) - -/* - * Return the recording timeout, which is either set by - * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. - */ -#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) - -/* - * struct lirc_scancode - decoded scancode with protocol for use with - * LIRC_MODE_SCANCODE - * - * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR - * was decoded. - * @flags: should be 0 for transmit. When receiving scancodes, - * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set - * depending on the protocol - * @rc_proto: see enum rc_proto - * @keycode: the translated keycode. Set to 0 for transmit. - * @scancode: the scancode received or to be sent - */ -struct lirc_scancode { - __u64 timestamp; - __u16 flags; - __u16 rc_proto; - __u32 keycode; - __u64 scancode; -}; - -/* Set if the toggle bit of rc-5 or rc-6 is enabled */ -#define LIRC_SCANCODE_FLAG_TOGGLE 1 -/* Set if this is a nec or sanyo repeat */ -#define LIRC_SCANCODE_FLAG_REPEAT 2 - -/** - * enum rc_proto - the Remote Controller protocol - * - * @RC_PROTO_UNKNOWN: Protocol not known - * @RC_PROTO_OTHER: Protocol known but proprietary - * @RC_PROTO_RC5: Philips RC5 protocol - * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol - * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 - * @RC_PROTO_JVC: JVC protocol - * @RC_PROTO_SONY12: Sony 12 bit protocol - * @RC_PROTO_SONY15: Sony 15 bit protocol - * @RC_PROTO_SONY20: Sony 20 bit protocol - * @RC_PROTO_NEC: NEC protocol - * @RC_PROTO_NECX: Extended NEC protocol - * @RC_PROTO_NEC32: NEC 32 bit protocol - * @RC_PROTO_SANYO: Sanyo protocol - * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard - * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse - * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol - * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol - * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol - * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol - * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol - * @RC_PROTO_SHARP: Sharp protocol - * @RC_PROTO_XMP: XMP protocol - * @RC_PROTO_CEC: CEC protocol - * @RC_PROTO_IMON: iMon Pad protocol - * @RC_PROTO_RCMM12: RC-MM protocol 12 bits - * @RC_PROTO_RCMM24: RC-MM protocol 24 bits - * @RC_PROTO_RCMM32: RC-MM protocol 32 bits - */ -enum rc_proto { - RC_PROTO_UNKNOWN = 0, - RC_PROTO_OTHER = 1, - RC_PROTO_RC5 = 2, - RC_PROTO_RC5X_20 = 3, - RC_PROTO_RC5_SZ = 4, - RC_PROTO_JVC = 5, - RC_PROTO_SONY12 = 6, - RC_PROTO_SONY15 = 7, - RC_PROTO_SONY20 = 8, - RC_PROTO_NEC = 9, - RC_PROTO_NECX = 10, - RC_PROTO_NEC32 = 11, - RC_PROTO_SANYO = 12, - RC_PROTO_MCIR2_KBD = 13, - RC_PROTO_MCIR2_MSE = 14, - RC_PROTO_RC6_0 = 15, - RC_PROTO_RC6_6A_20 = 16, - RC_PROTO_RC6_6A_24 = 17, - RC_PROTO_RC6_6A_32 = 18, - RC_PROTO_RC6_MCE = 19, - RC_PROTO_SHARP = 20, - RC_PROTO_XMP = 21, - RC_PROTO_CEC = 22, - RC_PROTO_IMON = 23, - RC_PROTO_RCMM12 = 24, - RC_PROTO_RCMM24 = 25, - RC_PROTO_RCMM32 = 26, -}; - -#endif diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bd8860eeb291..d37629dbad72 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -251,6 +251,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; @@ -465,6 +467,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; @@ -1332,7 +1336,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index bb73e9a0b24f..e998764f0262 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -272,4 +272,7 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 5fbb79e30819..2d3e5df39a59 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ @@ -202,6 +204,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ +/* + * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the + * available bit count in most significant bit. It's for the case of so-called 'left-justified' or + * `right-padding` sample which has less width than 32 bit. + */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) @@ -300,7 +307,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ - +#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 109aa7ffcf97..ba4b8d94e004 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) } #endif +#ifndef find_first_and_bit +/* + * Find the first set bit in two memory regions. + */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 63ae5e0195ce..32c5051c24eb 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,6 +48,7 @@ SYNOPSIS int perf_cpu_map__nr(const struct perf_cpu_map *cpus); bool perf_cpu_map__empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); + bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) -- @@ -135,16 +136,16 @@ SYNOPSIS int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); - void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__mmap(struct perf_evsel *evsel, int pages); void perf_evsel__munmap(struct perf_evsel *evsel); - void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); - int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__disable(struct perf_evsel *evsel); - int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index adaad3dddf6e..ee66760f1e63 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,15 +10,24 @@ #include <ctype.h> #include <limits.h> -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); if (cpus != NULL) { - cpus->nr = 1; - cpus->map[0] = -1; + cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__dummy_new(void) +{ + struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + cpus->map[0].cpu = -1; return cpus; } @@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void) if (nr_cpus < 0) return NULL; - cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; - - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); + cpus->map[i].cpu = i; } return cpus; @@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void) return cpu_map__default_new(); } -static int cmp_int(const void *a, const void *b) + +static int cmp_cpu(const void *a, const void *b) { - return *(const int *)a - *(const int*)b; + const struct perf_cpu *cpu_a = a, *cpu_b = b; + + return cpu_a->cpu - cpu_b->cpu; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { - size_t payload_size = nr_cpus * sizeof(int); - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); + size_t payload_size = nr_cpus * sizeof(struct perf_cpu); + struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); int i, j; if (cpus != NULL) { memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); + qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i] != cpus->map[i - 1]) - cpus->map[j++] = cpus->map[i]; + if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) + cpus->map[j++].cpu = cpus->map[i].cpu; } cpus->nr = j; assert(j <= nr_cpus); - refcount_set(&cpus->refcnt, 1); } - return cpus; } @@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) { struct perf_cpu_map *cpus = NULL; int nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; @@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (new_max >= max_entries) { max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } while (++prev < cpu) - tmp_cpus[nr_cpus++] = prev; + tmp_cpus[nr_cpus++].cpu = prev; } if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = cpu; + tmp_cpus[nr_cpus++].cpu = cpu; if (n == 2 && sep == '-') prev = cpu; else @@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) unsigned long start_cpu, end_cpu = 0; char *p = NULL; int i, nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; if (!cpu_list) @@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i] == (int)start_cpu) + if (tmp_cpus[i].cpu == (int)start_cpu) goto invalid; if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int)start_cpu; } if (*p) ++p; @@ -250,12 +257,16 @@ out: return cpus; } -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { + struct perf_cpu result = { + .cpu = -1 + }; + if (cpus && idx < cpus->nr) return cpus->map[idx]; - return -1; + return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0] == -1 : true; + return map ? map->map[0].cpu == -1 : true; } -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { - int low = 0, high = cpus->nr; + int low, high; + if (!cpus) + return -1; + + low = 0; + high = cpus->nr; while (low < high) { - int idx = (low + high) / 2, - cpu_at_idx = cpus->map[idx]; + int idx = (low + high) / 2; + struct perf_cpu cpu_at_idx = cpus->map[idx]; - if (cpu_at_idx == cpu) + if (cpu_at_idx.cpu == cpu.cpu) return idx; - if (cpu_at_idx > cpu) + if (cpu_at_idx.cpu > cpu.cpu) high = idx; else low = idx + 1; @@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } -int perf_cpu_map__max(struct perf_cpu_map *map) +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { + return perf_cpu_map__idx(cpus, cpu) != -1; +} + +struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) +{ + struct perf_cpu result = { + .cpu = -1 + }; + // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : -1; + return map->nr > 0 ? map->map[map->nr - 1] : result; } /* @@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map) struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - int *tmp_cpus; + struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; @@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, if (!other) return orig; if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(int))) + !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) return orig; tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(int)); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) + if (orig->map[i].cpu <= other->map[j].cpu) { + if (orig->map[i].cpu == other->map[j].cpu) j++; tmp_cpus[k++] = orig->map[i++]; } else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index e37dfad31383..9a770bfdc804 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, int cpu) + int output, struct perf_cpu cpu) { return perf_mmap__mmap(map, mp, output, cpu); } @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; } -void __perf_evlist__set_leader(struct list_head *list) +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *evsel, *leader; + struct perf_evsel *first, *last, *evsel; - leader = list_entry(list->next, struct perf_evsel, node); - evsel = list_entry(list->prev, struct perf_evsel, node); + first = list_first_entry(list, struct perf_evsel, node); + last = list_last_entry(list, struct perf_evsel, node); - leader->nr_members = evsel->idx - leader->idx + 1; + leader->nr_members = last->idx - first->idx + 1; __perf_evlist__for_each_entry(list, evsel) evsel->leader = leader; @@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list) void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->entries); + __perf_evlist__set_leader(&evlist->entries, first); } } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8441e3e1aaac..210ea7c06ce8 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) -#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) +#define FD(_evsel, _cpu_map_idx, _thread) \ + ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) +#define MMAP(_evsel, _cpu_map_idx, _thread) \ + (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ + : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { - int cpu, thread; - for (cpu = 0; cpu < ncpus; cpu++) { + int idx, thread; + + for (idx = 0; idx < ncpus; idx++) { for (thread = 0; thread < nthreads; thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd) *fd = -1; @@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre static int sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, int cpu, int group_fd, + pid_t pid, struct perf_cpu cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags); } -static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; int *fd; @@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou if (!leader->fd) return -ENOTCONN; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); if (fd == NULL || *fd == -1) return -EBADF; @@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int cpu, thread, err = 0; + struct perf_cpu cpu; + int idx, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -136,24 +141,24 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, } if (evsel->fd == NULL && - perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; - for (cpu = 0; cpu < cpus->nr; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { for (thread = 0; thread < threads->nr; thread++) { int fd, group_fd, *evsel_fd; - evsel_fd = FD(evsel, cpu, thread); + evsel_fd = FD(evsel, idx, thread); if (evsel_fd == NULL) return -EINVAL; - err = get_group_fd(evsel, cpu, thread, &group_fd); + err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) return err; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], group_fd, 0); + cpu, group_fd, 0); if (fd < 0) return -errno; @@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, return err; } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd && *fd >= 0) { close(*fd); @@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) void perf_evsel__close_fd(struct perf_evsel *evsel) { - int cpu; - - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - perf_evsel__close_fd_cpu(evsel, cpu); + for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) + perf_evsel__close_fd_cpu(evsel, idx); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx) { if (evsel->fd == NULL) return; - perf_evsel__close_fd_cpu(evsel, cpu); + perf_evsel__close_fd_cpu(evsel, cpu_map_idx); } void perf_evsel__munmap(struct perf_evsel *evsel) { - int cpu, thread; + int idx, thread; if (evsel->fd == NULL || evsel->mmap == NULL) return; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(MMAP(evsel, cpu, thread)); + perf_mmap__munmap(MMAP(evsel, idx, thread)); } } @@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel) int perf_evsel__mmap(struct perf_evsel *evsel, int pages) { - int ret, cpu, thread; + int ret, idx, thread; struct perf_mmap_param mp = { .prot = PROT_READ | PROT_WRITE, .mask = (pages * page_size) - 1, @@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) return -ENOMEM; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); struct perf_mmap *map; + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; - map = MMAP(evsel, cpu, thread); + map = MMAP(evsel, idx, thread); perf_mmap__init(map, NULL, false, NULL); ret = perf_mmap__mmap(map, &mp, *fd, cpu); @@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) return 0; } -void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); - if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL) return NULL; - return MMAP(evsel, cpu, thread)->base; + return MMAP(evsel, cpu_map_idx, thread)->base; } int perf_evsel__read_size(struct perf_evsel *evsel) @@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } -int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; - if (MMAP(evsel, cpu, thread) && - !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) + if (MMAP(evsel, cpu_map_idx, thread) && + !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; if (readn(*fd, count->values, size) <= 0) @@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, - int cpu) + int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int err; - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd == NULL || *fd < 0) return -1; @@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, return 0; } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } int perf_evsel__enable(struct perf_evsel *evsel) @@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel) return err; } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx); } int perf_evsel__disable(struct perf_evsel *evsel) @@ -380,7 +384,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { int err = 0, i; - for (i = 0; i < evsel->cpus->nr && !err; i++) + for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter, i); @@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); evsel->ids = 0; } + +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled) +{ + s8 scaled = 0; + + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double)count->val * count->ena / count->run); + } + } + + if (pscaled) + *pscaled = scaled; +} diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..1973a18c096b 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -3,17 +3,27 @@ #define __LIBPERF_INTERNAL_CPUMAP_H #include <linux/refcount.h> +#include <perf/cpumap.h> +/** + * A sized, reference counted, sorted array of integers representing CPU + * numbers. This is commonly used to capture which CPUs a PMU is associated + * with. The indices into the cpumap are frequently used as they avoid having + * gaps if CPU numbers were used. For events associated with a pid, rather than + * a CPU, a single dummy map with an entry of -1 is used. + */ struct perf_cpu_map { refcount_t refcnt; + /** Length of the map array. */ int nr; - int map[]; + /** The CPU values. */ + struct perf_cpu map[]; }; #ifndef MAX_NR_CPUS #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a88..4cefade540bd 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <api/fd/array.h> +#include <internal/cpumap.h> #include <internal/evsel.h> #define PERF_EVLIST__HLIST_BITS 8 @@ -36,7 +37,7 @@ typedef void typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; @@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list); +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1f3eacbad2e8..cfc9ebd7968e 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@ #include <linux/perf_event.h> #include <stdbool.h> #include <sys/types.h> +#include <internal/cpumap.h> -struct perf_cpu_map; struct perf_thread_map; struct xyarray; @@ -27,7 +27,7 @@ struct perf_sample_id { * queue number. */ int idx; - int cpu; + struct perf_cpu cpu; pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. */ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5e3422f40ed5..5a062af8e9d8 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,6 +6,7 @@ #include <linux/refcount.h> #include <linux/types.h> #include <stdbool.h> +#include <internal/cpumap.h> /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -24,7 +25,7 @@ struct perf_mmap { void *base; int mask; int fd; - int cpu; + struct perf_cpu cpu; refcount_t refcnt; u64 prev; u64 start; @@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu); + int fd, struct perf_cpu cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bf..4a2edbdb5e2b 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,10 +3,14 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> +#include <perf/cpumap.h> #include <stdio.h> #include <stdbool.h> -struct perf_cpu_map; +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int cpu; +}; LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); @@ -16,10 +20,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 60eae25076d3..2a9516b42d15 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,6 +4,8 @@ #include <stdint.h> #include <perf/core.h> +#include <stdbool.h> +#include <linux/types.h> struct perf_evsel; struct perf_event_attr; @@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled); #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 71468606e8a7..6fa0d651576b 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -2,6 +2,7 @@ LIBPERF_0.0.1 { global: libperf_init; perf_cpu_map__dummy_new; + perf_cpu_map__default_new; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; @@ -10,6 +11,7 @@ LIBPERF_0.0.1 { perf_cpu_map__cpu; perf_cpu_map__empty; perf_cpu_map__max; + perf_cpu_map__has; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; @@ -50,6 +52,7 @@ LIBPERF_0.0.1 { perf_mmap__read_init; perf_mmap__read_done; perf_mmap__read_event; + perf_counts_values__scale; local: *; }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index c89dfa5f67b3..0d1634cedf44 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -13,6 +13,7 @@ #include <internal/lib.h> #include <linux/kernel.h> #include <linux/math64.h> +#include <linux/stringify.h> #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, @@ -32,7 +33,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu) + int fd, struct perf_cpu cpu) { map->prev = 0; map->mask = mp->mask; @@ -294,6 +295,103 @@ static u64 read_timestamp(void) return low | ((u64)high) << 32; } +#elif defined(__aarch64__) +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +static u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +#define PMEVCNTR_READ(idx) \ + static u64 read_pmevcntr_##idx(void) { \ + return read_sysreg(pmevcntr##idx##_el0); \ + } + +PMEVCNTR_READ(0); +PMEVCNTR_READ(1); +PMEVCNTR_READ(2); +PMEVCNTR_READ(3); +PMEVCNTR_READ(4); +PMEVCNTR_READ(5); +PMEVCNTR_READ(6); +PMEVCNTR_READ(7); +PMEVCNTR_READ(8); +PMEVCNTR_READ(9); +PMEVCNTR_READ(10); +PMEVCNTR_READ(11); +PMEVCNTR_READ(12); +PMEVCNTR_READ(13); +PMEVCNTR_READ(14); +PMEVCNTR_READ(15); +PMEVCNTR_READ(16); +PMEVCNTR_READ(17); +PMEVCNTR_READ(18); +PMEVCNTR_READ(19); +PMEVCNTR_READ(20); +PMEVCNTR_READ(21); +PMEVCNTR_READ(22); +PMEVCNTR_READ(23); +PMEVCNTR_READ(24); +PMEVCNTR_READ(25); +PMEVCNTR_READ(26); +PMEVCNTR_READ(27); +PMEVCNTR_READ(28); +PMEVCNTR_READ(29); +PMEVCNTR_READ(30); + +/* + * Read a value direct from PMEVCNTR<idx> + */ +static u64 read_perf_counter(unsigned int counter) +{ + static u64 (* const read_f[])(void) = { + read_pmevcntr_0, + read_pmevcntr_1, + read_pmevcntr_2, + read_pmevcntr_3, + read_pmevcntr_4, + read_pmevcntr_5, + read_pmevcntr_6, + read_pmevcntr_7, + read_pmevcntr_8, + read_pmevcntr_9, + read_pmevcntr_10, + read_pmevcntr_11, + read_pmevcntr_13, + read_pmevcntr_12, + read_pmevcntr_14, + read_pmevcntr_15, + read_pmevcntr_16, + read_pmevcntr_17, + read_pmevcntr_18, + read_pmevcntr_19, + read_pmevcntr_20, + read_pmevcntr_21, + read_pmevcntr_22, + read_pmevcntr_23, + read_pmevcntr_24, + read_pmevcntr_25, + read_pmevcntr_26, + read_pmevcntr_27, + read_pmevcntr_28, + read_pmevcntr_29, + read_pmevcntr_30, + read_pmccntr + }; + + if (counter < ARRAY_SIZE(read_f)) + return (read_f[counter])(); + + return 0; +} + +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } @@ -353,8 +451,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count count->ena += delta; if (idx) count->run += delta; - - cnt = mul_u64_u64_div64(cnt, count->ena, count->run); } count->val = cnt; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index d39378eaf897..87b0510a556f 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -14,6 +14,8 @@ static int libperf_print(enum libperf_print_level level, int test_cpumap(int argc, char **argv) { struct perf_cpu_map *cpus; + struct perf_cpu cpu; + int idx; __T_START; @@ -27,6 +29,15 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); + cpus = perf_cpu_map__default_new(); + if (!cpus) + return -1; + + perf_cpu_map__for_each_cpu(cpu, idx, cpus) + __T("wrong cpu number", cpu.cpu != -1); + + perf_cpu_map__put(cpus); + __T_END; return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..fa854c83b7e7 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) +#include <inttypes.h> #include <sched.h> #include <stdio.h> #include <stdarg.h> @@ -21,6 +22,9 @@ #include "tests.h" #include <internal/evsel.h> +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL + static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -331,7 +335,8 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, cpu, tmp; + int id, err, tmp; + struct perf_cpu cpu; union perf_event *event; int count = 0; @@ -374,7 +379,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu, &mask); + CPU_SET(cpu.cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); @@ -413,6 +418,159 @@ static int test_mmap_cpus(void) return 0; } +static double display_error(long long average, + long long high, + long long low, + long long expected) +{ + double error; + + error = (((double)average - expected) / expected) * 100.0; + + __T_VERBOSE(" Expected: %lld\n", expected); + __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", + high, low, average); + + __T_VERBOSE(" Average Error = %.2f%%\n", error); + + return error; +} + +static int test_stat_multiplexing(void) +{ + struct perf_counts_values expected_counts = { .val = 0 }; + struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; + struct perf_thread_map *threads; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + int err, i, nonzero = 0; + unsigned long count; + long long max = 0, min = 0, avg = 0; + double error = 0.0; + s8 scaled = 0; + + /* read for non-multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open evsel", err == 0); + + err = perf_evsel__enable(evsel); + __T("failed to enable evsel", err == 0); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + perf_evsel__read(evsel, 0, 0, &expected_counts); + __T("failed to read value for evsel", expected_counts.val != 0); + __T("failed to read non-multiplexing event count", + expected_counts.ena == expected_counts.run); + + err = perf_evsel__disable(evsel); + __T("failed to enable evsel", err == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + + /* read for multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + for (i = 0; i < EVENT_NUM; i++) { + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + perf_evlist__add(evlist, evsel); + } + perf_evlist__set_maps(evlist, NULL, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evsel", err == 0); + + perf_evlist__enable(evlist); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + i = 0; + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts[i]); + __T("failed to read value for evsel", counts[i].val != 0); + i++; + } + + perf_evlist__disable(evlist); + + min = counts[0].val; + for (i = 0; i < EVENT_NUM; i++) { + __T_VERBOSE("Event %2d -- Raw count = %" PRIu64 ", run = %" PRIu64 ", enable = %" PRIu64 "\n", + i, counts[i].val, counts[i].run, counts[i].ena); + + perf_counts_values__scale(&counts[i], true, &scaled); + if (scaled == 1) { + __T_VERBOSE("\t Scaled count = %" PRIu64 " (%.2lf%%, %" PRIu64 "/%" PRIu64 ")\n", + counts[i].val, + (double)counts[i].run / (double)counts[i].ena * 100.0, + counts[i].run, counts[i].ena); + } else if (scaled == -1) { + __T_VERBOSE("\t Not Running\n"); + } else { + __T_VERBOSE("\t Not Scaling\n"); + } + + if (counts[i].val > max) + max = counts[i].val; + + if (counts[i].val < min) + min = counts[i].val; + + avg += counts[i].val; + + if (counts[i].val != 0) + nonzero++; + } + + if (nonzero != 0) + avg = avg / nonzero; + else + avg = 0; + + error = display_error(avg, max, min, expected_counts.val); + + __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); + + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + + return 0; +} + int test_evlist(int argc, char **argv) { __T_START; @@ -424,6 +582,7 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); + test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 33ae9334861a..89be89afb24d 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -130,6 +130,9 @@ static int test_stat_user_read(int event) struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = event, +#ifdef __aarch64__ + .config1 = 0x2, /* Request user access */ +#endif }; int err, i; @@ -150,7 +153,7 @@ static int test_stat_user_read(int event) pc = perf_evsel__mmap_base(evsel, 0, 0); __T("failed to get mmapped address", pc); -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); __T("userspace counter access not enabled", pc->index); __T("userspace counter width not set", pc->pmc_width >= 32); diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index 794a375dad36..b2aec04fce8f 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...) static inline void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fe58843d047c..8e24c4c78c7f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field) return 0; } +static int field_is_relative_dynamic(struct tep_format_field *field) +{ + if (strncmp(field->type, "__rel_loc", 9) == 0) + return 1; + + return 0; +} + static int field_is_long(struct tep_format_field *field) { /* includes long long */ @@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_STRING; if (field_is_dynamic(field)) field->flags |= TEP_FIELD_IS_DYNAMIC; + if (field_is_relative_dynamic(field)) + field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; if (field_is_long(field)) field->flags |= TEP_FIELD_IS_LONG; @@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, arg->type = TEP_PRINT_STRING; arg->string.string = token; - arg->string.offset = -1; + arg->string.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar arg->type = TEP_PRINT_BITMASK; arg->bitmask.bitmask = token; - arg->bitmask.offset = -1; + arg->bitmask.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_int_array(event, arg, tok); } - if (strcmp(token, "__get_str") == 0) { + if (strcmp(token, "__get_str") == 0 || + strcmp(token, "__get_rel_str") == 0) { free_token(token); return process_str(event, arg, tok); } - if (strcmp(token, "__get_bitmask") == 0) { + if (strcmp(token, "__get_bitmask") == 0 || + strcmp(token, "__get_rel_bitmask") == 0) { free_token(token); return process_bitmask(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array") == 0) { + if (strcmp(token, "__get_dynamic_array") == 0 || + strcmp(token, "__get_rel_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array_len") == 0) { + if (strcmp(token, "__get_dynamic_array_len") == 0 || + strcmp(token, "__get_rel_dynamic_array_len") == 0) { free_token(token); return process_dynamic_array_len(event, arg, tok); } @@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_STRING: { int str_offset; - if (arg->string.offset == -1) { - struct tep_format_field *f; + if (!arg->string.field) + arg->string.field = tep_find_any_field(event, arg->string.string); + if (!arg->string.field) + break; - f = tep_find_any_field(event, arg->string.string); - arg->string.offset = f->offset; - } - str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, + *(unsigned int *)(data + arg->string.field->offset)); str_offset &= 0xffff; + if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) + str_offset += arg->string.field->offset + arg->string.field->size; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; } @@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, int bitmask_offset; int bitmask_size; - if (arg->bitmask.offset == -1) { - struct tep_format_field *f; - - f = tep_find_any_field(event, arg->bitmask.bitmask); - arg->bitmask.offset = f->offset; - } - bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + if (!arg->bitmask.field) + arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); + if (!arg->bitmask.field) + break; + bitmask_offset = data2host4(tep, + *(unsigned int *)(data + arg->bitmask.field->offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; + if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) + bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; @@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, data + offset, field->size); *len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else *len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a67ad9a5b835..41d4f9f6a843 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,6 +125,7 @@ enum tep_format_flags { TEP_FIELD_IS_LONG = 32, TEP_FIELD_IS_FLAG = 64, TEP_FIELD_IS_SYMBOLIC = 128, + TEP_FIELD_IS_RELATIVE = 256, }; struct tep_format_field { @@ -153,12 +154,12 @@ struct tep_print_arg_atom { struct tep_print_arg_string { char *string; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_bitmask { char *bitmask; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 368826bb5a57..5df177070d53 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { addr = *(unsigned int *)val; - val = record->data + (addr & 0xffff); size = addr >> 16; + addr &= 0xffff; + if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) + addr += arg->str.field->offset + arg->str.field->size; + val = record->data + addr; } /* diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 5d72f3112e56..ee819a402b69 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -485,6 +485,57 @@ have R ->po X. It wouldn't make sense for a computation to depend somehow on a value that doesn't get loaded from shared memory until later in the code! +Here's a trick question: When is a dependency not a dependency? Answer: +When it is purely syntactic rather than semantic. We say a dependency +between two accesses is purely syntactic if the second access doesn't +actually depend on the result of the first. Here is a trivial example: + + r1 = READ_ONCE(x); + WRITE_ONCE(y, r1 * 0); + +There appears to be a data dependency from the load of x to the store +of y, since the value to be stored is computed from the value that was +loaded. But in fact, the value stored does not really depend on +anything since it will always be 0. Thus the data dependency is only +syntactic (it appears to exist in the code) but not semantic (the +second access will always be the same, regardless of the value of the +first access). Given code like this, a compiler could simply discard +the value returned by the load from x, which would certainly destroy +any dependency. (The compiler is not permitted to eliminate entirely +the load generated for a READ_ONCE() -- that's one of the nice +properties of READ_ONCE() -- but it is allowed to ignore the load's +value.) + +It's natural to object that no one in their right mind would write +code like the above. However, macro expansions can easily give rise +to this sort of thing, in ways that often are not apparent to the +programmer. + +Another mechanism that can lead to purely syntactic dependencies is +related to the notion of "undefined behavior". Certain program +behaviors are called "undefined" in the C language specification, +which means that when they occur there are no guarantees at all about +the outcome. Consider the following example: + + int a[1]; + int i; + + r1 = READ_ONCE(i); + r2 = READ_ONCE(a[r1]); + +Access beyond the end or before the beginning of an array is one kind +of undefined behavior. Therefore the compiler doesn't have to worry +about what will happen if r1 is nonzero, and it can assume that r1 +will always be zero regardless of the value actually loaded from i. +(If the assumption turns out to be wrong the resulting behavior will +be undefined anyway, so the compiler doesn't care!) Thus the value +from the load can be discarded, breaking the address dependency. + +The LKMM is unaware that purely syntactic dependencies are different +from semantic dependencies and therefore mistakenly predicts that the +accesses in the two examples above will be ordered. This is another +example of how the compiler can undermine the memory model. Be warned. + THE READS-FROM RELATION: rf, rfi, and rfe ----------------------------------------- @@ -1813,15 +1864,16 @@ spin_trylock() -- we can call these things lock-releases and lock-acquires -- have two properties beyond those of ordinary releases and acquires. -First, when a lock-acquire reads from a lock-release, the LKMM -requires that every instruction po-before the lock-release must -execute before any instruction po-after the lock-acquire. This would -naturally hold if the release and acquire operations were on different -CPUs, but the LKMM says it holds even when they are on the same CPU. -For example: +First, when a lock-acquire reads from or is po-after a lock-release, +the LKMM requires that every instruction po-before the lock-release +must execute before any instruction po-after the lock-acquire. This +would naturally hold if the release and acquire operations were on +different CPUs and accessed the same lock variable, but the LKMM says +it also holds when they are on the same CPU, even if they access +different lock variables. For example: int x, y; - spinlock_t s; + spinlock_t s, t; P0() { @@ -1830,9 +1882,9 @@ For example: spin_lock(&s); r1 = READ_ONCE(x); spin_unlock(&s); - spin_lock(&s); + spin_lock(&t); r2 = READ_ONCE(y); - spin_unlock(&s); + spin_unlock(&t); } P1() @@ -1842,10 +1894,10 @@ For example: WRITE_ONCE(x, 1); } -Here the second spin_lock() reads from the first spin_unlock(), and -therefore the load of x must execute before the load of y. Thus we -cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the -MP pattern). +Here the second spin_lock() is po-after the first spin_unlock(), and +therefore the load of x must execute before the load of y, even though +the two locking operations use different locks. Thus we cannot have +r1 = 1 and r2 = 0 at the end (this is an instance of the MP pattern). This requirement does not apply to ordinary release and acquire fences, only to lock-related operations. For instance, suppose P0() @@ -1872,13 +1924,13 @@ instructions in the following order: and thus it could load y before x, obtaining r2 = 0 and r1 = 1. -Second, when a lock-acquire reads from a lock-release, and some other -stores W and W' occur po-before the lock-release and po-after the -lock-acquire respectively, the LKMM requires that W must propagate to -each CPU before W' does. For example, consider: +Second, when a lock-acquire reads from or is po-after a lock-release, +and some other stores W and W' occur po-before the lock-release and +po-after the lock-acquire respectively, the LKMM requires that W must +propagate to each CPU before W' does. For example, consider: int x, y; - spinlock_t x; + spinlock_t s; P0() { @@ -1908,7 +1960,12 @@ each CPU before W' does. For example, consider: If r1 = 1 at the end then the spin_lock() in P1 must have read from the spin_unlock() in P0. Hence the store to x must propagate to P2 -before the store to y does, so we cannot have r2 = 1 and r3 = 0. +before the store to y does, so we cannot have r2 = 1 and r3 = 0. But +if P1 had used a lock variable different from s, the writes could have +propagated in either order. (On the other hand, if the code in P0 and +P1 had all executed on a single CPU, as in the example before this +one, then the writes would have propagated in order even if the two +critical sections used different lock variables.) These two special requirements for lock-release and lock-acquire do not arise from the operational model. Nevertheless, kernel developers diff --git a/tools/memory-model/README b/tools/memory-model/README index 9a84c45504ab..9edd402704c4 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -195,6 +195,18 @@ litmus-tests are listed in litmus-tests/README. A great deal more litmus tests are available at https://github.com/paulmckrcu/litmus. + By "representative", it means the one in the litmus-tests + directory is: + + 1) simple, the number of threads should be relatively + small and each thread function should be relatively + simple. + 2) orthogonal, there should be no two litmus tests + describing the same aspect of the memory model. + 3) textbook, developers can easily copy-paste-modify + the litmus tests to use the patterns on their own + code. + lock.cat Provides a front-end analysis of lock acquisition and release, for example, associating a lock acquisition with the preceding diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 2a9b4fe4a84e..d70315fddef6 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -27,7 +27,7 @@ include "lock.cat" (* Release Acquire *) let acq-po = [Acquire] ; po ; [M] let po-rel = [M] ; po ; [Release] -let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po +let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po (* Fences *) let R4rmb = R \ Noreturn (* Reads for which rmb works *) @@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W] let overwrite = co | fr let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb) let to-r = addr | (dep ; [Marked] ; rfi) -let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int) +let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) (* Propagation: Ordering from release operations and strong fences. *) let A-cumul(r) = (rfe ; [Marked])? ; r let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-rf-lock-po) ; [Marked] + po-unlock-lock-po) ; [Marked] let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ; [Marked] ; rfe? ; [Marked] diff --git a/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus new file mode 100644 index 000000000000..eb34123a6ffe --- /dev/null +++ b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus @@ -0,0 +1,35 @@ +C LB+unlocklockonceonce+poacquireonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, all accesses + * in the first must execute before any accesses in the second, even if the + * critical sections are protected by different locks. Note: Even when a + * write executes before a read, their memory effects can be reordered from + * the viewpoint of another CPU (the kind of reordering allowed by TSO). + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + int r1; + + spin_lock(s); + r1 = READ_ONCE(*x); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r2; + + r2 = smp_load_acquire(y); + WRITE_ONCE(*x, 1); +} + +exists (0:r1=1 /\ 1:r2=1) diff --git a/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus new file mode 100644 index 000000000000..2feb1398be71 --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus @@ -0,0 +1,33 @@ +C MP+unlocklockonceonce+fencermbonceonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, stores in the + * first must propagate to each CPU before stores in the second do, even if + * the critical sections are protected by different locks. + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + spin_lock(s); + WRITE_ONCE(*x, 1); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r1; + int r2; + + r1 = READ_ONCE(*y); + smp_rmb(); + r2 = READ_ONCE(*x); +} + +exists (1:r1=1 /\ 1:r2=0) diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index 681f9067fa9e..d311a0ff1ae6 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README @@ -63,6 +63,10 @@ LB+poonceonces.litmus As above, but with store-release replaced with WRITE_ONCE() and load-acquire replaced with READ_ONCE(). +LB+unlocklockonceonce+poacquireonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + load and a store? + MP+onceassign+derefonce.litmus As below, but with rcu_assign_pointer() and an rcu_dereference(). @@ -90,6 +94,10 @@ MP+porevlocks.litmus As below, but with the first access of the writer process and the second access of reader process protected by a lock. +MP+unlocklockonceonce+fencermbonceonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + store and another store? + MP+fencewmbonceonce+fencermbonceonce.litmus Does a smp_wmb() (between the stores) and an smp_rmb() (between the loads) suffice for the message-passing litmus test, where one diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 4d6d7fc13255..479e769ca324 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; - unsigned char op1, op2, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; @@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ @@ -531,6 +540,11 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } break; + case 0xcc: + /* int3 */ + *type = INSN_TRAP; + break; + case 0xe3: /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; @@ -697,10 +711,10 @@ const char *arch_ret_insn(int len) { static const char ret[5][5] = { { BYTE_RET }, - { BYTE_RET, BYTES_NOP1 }, - { BYTE_RET, BYTES_NOP2 }, - { BYTE_RET, BYTES_NOP3 }, - { BYTE_RET, BYTES_NOP4 }, + { BYTE_RET, 0xcc }, + { BYTE_RET, 0xcc, BYTES_NOP1 }, + { BYTE_RET, 0xcc, BYTES_NOP2 }, + { BYTE_RET, 0xcc, BYTES_NOP3 }, }; if (len < 1 || len > 5) { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 8b38b5d6fec7..38070f26105b 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include <objtool/objtool.h> bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; static const char * const check_usage[] = { "objtool check [<options>] file.o", @@ -45,6 +45,7 @@ const struct option check_options[] = { OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), + OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 21735829b860..7c33ec67c4a9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -168,14 +168,16 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", - "__module_put_and_exit", - "complete_and_exit", + "kthread_exit", + "make_task_dead", + "__module_put_and_kthread_exit", + "kthread_complete_and_exit", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_do_exit", + "rewind_stack_and_make_dead", "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", @@ -849,6 +851,10 @@ static const char *uaccess_safe_builtin[] = { "__asan_report_store16_noabort", /* KCSAN */ "__kcsan_check_access", + "__kcsan_mb", + "__kcsan_wmb", + "__kcsan_rmb", + "__kcsan_release", "kcsan_found_watchpoint", "kcsan_setup_watchpoint", "kcsan_check_scoped_accesses", @@ -1068,11 +1074,11 @@ static void annotate_call_site(struct objtool_file *file, } /* - * Many compilers cannot disable KCOV with a function attribute - * so they need a little help, NOP out any KCOV calls from noinstr - * text. + * Many compilers cannot disable KCOV or sanitizer calls with a function + * attribute so they need a little help, NOP out any such calls from + * noinstr text. */ - if (insn->sec->noinstr && sym->kcov) { + if (insn->sec->noinstr && sym->profiling_func) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -1987,6 +1993,31 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +/* + * Return true if name matches an instrumentation function, where calls to that + * function from noinstr code can safely be removed, but compilers won't do so. + */ +static bool is_profiling_func(const char *name) +{ + /* + * Many compilers cannot disable KCOV with a function attribute. + */ + if (!strncmp(name, "__sanitizer_cov_", 16)) + return true; + + /* + * Some compilers currently do not remove __tsan_func_entry/exit nor + * __tsan_atomic_signal_fence (used for barrier instrumentation) with + * the __no_sanitize_thread attribute, remove them. Once the kernel's + * minimum Clang version is 14.0, this can be removed. + */ + if (!strncmp(name, "__tsan_func_", 12) || + !strcmp(name, "__tsan_atomic_signal_fence")) + return true; + + return false; +} + static int classify_symbols(struct objtool_file *file) { struct section *sec; @@ -2007,8 +2038,8 @@ static int classify_symbols(struct objtool_file *file) if (!strcmp(func->name, "__fentry__")) func->fentry = true; - if (!strncmp(func->name, "__sanitizer_cov_", 16)) - func->kcov = true; + if (is_profiling_func(func->name)) + func->profiling_func = true; } } @@ -2823,7 +2854,7 @@ static inline bool func_uaccess_safe(struct symbol *func) static inline const char *call_dest_name(struct instruction *insn) { - static char pvname[16]; + static char pvname[19]; struct reloc *rel; int idx; @@ -3084,6 +3115,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } return validate_return(func, insn, &state); case INSN_CALL: @@ -3127,6 +3164,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } + + /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { ret = validate_sibling_call(file, insn, &state); @@ -3296,14 +3341,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return true; /* - * Ignore any unused exceptions. This can happen when a whitelisted - * function has an exception table entry. - * - * Also ignore alternative replacement instructions. This can happen + * Ignore alternative replacement instructions. This can happen * when a whitelisted function uses one of the ALTERNATIVE macros. */ - if (!strcmp(insn->sec->name, ".fixup") || - !strcmp(insn->sec->name, ".altinstr_replacement") || + if (!strcmp(insn->sec->name, ".altinstr_replacement") || !strcmp(insn->sec->name, ".altinstr_aux")) return true; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 589ff58426ab..76bae3078286 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -26,6 +26,7 @@ enum insn_type { INSN_CLAC, INSN_STD, INSN_CLD, + INSN_TRAP, INSN_OTHER, }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 15ac0b7d3d6a..89ba869ed08f 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index cdc739fa9a6f..d22336781401 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -58,7 +58,7 @@ struct symbol { u8 static_call_tramp : 1; u8 retpoline_thunk : 1; u8 fentry : 1; - u8 kcov : 1; + u8 profiling_func : 1; struct list_head pv_target; }; diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index cd8ce6e8ec12..7e44b419d301 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,12 +74,15 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. ---debuginfod=URLs:: +--debuginfod[=URLs]:: Specify debuginfod URL to be used when retrieving perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: buildid-cache.debuginfod=http://192.168.122.174:8002 + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 3bb75c1f25e8..0420e71698ee 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -587,6 +587,15 @@ record.*:: Use 'n' control blocks in asynchronous (Posix AIO) trace writing mode ('n' default: 1, max: 4). + record.debuginfod:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is 'system', the value of DEBUGINFOD_URLS system environment + variable is used. + diff.*:: diff.order:: This option sets the number of columns to sort the result. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 4dc8d0af19df..57384a97c04f 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 (micro-ops retired). Both events map to IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +(IbsOpCntCtl) set respectively (see the +Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) +section of the [AMD Processor Programming Reference (PPR)] relevant to the +family, model and stepping of the processor being used). + Manual Volume 2: System Programming, 13.3 Instruction-Based Sampling). Examples to use IBS: @@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR Even when an event is not available in a symbolic form within perf right now, it can be encoded in a per processor specific way. -For instance For x86 CPUs NNN represents the raw register encoding with the +For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout -of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, -Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the +Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the +[AMD Processor Programming Reference (PPR)] relevant to the family, model +and stepping of the processor being used). Note: Only the following bit fields can be set in x86 counter registers: event, umask, edge, inv, cmask. Esp. guest/host only and @@ -126,6 +132,38 @@ It's also possible to use pmu syntax: perf record -e cpu/r1a8/ ... perf record -e cpu/r0x1a8/ ... +Some processors, like those from AMD, support event codes and unit masks +larger than a byte. In such cases, the bits corresponding to the event +configuration parameters can be seen with: + + cat /sys/bus/event_source/devices/<pmu>/format/<config> + +Example: + +If the AMD docs for an EPYC 7713 processor describe an event as: + + Event Umask Event Mask + Num. Value Mnemonic Description + + 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag + hit events. + +raw encoding of 0x0328F cannot be used since the upper nibble of the +EventSelect bits have to be specified via bits 32-35 as can be seen with: + + cat /sys/bus/event_source/devices/cpu/format/event + +raw encoding of 0x20000038F should be used instead: + + perf stat -e r20000038f -a sleep 1 + perf record -e r20000038f ... + +It's also possible to use pmu syntax: + + perf record -e r20000038f -a sleep 1 + perf record -e cpu/r20000038f/ ... + perf record -e cpu/r0x20000038f/ ... + You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. @@ -316,4 +354,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3cf7bac67239..9ccc75935bc5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -30,8 +30,10 @@ OPTIONS - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -713,6 +715,15 @@ measurements: include::intel-hybrid.txt[] +--debuginfod[=URLs]:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 7e6fb7cbc0f4..c06c341e72b9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -36,8 +36,10 @@ report:: - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable $ perf config stat.no-csv-summary=true +--cputype:: +Only enable events on applying cpu with this type for hybrid platform +(e.g. core or atom)" + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 9898a32b8d9c..cac3dfbee7d8 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -38,9 +38,10 @@ Default is to monitor all CPUS. -e <event>:: --event=<event>:: Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + (use 'perf list' to list all events) or a raw PMU event in the form + of rN where N is a hexadecimal value that represents the raw register + encoding with the layout of the event control registers as described + by entries in /sys/bus/event_sources/devices/cpu/format/*. -E <entries>:: --entries=<entries>:: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 3df74cf5651a..96ad944ca6a8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) +HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch @@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++ +OPENCSDLIBS := -lopencsd_c_api +ifeq ($(findstring -static,${LDFLAGS}),-static) + OPENCSDLIBS += -lopencsd -lstdc++ +endif ifdef CSLIBS LIBOPENCSD_LDFLAGS := -L$(CSLIBS) endif @@ -211,6 +215,7 @@ endif ifneq ($(WERROR),0) CORE_CFLAGS += -Werror CXXFLAGS += -Werror + HOSTCFLAGS += -Werror endif ifndef DEBUG @@ -290,6 +295,9 @@ CXXFLAGS += -ggdb3 CXXFLAGS += -funwind-tables CXXFLAGS += -Wno-strict-aliasing +HOSTCFLAGS += -Wall +HOSTCFLAGS += -Wextra + # Enforce a non-executable stack, as we may regress (again) in the future by # adding assembler files missing the .GNU-stack linker note. LDFLAGS += -Wl,-z,noexecstack diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 80522bcfafe0..ac861e42c8f7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -226,7 +226,7 @@ else endif export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS include $(srctree)/tools/build/Makefile.include @@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h -SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index 4085419283d0..99a06550e25d 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM_R0: - return "r0"; - case PERF_REG_ARM_R1: - return "r1"; - case PERF_REG_ARM_R2: - return "r2"; - case PERF_REG_ARM_R3: - return "r3"; - case PERF_REG_ARM_R4: - return "r4"; - case PERF_REG_ARM_R5: - return "r5"; - case PERF_REG_ARM_R6: - return "r6"; - case PERF_REG_ARM_R7: - return "r7"; - case PERF_REG_ARM_R8: - return "r8"; - case PERF_REG_ARM_R9: - return "r9"; - case PERF_REG_ARM_R10: - return "r10"; - case PERF_REG_ARM_FP: - return "fp"; - case PERF_REG_ARM_IP: - return "ip"; - case PERF_REG_ARM_SP: - return "sp"; - case PERF_REG_ARM_LR: - return "lr"; - case PERF_REG_ARM_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 293a23bf8be3..2e8b2c4365a0 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr, struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); /* Set option of each CPU we have */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + !perf_cpu_map__has(online_cpus, cpu)) continue; if (option & BIT(ETM_OPT_CTXTID)) { @@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } - /* Validate auxtrace_mmap_pages provided by user */ - if (opts->auxtrace_mmap_pages) { - unsigned int max_page = (KiB(128) / page_size); - size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; - - if (!privileged && - opts->auxtrace_mmap_pages > max_page) { - opts->auxtrace_mmap_pages = max_page; - pr_err("auxtrace too big, truncating to %d\n", - max_page); - } - - if (!is_power_of_2(sz)) { - pr_err("Invalid mmap size for %s: must be a power of 2\n", - CORESIGHT_ETM_PMU_NAME); - return -EINVAL; - } - } - if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); @@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__empty(event_cpus)) { - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + !perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } else { /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, } else { /* Make sure all specified CPUs are online */ for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { - if (cpu_map__has(event_cpus, i) && - !cpu_map__has(online_cpus, i)) + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(event_cpus, cpu) && + !perf_cpu_map__has(online_cpus, cpu)) return -EINVAL; } @@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) - if (cpu_map__has(cpu_map, i)) + for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(cpu_map, cpu)) cs_etm_get_metadata(i, &offset, itr, info); + } perf_cpu_map__put(online_cpus); diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index fa3e07459f76..35a3cc775b39 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -4,7 +4,9 @@ #include <stdlib.h> #include <linux/types.h> +#define perf_event_arm_regs perf_event_arm64_regs #include <asm/perf_regs.h> +#undef perf_event_arm_regs void perf_regs_load(u64 *regs); @@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM64_X0: - return "x0"; - case PERF_REG_ARM64_X1: - return "x1"; - case PERF_REG_ARM64_X2: - return "x2"; - case PERF_REG_ARM64_X3: - return "x3"; - case PERF_REG_ARM64_X4: - return "x4"; - case PERF_REG_ARM64_X5: - return "x5"; - case PERF_REG_ARM64_X6: - return "x6"; - case PERF_REG_ARM64_X7: - return "x7"; - case PERF_REG_ARM64_X8: - return "x8"; - case PERF_REG_ARM64_X9: - return "x9"; - case PERF_REG_ARM64_X10: - return "x10"; - case PERF_REG_ARM64_X11: - return "x11"; - case PERF_REG_ARM64_X12: - return "x12"; - case PERF_REG_ARM64_X13: - return "x13"; - case PERF_REG_ARM64_X14: - return "x14"; - case PERF_REG_ARM64_X15: - return "x15"; - case PERF_REG_ARM64_X16: - return "x16"; - case PERF_REG_ARM64_X17: - return "x17"; - case PERF_REG_ARM64_X18: - return "x18"; - case PERF_REG_ARM64_X19: - return "x19"; - case PERF_REG_ARM64_X20: - return "x20"; - case PERF_REG_ARM64_X21: - return "x21"; - case PERF_REG_ARM64_X22: - return "x22"; - case PERF_REG_ARM64_X23: - return "x23"; - case PERF_REG_ARM64_X24: - return "x24"; - case PERF_REG_ARM64_X25: - return "x25"; - case PERF_REG_ARM64_X26: - return "x26"; - case PERF_REG_ARM64_X27: - return "x27"; - case PERF_REG_ARM64_X28: - return "x28"; - case PERF_REG_ARM64_X29: - return "x29"; - case PERF_REG_ARM64_SP: - return "sp"; - case PERF_REG_ARM64_LR: - return "lr"; - case PERF_REG_ARM64_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 7e7714290a87..d2ce31e28cd7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -5,6 +5,8 @@ #include <string.h> #include "debug.h" #include "symbol.h" +#include "callchain.h" +#include "record.h" /* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory @@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end = c->start; pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } + +void arch__add_leaf_frame_record_opts(struct record_opts *opts) +{ + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; +} diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index d3a18f9c85f6..79124bba713e 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void) * The cpumap should cover all CPUs. Otherwise, some CPUs may * not support some events or have different event IDs. */ - if (pmu->cpus->nr != cpu__max_cpu()) + if (pmu->cpus->nr != cpu__max_cpu().cpu) return NULL; return perf_pmu__find_map(pmu); diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 25ac3bdcb9d1..1afcc0e916c2 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,86 +15,4 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_CSKY_A0: - return "a0"; - case PERF_REG_CSKY_A1: - return "a1"; - case PERF_REG_CSKY_A2: - return "a2"; - case PERF_REG_CSKY_A3: - return "a3"; - case PERF_REG_CSKY_REGS0: - return "regs0"; - case PERF_REG_CSKY_REGS1: - return "regs1"; - case PERF_REG_CSKY_REGS2: - return "regs2"; - case PERF_REG_CSKY_REGS3: - return "regs3"; - case PERF_REG_CSKY_REGS4: - return "regs4"; - case PERF_REG_CSKY_REGS5: - return "regs5"; - case PERF_REG_CSKY_REGS6: - return "regs6"; - case PERF_REG_CSKY_REGS7: - return "regs7"; - case PERF_REG_CSKY_REGS8: - return "regs8"; - case PERF_REG_CSKY_REGS9: - return "regs9"; - case PERF_REG_CSKY_SP: - return "sp"; - case PERF_REG_CSKY_LR: - return "lr"; - case PERF_REG_CSKY_PC: - return "pc"; -#if defined(__CSKYABIV2__) - case PERF_REG_CSKY_EXREGS0: - return "exregs0"; - case PERF_REG_CSKY_EXREGS1: - return "exregs1"; - case PERF_REG_CSKY_EXREGS2: - return "exregs2"; - case PERF_REG_CSKY_EXREGS3: - return "exregs3"; - case PERF_REG_CSKY_EXREGS4: - return "exregs4"; - case PERF_REG_CSKY_EXREGS5: - return "exregs5"; - case PERF_REG_CSKY_EXREGS6: - return "exregs6"; - case PERF_REG_CSKY_EXREGS7: - return "exregs7"; - case PERF_REG_CSKY_EXREGS8: - return "exregs8"; - case PERF_REG_CSKY_EXREGS9: - return "exregs9"; - case PERF_REG_CSKY_EXREGS10: - return "exregs10"; - case PERF_REG_CSKY_EXREGS11: - return "exregs11"; - case PERF_REG_CSKY_EXREGS12: - return "exregs12"; - case PERF_REG_CSKY_EXREGS13: - return "exregs13"; - case PERF_REG_CSKY_EXREGS14: - return "exregs14"; - case PERF_REG_CSKY_TLS: - return "tls"; - case PERF_REG_CSKY_HI: - return "hi"; - case PERF_REG_CSKY_LO: - return "lo"; -#endif - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index e2c481fcede6..3f1886ad9d80 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -364,3 +364,4 @@ # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h index ee73b36a14d1..b8cd8bbb37ba 100644 --- a/tools/perf/arch/mips/include/perf_regs.h +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -12,73 +12,4 @@ #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_MIPS_PC: - return "PC"; - case PERF_REG_MIPS_R1: - return "$1"; - case PERF_REG_MIPS_R2: - return "$2"; - case PERF_REG_MIPS_R3: - return "$3"; - case PERF_REG_MIPS_R4: - return "$4"; - case PERF_REG_MIPS_R5: - return "$5"; - case PERF_REG_MIPS_R6: - return "$6"; - case PERF_REG_MIPS_R7: - return "$7"; - case PERF_REG_MIPS_R8: - return "$8"; - case PERF_REG_MIPS_R9: - return "$9"; - case PERF_REG_MIPS_R10: - return "$10"; - case PERF_REG_MIPS_R11: - return "$11"; - case PERF_REG_MIPS_R12: - return "$12"; - case PERF_REG_MIPS_R13: - return "$13"; - case PERF_REG_MIPS_R14: - return "$14"; - case PERF_REG_MIPS_R15: - return "$15"; - case PERF_REG_MIPS_R16: - return "$16"; - case PERF_REG_MIPS_R17: - return "$17"; - case PERF_REG_MIPS_R18: - return "$18"; - case PERF_REG_MIPS_R19: - return "$19"; - case PERF_REG_MIPS_R20: - return "$20"; - case PERF_REG_MIPS_R21: - return "$21"; - case PERF_REG_MIPS_R22: - return "$22"; - case PERF_REG_MIPS_R23: - return "$23"; - case PERF_REG_MIPS_R24: - return "$24"; - case PERF_REG_MIPS_R25: - return "$25"; - case PERF_REG_MIPS_R28: - return "$28"; - case PERF_REG_MIPS_R29: - return "$29"; - case PERF_REG_MIPS_R30: - return "$30"; - case PERF_REG_MIPS_R31: - return "$31"; - default: - break; - } - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 15109af9d075..2600b4237292 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -529,3 +529,4 @@ # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv +450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 93339d17acc4..9bb17c3f370b 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_POWERPC_NIP #define PERF_REG_SP PERF_REG_POWERPC_R1 -static const char *reg_names[] = { - [PERF_REG_POWERPC_R0] = "r0", - [PERF_REG_POWERPC_R1] = "r1", - [PERF_REG_POWERPC_R2] = "r2", - [PERF_REG_POWERPC_R3] = "r3", - [PERF_REG_POWERPC_R4] = "r4", - [PERF_REG_POWERPC_R5] = "r5", - [PERF_REG_POWERPC_R6] = "r6", - [PERF_REG_POWERPC_R7] = "r7", - [PERF_REG_POWERPC_R8] = "r8", - [PERF_REG_POWERPC_R9] = "r9", - [PERF_REG_POWERPC_R10] = "r10", - [PERF_REG_POWERPC_R11] = "r11", - [PERF_REG_POWERPC_R12] = "r12", - [PERF_REG_POWERPC_R13] = "r13", - [PERF_REG_POWERPC_R14] = "r14", - [PERF_REG_POWERPC_R15] = "r15", - [PERF_REG_POWERPC_R16] = "r16", - [PERF_REG_POWERPC_R17] = "r17", - [PERF_REG_POWERPC_R18] = "r18", - [PERF_REG_POWERPC_R19] = "r19", - [PERF_REG_POWERPC_R20] = "r20", - [PERF_REG_POWERPC_R21] = "r21", - [PERF_REG_POWERPC_R22] = "r22", - [PERF_REG_POWERPC_R23] = "r23", - [PERF_REG_POWERPC_R24] = "r24", - [PERF_REG_POWERPC_R25] = "r25", - [PERF_REG_POWERPC_R26] = "r26", - [PERF_REG_POWERPC_R27] = "r27", - [PERF_REG_POWERPC_R28] = "r28", - [PERF_REG_POWERPC_R29] = "r29", - [PERF_REG_POWERPC_R30] = "r30", - [PERF_REG_POWERPC_R31] = "r31", - [PERF_REG_POWERPC_NIP] = "nip", - [PERF_REG_POWERPC_MSR] = "msr", - [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", - [PERF_REG_POWERPC_CTR] = "ctr", - [PERF_REG_POWERPC_LINK] = "link", - [PERF_REG_POWERPC_XER] = "xer", - [PERF_REG_POWERPC_CCR] = "ccr", - [PERF_REG_POWERPC_SOFTE] = "softe", - [PERF_REG_POWERPC_TRAP] = "trap", - [PERF_REG_POWERPC_DAR] = "dar", - [PERF_REG_POWERPC_DSISR] = "dsisr", - [PERF_REG_POWERPC_SIER] = "sier", - [PERF_REG_POWERPC_MMCRA] = "mmcra", - [PERF_REG_POWERPC_MMCR0] = "mmcr0", - [PERF_REG_POWERPC_MMCR1] = "mmcr1", - [PERF_REG_POWERPC_MMCR2] = "mmcr2", - [PERF_REG_POWERPC_MMCR3] = "mmcr3", - [PERF_REG_POWERPC_SIER2] = "sier2", - [PERF_REG_POWERPC_SIER3] = "sier3", - [PERF_REG_POWERPC_PMC1] = "pmc1", - [PERF_REG_POWERPC_PMC2] = "pmc2", - [PERF_REG_POWERPC_PMC3] = "pmc3", - [PERF_REG_POWERPC_PMC4] = "pmc4", - [PERF_REG_POWERPC_PMC5] = "pmc5", - [PERF_REG_POWERPC_PMC6] = "pmc6", - [PERF_REG_POWERPC_SDAR] = "sdar", - [PERF_REG_POWERPC_SIAR] = "siar", -}; - -static inline const char *__perf_reg_name(int id) -{ - return reg_names[id]; -} #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 3bf441257466..cf430a4c55b9 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) return "Finish Cyc"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) + else if (!strcmp(se_header, "INSTR Latency")) + return "Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; return se_header; } @@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key) { if (!strcmp(sort_key, "p_stage_cyc")) return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; return 0; } diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 6b02a767c918..6944bf0de53e 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,78 +19,4 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_RISCV_PC: - return "pc"; - case PERF_REG_RISCV_RA: - return "ra"; - case PERF_REG_RISCV_SP: - return "sp"; - case PERF_REG_RISCV_GP: - return "gp"; - case PERF_REG_RISCV_TP: - return "tp"; - case PERF_REG_RISCV_T0: - return "t0"; - case PERF_REG_RISCV_T1: - return "t1"; - case PERF_REG_RISCV_T2: - return "t2"; - case PERF_REG_RISCV_S0: - return "s0"; - case PERF_REG_RISCV_S1: - return "s1"; - case PERF_REG_RISCV_A0: - return "a0"; - case PERF_REG_RISCV_A1: - return "a1"; - case PERF_REG_RISCV_A2: - return "a2"; - case PERF_REG_RISCV_A3: - return "a3"; - case PERF_REG_RISCV_A4: - return "a4"; - case PERF_REG_RISCV_A5: - return "a5"; - case PERF_REG_RISCV_A6: - return "a6"; - case PERF_REG_RISCV_A7: - return "a7"; - case PERF_REG_RISCV_S2: - return "s2"; - case PERF_REG_RISCV_S3: - return "s3"; - case PERF_REG_RISCV_S4: - return "s4"; - case PERF_REG_RISCV_S5: - return "s5"; - case PERF_REG_RISCV_S6: - return "s6"; - case PERF_REG_RISCV_S7: - return "s7"; - case PERF_REG_RISCV_S8: - return "s8"; - case PERF_REG_RISCV_S9: - return "s9"; - case PERF_REG_RISCV_S10: - return "s10"; - case PERF_REG_RISCV_S11: - return "s11"; - case PERF_REG_RISCV_T3: - return "t3"; - case PERF_REG_RISCV_T4: - return "t4"; - case PERF_REG_RISCV_T5: - return "t5"; - case PERF_REG_RISCV_T6: - return "t6"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index ed9c5c2eafad..799147658dee 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -452,3 +452,4 @@ # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index ce3031526623..52fcc0891da6 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_S390_R0: - return "R0"; - case PERF_REG_S390_R1: - return "R1"; - case PERF_REG_S390_R2: - return "R2"; - case PERF_REG_S390_R3: - return "R3"; - case PERF_REG_S390_R4: - return "R4"; - case PERF_REG_S390_R5: - return "R5"; - case PERF_REG_S390_R6: - return "R6"; - case PERF_REG_S390_R7: - return "R7"; - case PERF_REG_S390_R8: - return "R8"; - case PERF_REG_S390_R9: - return "R9"; - case PERF_REG_S390_R10: - return "R10"; - case PERF_REG_S390_R11: - return "R11"; - case PERF_REG_S390_R12: - return "R12"; - case PERF_REG_S390_R13: - return "R13"; - case PERF_REG_S390_R14: - return "R14"; - case PERF_REG_S390_R15: - return "R15"; - case PERF_REG_S390_FP0: - return "FP0"; - case PERF_REG_S390_FP1: - return "FP1"; - case PERF_REG_S390_FP2: - return "FP2"; - case PERF_REG_S390_FP3: - return "FP3"; - case PERF_REG_S390_FP4: - return "FP4"; - case PERF_REG_S390_FP5: - return "FP5"; - case PERF_REG_S390_FP6: - return "FP6"; - case PERF_REG_S390_FP7: - return "FP7"; - case PERF_REG_S390_FP8: - return "FP8"; - case PERF_REG_S390_FP9: - return "FP9"; - case PERF_REG_S390_FP10: - return "FP10"; - case PERF_REG_S390_FP11: - return "FP11"; - case PERF_REG_S390_FP12: - return "FP12"; - case PERF_REG_S390_FP13: - return "FP13"; - case PERF_REG_S390_FP14: - return "FP14"; - case PERF_REG_S390_FP15: - return "FP15"; - case PERF_REG_S390_MASK: - return "MASK"; - case PERF_REG_S390_PC: - return "PC"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index fe8f8dd157b4..c84d12608cd2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,6 +371,7 @@ 447 common memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index cddc4cdc0d9b..16e23b722042 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_X86_AX: - return "AX"; - case PERF_REG_X86_BX: - return "BX"; - case PERF_REG_X86_CX: - return "CX"; - case PERF_REG_X86_DX: - return "DX"; - case PERF_REG_X86_SI: - return "SI"; - case PERF_REG_X86_DI: - return "DI"; - case PERF_REG_X86_BP: - return "BP"; - case PERF_REG_X86_SP: - return "SP"; - case PERF_REG_X86_IP: - return "IP"; - case PERF_REG_X86_FLAGS: - return "FLAGS"; - case PERF_REG_X86_CS: - return "CS"; - case PERF_REG_X86_SS: - return "SS"; - case PERF_REG_X86_DS: - return "DS"; - case PERF_REG_X86_ES: - return "ES"; - case PERF_REG_X86_FS: - return "FS"; - case PERF_REG_X86_GS: - return "GS"; -#ifdef HAVE_ARCH_X86_64_SUPPORT - case PERF_REG_X86_R8: - return "R8"; - case PERF_REG_X86_R9: - return "R9"; - case PERF_REG_X86_R10: - return "R10"; - case PERF_REG_X86_R11: - return "R11"; - case PERF_REG_X86_R12: - return "R12"; - case PERF_REG_X86_R13: - return "R13"; - case PERF_REG_X86_R14: - return "R14"; - case PERF_REG_X86_R15: - return "R15"; -#endif /* HAVE_ARCH_X86_64_SUPPORT */ - -#define XMM(x) \ - case PERF_REG_X86_XMM ## x: \ - case PERF_REG_X86_XMM ## x + 1: \ - return "XMM" #x; - XMM(0) - XMM(1) - XMM(2) - XMM(3) - XMM(4) - XMM(5) - XMM(6) - XMM(7) - XMM(8) - XMM(9) - XMM(10) - XMM(11) - XMM(12) - XMM(13) - XMM(14) - XMM(15) -#undef XMM - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 9708ae892061..ba429cadb18f 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2197,6 +2197,924 @@ "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 5da17d41d302..3a47e98fec33 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2459,6 +2459,1432 @@ "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, {{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", "3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 78 49 04 c8 \tldtilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 78 49 04 c8 \tldtilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 79 49 04 c8 \tsttilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 79 49 04 c8 \tsttilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5c d1 \ttdpbf16ps %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7b 5e d1 \ttdpbssd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5e d1 \ttdpbsud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 79 5e d1 \ttdpbusd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 78 5e d1 \ttdpbuud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7b 4b 0c c8 \ttileloadd (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7b 4b 14 c8 \ttileloadd (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 79 4b 0c c8 \ttileloaddt1 (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 79 4b 14 c8 \ttileloaddt1 (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 78 49 c0 \ttilerelease ",}, +{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7a 4b 0c c8 \ttilestored %tmm1,(%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7a 4b 14 c8 \ttilestored %tmm2,(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 7b 49 c0 \ttilezero %tmm0",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "", +"c4 e2 7b 49 f8 \ttilezero %tmm7",}, +{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "", +"f3 0f 01 ee \tclui ",}, +{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "", +"f3 0f c7 f0 \tsenduipi %rax",}, +{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "", +"f3 41 0f c7 f0 \tsenduipi %r8",}, +{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "", +"f3 0f 01 ef \tstui ",}, +{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "", +"f3 0f 01 ed \ttestui ",}, +{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "", +"f3 0f 01 ec \tuiret ",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 79 c1 \tvcvtsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 2a c8 \tvcvtsi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 2c c1 \tvcvttsh2si %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 78 c1 \tvcvttsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 7b c8 \tvcvtusi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index c3808e94c46e..a391464c8dee 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1910,6 +1910,724 @@ int main(void) asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + /* AMX */ + + asm volatile("ldtilecfg (%rax,%rcx,8)"); + asm volatile("ldtilecfg (%r8,%rcx,8)"); + asm volatile("sttilecfg (%rax,%rcx,8)"); + asm volatile("sttilecfg (%r8,%rcx,8)"); + asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbssd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbsud %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbusd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbuud %tmm0, %tmm1, %tmm2"); + asm volatile("tileloadd (%rax,%rcx,8), %tmm1"); + asm volatile("tileloadd (%r8,%rcx,8), %tmm2"); + asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1"); + asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2"); + asm volatile("tilerelease"); + asm volatile("tilestored %tmm1, (%rax,%rcx,8)"); + asm volatile("tilestored %tmm2, (%r8,%rcx,8)"); + asm volatile("tilezero %tmm0"); + asm volatile("tilezero %tmm7"); + + /* User Interrupt */ + + asm volatile("clui"); + asm volatile("senduipi %rax"); + asm volatile("senduipi %r8"); + asm volatile("stui"); + asm volatile("testui"); + asm volatile("uiret"); + + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2usi %xmm1, %rax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2si %xmm1, %rax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %rax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh %rax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph %zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3670,8 +4388,479 @@ int main(void) asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph %zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #endif /* #ifndef __x86_64__ */ + /* Prediction history reset */ + + asm volatile("hreset $0"); + + /* Serialize instruction execution */ + + asm volatile("serialize"); + + /* TSX suspend load address tracking */ + + asm volatile("xresldtrk"); + asm volatile("xsusldtrk"); + /* SGX */ asm volatile("encls"); diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 0b0951030a2f..8d9b55959256 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist) else return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); } + +struct evsel *arch_evlist__leader(struct list_head *list) +{ + struct evsel *evsel, *first; + + first = list_first_entry(list, struct evsel, core.node); + + if (!pmu_have_event("cpu", "slots")) + return first; + + __evlist__for_each_entry(list, evsel) { + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && + evsel->name && strcasestr(evsel->name, "slots")) + return evsel; + } + return first; +} diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index ddaca75c3bc0..134612bde0cb 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -106,7 +106,7 @@ static void nest_epollfd(void) printinfo("Nesting level(s): %d\n", nested); epollfdp = calloc(nested, sizeof(int)); - if (!epollfd) + if (!epollfdp) err(EXIT_FAILURE, "calloc"); for (i = 0; i < nested; i++) { @@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) @@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv) /* default to the number of CPUs */ if (!nthreads) - nthreads = cpu->nr; + nthreads = perf_cpu_map__nr(cpu); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 79d13dbc0a47..37de970c9743 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) @@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv) /* default to the number of CPUs and leave one for the writer pthread */ if (!nthreads) - nthreads = cpu->nr - 1; + nthreads = perf_cpu_map__nr(cpu) - 1; worker = calloc(nthreads, sizeof(*worker)); if (!worker) { diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 482738e9bdad..de56601f69ee 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) int cnt = 0; evlist__for_each_entry(evlist, evsel) - cnt += evsel->core.threads->nr * evsel->core.cpus->nr; + cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus); return cnt; } @@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr) init_stats(&time_stats); - printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr); + printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.cpus)); printf(" Number of threads:\t%d\n", evlist->core.threads->nr); printf(" Number of events:\t%d (%d fds)\n", evlist->core.nr_entries, evlist__count_evsel_fds(evlist)); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fcdea3e44937..dbcecec4eeda 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv) } if (!params.nthreads) /* default to the number of CPUs */ - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) @@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv) goto errmem; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 137890f78e17..6fc9a3d55c1f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, worker[i].futex = &global_futex; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index f7a5ffebb940..2f59d5d1c509 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -131,7 +131,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 0983f40b4b40..861deb934745 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); /* some sanity checks */ if (params.nwakes > params.nthreads || diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 2226a475e782..cfda48bef1d7 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -105,7 +105,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index d0895162c2ba..d291f3a8af5f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -226,7 +226,6 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); - fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + if (argc < 2) { /* No collection specified. */ print_usage(); @@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); - fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 0db3cfc04c47..cd381693658b 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -351,10 +351,14 @@ static int build_id_cache__show_all(void) static int perf_buildid_cache_config(const char *var, const char *value, void *cb) { - const char **debuginfod = cb; + struct perf_debuginfod *di = cb; - if (!strcmp(var, "buildid-cache.debuginfod")) - *debuginfod = strdup(value); + if (!strcmp(var, "buildid-cache.debuginfod")) { + di->urls = strdup(value); + if (!di->urls) + return -ENOMEM; + di->set = true; + } return 0; } @@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL, - *debuginfod = NULL; + *kcore_filename = NULL; + struct perf_debuginfod debuginfod = { }; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), - OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", - "set debuginfod url"), + OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls, + &debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); - if (debuginfod) { - pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); - setenv("DEBUGINFOD_URLS", debuginfod, 1); - } + perf_debuginfod_setup(&debuginfod); /* -l is exclusive. It can not be used with other options. */ if (list_files && opts_flag) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b5c67ef73862..77dd4afacca4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session) { struct numa_node *n; unsigned long **nodes; - int node, cpu; + int node, idx; + struct perf_cpu cpu; int *cpu2node; if (c2c.node_info > 2) @@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session) if (!cpu2node) return -ENOMEM; - for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) - cpu2node[cpu] = -1; + for (idx = 0; idx < c2c.cpus_cnt; idx++) + cpu2node[idx] = -1; c2c.cpu2node = cpu2node; @@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session) if (perf_cpu_map__empty(map)) continue; - for (cpu = 0; cpu < map->nr; cpu++) { - set_bit(map->map[cpu], set); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + set_bit(cpu.cpu, set); - if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[map->map[cpu]] = node; + cpu2node[cpu.cpu] = node; } } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 87cb11a7a3ee..a8785dec5ca6 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,7 +13,9 @@ #include <signal.h> #include <stdlib.h> #include <fcntl.h> +#include <math.h> #include <poll.h> +#include <ctype.h> #include <linux/capability.h> #include <linux/string.h> @@ -28,36 +30,12 @@ #include "strfilter.h" #include "util/cap.h" #include "util/config.h" +#include "util/ftrace.h" #include "util/units.h" #include "util/parse-sublevel-options.h" #define DEFAULT_TRACER "function_graph" -struct perf_ftrace { - struct evlist *evlist; - struct target target; - const char *tracer; - struct list_head filters; - struct list_head notrace; - struct list_head graph_funcs; - struct list_head nograph_funcs; - int graph_depth; - unsigned long percpu_buffer_size; - bool inherit; - int func_stack_trace; - int func_irq_info; - int graph_nosleep_time; - int graph_noirqs; - int graph_verbose; - int graph_thresh; - unsigned int initial_delay; -}; - -struct filter_entry { - struct list_head list; - char name[]; -}; - static volatile int workload_exec_errno; static bool done; @@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu; mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ @@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace) return 0; } -static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. */ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} + +static int __cmd_ftrace(struct perf_ftrace *ftrace) { char *trace_file; int trace_fd; @@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); - signal(SIGPIPE, sig_handler); + select_tracer(ftrace); if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); @@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (write_tracing_file("trace", "0") < 0) goto out; - if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { - goto out; - } - if (set_tracing_options(ftrace) < 0) goto out_reset; @@ -693,6 +680,270 @@ out: return (done && !workload_exec_errno) ? 0 : -1; } +static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) +{ + char *p, *q; + char *unit; + double num; + int i; + + /* ensure NUL termination */ + buf[len] = '\0'; + + /* handle data line by line */ + for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) { + *q = '\0'; + /* move it to the line buffer */ + strcat(linebuf, p); + + /* + * parse trace output to get function duration like in + * + * # tracer: function_graph + * # + * # CPU DURATION FUNCTION CALLS + * # | | | | | | | + * 1) + 10.291 us | do_filp_open(); + * 1) 4.889 us | do_filp_open(); + * 1) 6.086 us | do_filp_open(); + * + */ + if (linebuf[0] == '#') + goto next; + + /* ignore CPU */ + p = strchr(linebuf, ')'); + if (p == NULL) + p = linebuf; + + while (*p && !isdigit(*p) && (*p != '|')) + p++; + + /* no duration */ + if (*p == '\0' || *p == '|') + goto next; + + num = strtod(p, &unit); + if (!unit || strncmp(unit, " us", 3)) + goto next; + + i = log2(num); + if (i < 0) + i = 0; + if (i >= NUM_BUCKET) + i = NUM_BUCKET - 1; + + buckets[i]++; + +next: + /* empty the line buffer for the next output */ + linebuf[0] = '\0'; + } + + /* preserve any remaining output (before newline) */ + strcat(linebuf, p); +} + +static void display_histogram(int buckets[]) +{ + int i; + int total = 0; + int bar_total = 46; /* to fit in 80 column */ + char bar[] = "###############################################"; + int bar_len; + + for (i = 0; i < NUM_BUCKET; i++) + total += buckets[i]; + + if (total == 0) { + printf("No data found\n"); + return; + } + + printf("# %14s | %10s | %-*s |\n", + " DURATION ", "COUNT", bar_total, "GRAPH"); + + bar_len = buckets[0] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + + for (i = 1; i < NUM_BUCKET - 1; i++) { + int start = (1 << (i - 1)); + int stop = 1 << i; + const char *unit = "us"; + + if (start >= 1024) { + start >>= 10; + stop >>= 10; + unit = "ms"; + } + bar_len = buckets[i] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + start, stop, unit, buckets[i], bar_len, bar, + bar_total - bar_len, ""); + } + + bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; + printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", + 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar, + bar_total - bar_len, ""); + +} + +static int prepare_func_latency(struct perf_ftrace *ftrace) +{ + char *trace_file; + int fd; + + if (ftrace->target.use_bpf) + return perf_ftrace__latency_prepare_bpf(ftrace); + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + return -1; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + return -1; + + if (set_tracing_options(ftrace) < 0) + return -1; + + /* force to use the function_graph tracer to track duration */ + if (write_tracing_file("current_tracer", "function_graph") < 0) { + pr_err("failed to set current_tracer to function_graph\n"); + return -1; + } + + trace_file = get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to open trace_pipe\n"); + return -1; + } + + fd = open(trace_file, O_RDONLY); + if (fd < 0) + pr_err("failed to open trace_pipe\n"); + + put_tracing_file(trace_file); + return fd; +} + +static int start_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_start_bpf(ftrace); + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + return -1; + } + + return 0; +} + +static int stop_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_stop_bpf(ftrace); + + write_tracing_file("tracing_on", "0"); + return 0; +} + +static int read_func_latency(struct perf_ftrace *ftrace, int buckets[]) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_read_bpf(ftrace, buckets); + + return 0; +} + +static int cleanup_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_cleanup_bpf(ftrace); + + reset_tracing_files(ftrace); + return 0; +} + +static int __cmd_latency(struct perf_ftrace *ftrace) +{ + int trace_fd; + char buf[4096]; + char line[256]; + struct pollfd pollfd = { + .events = POLLIN, + }; + int buckets[NUM_BUCKET] = { }; + + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { + pr_err("ftrace only works for %s!\n", +#ifdef HAVE_LIBCAP_SUPPORT + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" +#else + "root" +#endif + ); + return -1; + } + + trace_fd = prepare_func_latency(ftrace); + if (trace_fd < 0) + goto out; + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + pollfd.fd = trace_fd; + + if (start_func_latency(ftrace) < 0) + goto out; + + evlist__start_workload(ftrace->evlist); + + line[0] = '\0'; + while (!done) { + if (poll(&pollfd, 1, -1) < 0) + break; + + if (pollfd.revents & POLLIN) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n < 0) + break; + + make_histogram(buckets, buf, n, line); + } + } + + stop_func_latency(ftrace); + + if (workload_exec_errno) { + const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); + pr_err("workload failed: %s\n", emsg); + goto out; + } + + /* read remaining buffer contents */ + while (!ftrace->target.use_bpf) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n <= 0) + break; + make_histogram(buckets, buf, n, line); + } + + read_func_latency(ftrace, buckets); + + display_histogram(buckets); + +out: + close(trace_fd); + cleanup_func_latency(ftrace); + + return (done && !workload_exec_errno) ? 0 : -1; +} + static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace = cb; @@ -855,41 +1106,21 @@ static int parse_graph_tracer_opts(const struct option *opt, return 0; } -static void select_tracer(struct perf_ftrace *ftrace) -{ - bool graph = !list_empty(&ftrace->graph_funcs) || - !list_empty(&ftrace->nograph_funcs); - bool func = !list_empty(&ftrace->filters) || - !list_empty(&ftrace->notrace); - - /* The function_graph has priority over function tracer. */ - if (graph) - ftrace->tracer = "function_graph"; - else if (func) - ftrace->tracer = "function"; - /* Otherwise, the default tracer is used. */ - - pr_debug("%s tracer is used\n", ftrace->tracer); -} +enum perf_ftrace_subcommand { + PERF_FTRACE_NONE, + PERF_FTRACE_TRACE, + PERF_FTRACE_LATENCY, +}; int cmd_ftrace(int argc, const char **argv) { int ret; + int (*cmd_func)(struct perf_ftrace *) = NULL; struct perf_ftrace ftrace = { .tracer = DEFAULT_TRACER, .target = { .uid = UINT_MAX, }, }; - const char * const ftrace_usage[] = { - "perf ftrace [<options>] [<command>]", - "perf ftrace [<options>] -- <command> [<options>]", - NULL - }; - const struct option ftrace_options[] = { - OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "Tracer to use: function_graph(default) or function"), - OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", - "Show available functions to filter", - opt_list_avail_functions, "*"), + const struct option common_options[] = { OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ @@ -901,6 +1132,14 @@ int cmd_ftrace(int argc, const char **argv) "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "List of cpus to monitor"), + OPT_END() + }; + const struct option ftrace_options[] = { + OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", + "Tracer to use: function_graph(default) or function"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Trace given functions using function tracer", parse_filter_func), @@ -923,24 +1162,87 @@ int cmd_ftrace(int argc, const char **argv) "Trace children processes"), OPT_UINTEGER('D', "delay", &ftrace.initial_delay, "Number of milliseconds to wait before starting tracing after program start"), - OPT_END() + OPT_PARENT(common_options), + }; + const struct option latency_options[] = { + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "Show latency of given function", parse_filter_func), +#ifdef HAVE_BPF_SKEL + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, + "Use BPF to measure function latency"), +#endif + OPT_PARENT(common_options), + }; + const struct option *options = ftrace_options; + + const char * const ftrace_usage[] = { + "perf ftrace [<options>] [<command>]", + "perf ftrace [<options>] -- [<command>] [<options>]", + "perf ftrace {trace|latency} [<options>] [<command>]", + "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]", + NULL }; + enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE; INIT_LIST_HEAD(&ftrace.filters); INIT_LIST_HEAD(&ftrace.notrace); INIT_LIST_HEAD(&ftrace.graph_funcs); INIT_LIST_HEAD(&ftrace.nograph_funcs); + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); + ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; - argc = parse_options(argc, argv, ftrace_options, ftrace_usage, + if (argc > 1) { + if (!strcmp(argv[1], "trace")) { + subcmd = PERF_FTRACE_TRACE; + } else if (!strcmp(argv[1], "latency")) { + subcmd = PERF_FTRACE_LATENCY; + options = latency_options; + } + + if (subcmd != PERF_FTRACE_NONE) { + argc--; + argv++; + } + } + /* for backward compatibility */ + if (subcmd == PERF_FTRACE_NONE) + subcmd = PERF_FTRACE_TRACE; + + argc = parse_options(argc, argv, options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target__none(&ftrace.target)) - ftrace.target.system_wide = true; + if (argc < 0) { + ret = -EINVAL; + goto out_delete_filters; + } - select_tracer(&ftrace); + switch (subcmd) { + case PERF_FTRACE_TRACE: + if (!argc && target__none(&ftrace.target)) + ftrace.target.system_wide = true; + cmd_func = __cmd_ftrace; + break; + case PERF_FTRACE_LATENCY: + if (list_empty(&ftrace.filters)) { + pr_err("Should provide a function to measure\n"); + parse_options_usage(ftrace_usage, options, "T", 1); + ret = -EINVAL; + goto out_delete_filters; + } + cmd_func = __cmd_latency; + break; + case PERF_FTRACE_NONE: + default: + pr_err("Invalid subcommand\n"); + ret = -EINVAL; + goto out_delete_filters; + } ret = target__validate(&ftrace.target); if (ret) { @@ -961,7 +1263,15 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) goto out_delete_evlist; - ret = __cmd_ftrace(&ftrace, argc, argv); + if (argc) { + ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target, + argv, false, + ftrace__workload_exec_failed_signal); + if (ret < 0) + goto out_delete_evlist; + } + + ret = cmd_func(&ftrace); out_delete_evlist: evlist__delete(ftrace.evlist); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 409b721666cb..fbf43a454cba 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool, static int perf_event__repipe_tracing_data(struct perf_session *session, union perf_event *event) { - int err; - perf_event__repipe_synth(session->tool, event); - err = perf_event__process_tracing_data(session, event); - return err; + return perf_event__process_tracing_data(session, event); } static int dso__read_build_id(struct dso *dso) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index da03a341c63c..99d7ff9a8eff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { - int node1 = cpu__get_node(sample->cpu), + int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0338b813585a..bb716c953d02 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -111,6 +111,7 @@ struct record { unsigned long long samples; struct mmap_cpu_mask affinity_mask; unsigned long output_max_size; /* = 0: unlimited */ + struct perf_debuginfod debuginfod; }; static volatile int done; @@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->opts.nr_cblocks = nr_cblocks_default; } #endif + if (!strcmp(var, "record.debuginfod")) { + rec->debuginfod.urls = strdup(value); + if (!rec->debuginfod.urls) + return -ENOMEM; + rec->debuginfod.set = true; + } return 0; } @@ -2267,6 +2274,10 @@ out_free: return ret; } +void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) +{ +} + static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) @@ -2663,6 +2674,10 @@ static struct option __record_options[] = { parse_control_option), OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", "Fine-tune event synthesis: default=all", parse_record_synth_option), + OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, + &record.debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_END() }; @@ -2716,6 +2731,8 @@ int cmd_record(int argc, const char **argv) if (err) return err; + perf_debuginfod_setup(&record.debuginfod); + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; @@ -2792,7 +2809,7 @@ int cmd_record(int argc, const char **argv) symbol__init(NULL); if (rec->opts.affinity != PERF_AFFINITY_SYS) { - rec->affinity_mask.nbits = cpu__max_cpu(); + rec->affinity_mask.nbits = cpu__max_cpu().cpu; rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); @@ -2898,6 +2915,10 @@ int cmd_record(int argc, const char **argv) } rec->opts.target.hybrid = perf_pmu__has_hybrid(); + + if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) + arch__add_leaf_frame_record_opts(&rec->opts); + err = -ENOMEM; if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8ae400429870..1dd92d8c9279 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep) } } - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env)); if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" @@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * on events sample_type. */ sample_type = evlist__combined_sample_type(*pevlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); return 0; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4527f632ebe4..72d446de9c60 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -167,7 +167,7 @@ struct trace_sched_handler { struct perf_sched_map { DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); - int *comp_cpus; + struct perf_cpu *comp_cpus; bool comp; struct perf_thread_map *color_pids; const char *color_pids_str; @@ -191,7 +191,7 @@ struct perf_sched { * Track the current task - that way we can know whether there's any * weird events, such as a task being switched away that is not current. */ - int max_cpu; + struct perf_cpu max_cpu; u32 curr_pid[MAX_CPUS]; struct thread *curr_thread[MAX_CPUS]; char next_shortname1; @@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int i, this_cpu = sample->cpu; + int i; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; - BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); + BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { + if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } } else - cpus_nr = sched->max_cpu; + cpus_nr = sched->max_cpu.cpu; - timestamp0 = sched->cpu_last_switched[this_cpu]; - sched->cpu_last_switched[this_cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; + sched->cpu_last_switched[this_cpu.cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, return -1; } - sched->curr_thread[this_cpu] = thread__get(sched_in); + sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); printf(" "); @@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } for (i = 0; i < cpus_nr; i++) { - int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; - struct thread *curr_thread = sched->curr_thread[cpu]; + struct perf_cpu cpu = { + .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, + }; + struct thread *curr_thread = sched->curr_thread[cpu.cpu]; struct thread_runtime *curr_tr; const char *pid_color = color; const char *cpu_color = color; @@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (curr_thread && thread__has_color(curr_thread)) pid_color = COLOR_PIDS; - if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) continue; - if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) + if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) cpu_color = COLOR_CPUS; - if (cpu != this_cpu) + if (cpu.cpu != this_cpu.cpu) color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); - if (sched->curr_thread[cpu]) { - curr_tr = thread__get_runtime(sched->curr_thread[cpu]); + if (sched->curr_thread[cpu.cpu]) { + curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); if (curr_tr == NULL) { thread__put(sched_in); return -1; @@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, color_fprintf(stdout, color, " "); } - if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) goto out; timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); @@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread) static void timehist_header(struct perf_sched *sched) { - u32 ncpus = sched->max_cpu + 1; + u32 ncpus = sched->max_cpu.cpu + 1; u32 i, j; printf("%15s %6s ", "time", "cpu"); @@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched, struct thread_runtime *tr = thread__priv(thread); const char *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus = sched->max_cpu.cpu + 1; char tstr[64]; char nstr[30]; u64 wait_time; @@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); if (sched->show_cpu_visual) - printf(" %*s ", sched->max_cpu + 1, ""); + printf(" %*s ", sched->max_cpu.cpu + 1, ""); printf(" %-*s ", comm_width, timehist_get_commstr(thread)); @@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched, { struct thread *thread; char tstr[64]; - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus; u32 ocpu, dcpu; if (sched->summary_only) return; - max_cpus = sched->max_cpu + 1; + max_cpus = sched->max_cpu.cpu + 1; ocpu = evsel__intval(evsel, sample, "orig_cpu"); dcpu = evsel__intval(evsel, sample, "dest_cpu"); @@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" Total scheduling time (msec): "); print_sched_time(hist_time, 2); - printf(" (x %d)\n", sched->max_cpu); + printf(" (x %d)\n", sched->max_cpu.cpu); } typedef int (*sched_handler)(struct perf_tool *tool, @@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int err = 0; - int this_cpu = sample->cpu; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (evsel->handler != NULL) { @@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* pre-allocate struct for per-CPU idle stats */ - sched->max_cpu = session->header.env.nr_cpus_online; - if (sched->max_cpu == 0) - sched->max_cpu = 4; - if (init_idle_threads(sched->max_cpu)) + sched->max_cpu.cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu.cpu == 0) + sched->max_cpu.cpu = 4; + if (init_idle_threads(sched->max_cpu.cpu)) goto out; /* summary_only implies summary option, but don't overwrite summary if set */ @@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched) { struct perf_cpu_map *map; - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); if (sched->map.comp) { - sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); + sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); if (!sched->map.comp_cpus) return -1; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c82b033e8942..fa478ddcd18a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,6 +15,7 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" +#include "util/env.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/evsel_fprintf.h" @@ -462,7 +463,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__do_check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT, allow_user_set)) return -EINVAL; if (PRINT_FIELD(SYM) && @@ -514,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(PHYS_ADDR) && - evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) + evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set)) return -EINVAL; if (PRINT_FIELD(DATA_PAGE_SIZE) && @@ -648,7 +649,7 @@ out: return 0; } -static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, +static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch, FILE *fp) { unsigned i = 0, r; @@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val); } return printed; @@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen, } static int perf_sample__fprintf_iregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->intr_regs, - attr->sample_regs_intr, fp); + attr->sample_regs_intr, arch, fp); } static int perf_sample__fprintf_uregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->user_regs, - attr->sample_regs_user, fp); + attr->sample_regs_user, arch, fp); } static int perf_sample__fprintf_start(struct perf_script *script, @@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script, struct evsel_script *es = evsel->priv; FILE *fp = es->fp; char str[PAGE_SIZE_NAME_LEN]; + const char *arch = perf_env__arch(machine->env); if (output[type].fields == 0) return; @@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script, } if (PRINT_FIELD(IREGS)) - perf_sample__fprintf_iregs(sample, attr, fp); + perf_sample__fprintf_iregs(sample, attr, arch, fp); if (PRINT_FIELD(UREGS)) - perf_sample__fprintf_uregs(sample, attr, fp); + perf_sample__fprintf_uregs(sample, attr, arch, fp); if (PRINT_FIELD(BRSTACK)) perf_sample__fprintf_brstack(sample, thread, attr, fp); @@ -2113,8 +2115,8 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, thread; + struct perf_cpu cpu; static int header_printed; if (counter->core.system_wide) @@ -2127,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp) } for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { struct perf_counts_values *counts; - counts = perf_counts(counter->counts, cpu, thread); + counts = perf_counts(counter->counts, idx, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - counter->core.cpus->map[cpu], + cpu.cpu, perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, @@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, * on events sample_type. */ sample_type = evlist__combined_sample_type(evlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); /* Enable fields for callchain entries */ if (symbol_conf.use_callchain && @@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script) struct perf_session *session = script->session; u64 sample_type = evlist__combined_sample_type(session->evlist); - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { - if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) { - callchain_param.record_mode = CALLCHAIN_DWARF; - dwarf_callchain_users = true; - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) - callchain_param.record_mode = CALLCHAIN_LBR; - else - callchain_param.record_mode = CALLCHAIN_FP; - } + callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env)); if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7974933dbc77..3f98689dd687 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b) if (!a->core.cpus || !b->core.cpus) return false; - if (a->core.cpus->nr != b->core.cpus->nr) + if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus)) return false; - for (int i = 0; i < a->core.cpus->nr; i++) { - if (a->core.cpus->map[i] != b->core.cpus->map[i]) + for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) { + if (perf_cpu_map__cpu(a->core.cpus, i).cpu != + perf_cpu_map__cpu(b->core.cpus, i).cpu) return false; } @@ -327,34 +328,35 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, +static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread, struct perf_counts_values *count) { - struct perf_sample_id *sid = SID(counter, cpu, thread); + struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); + struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, process_synthesized_event, NULL); } -static int read_single_counter(struct evsel *counter, int cpu, +static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread, struct timespec *rs) { if (counter->tool_event == PERF_TOOL_DURATION_TIME) { u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; struct perf_counts_values *count = - perf_counts(counter->counts, cpu, thread); + perf_counts(counter->counts, cpu_map_idx, thread); count->ena = count->run = val; count->val = val; return 0; } - return evsel__read_counter(counter, cpu, thread); + return evsel__read_counter(counter, cpu_map_idx, thread); } /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); int thread; @@ -368,24 +370,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) for (thread = 0; thread < nthreads; thread++) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); /* * The leader's group read loads data into its group members * (via evsel__read_counter()) and sets their count->loaded. */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { + if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && + read_single_counter(counter, cpu_map_idx, thread, rs)) { counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; return -1; } - perf_counts__set_loaded(counter->counts, cpu, thread, false); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false); if (STAT_RECORD) { - if (evsel__write_stat_event(counter, cpu, thread, count)) { + if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -395,7 +397,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", evsel__name(counter), - cpu, + perf_cpu_map__cpu(evsel__cpus(counter), + cpu_map_idx).cpu, count->val, count->ena, count->run); } } @@ -405,36 +408,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) static int read_affinity_counters(struct timespec *rs) { - struct evsel *counter; - struct affinity affinity; - int i, ncpus, cpu; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity; if (all_counters_use_bpf) return 0; - if (affinity__setup(&affinity) < 0) + if (!target__has_cpu(&target) || target__has_per_thread(&target)) + affinity = NULL; + else if (affinity__setup(&saved_affinity) < 0) return -1; + else + affinity = &saved_affinity; - ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); - if (!target__has_cpu(&target) || target__has_per_thread(&target)) - ncpus = 1; - evlist__for_each_cpu(evsel_list, i, cpu) { - if (i >= ncpus) - break; - affinity__set(&affinity, cpu); + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + struct evsel *counter = evlist_cpu_itr.evsel; - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (evsel__is_bpf(counter)) - continue; - if (!counter->err) { - counter->err = read_counter_cpu(counter, rs, - counter->cpu_iter - 1); - } + if (evsel__is_bpf(counter)) + continue; + + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + evlist_cpu_itr.cpu_map_idx); } } - affinity__cleanup(&affinity); + if (affinity) + affinity__cleanup(&saved_affinity); + return 0; } @@ -788,8 +788,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; - struct affinity affinity; - int i, cpu, err; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity = NULL; + int err; bool second_pass = false; if (forks) { @@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (group) evlist__set_leader(evsel_list); - if (affinity__setup(&affinity) < 0) - return -1; + if (!cpu_map__is_dummy(evsel_list->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return -1; + affinity = &saved_affinity; + } evlist__for_each_entry(evsel_list, counter) { if (bpf_counter__load(counter, &target)) @@ -813,103 +817,98 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) all_counters_use_bpf = false; } - evlist__for_each_cpu (evsel_list, i, cpu) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + counter = evlist_cpu_itr.evsel; + /* * bperf calls evsel__open_per_cpu() in bperf__load(), so * no need to call it again here. */ if (target.use_bpf) break; - affinity__set(&affinity, cpu); - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) + if (counter->reset_group || counter->errored) + continue; + if (evsel__is_bpf(counter)) + continue; +try_again: + if (create_perf_stat_counter(counter, &stat_config, &target, + evlist_cpu_itr.cpu_map_idx) < 0) { + + /* + * Weak group failed. We cannot just undo this here + * because earlier CPUs might be in group mode, and the kernel + * doesn't support mixing group and non group reads. Defer + * it to later. + * Don't close here because we're in the wrong affinity. + */ + if ((errno == EINVAL || errno == EBADF) && + evsel__leader(counter) != counter && + counter->weak_group) { + evlist__reset_weak_group(evsel_list, counter, false); + assert(counter->reset_group); + second_pass = true; continue; - if (counter->reset_group || counter->errored) + } + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: continue; - if (evsel__is_bpf(counter)) + default: + break; + } + + } + counter->supported = true; + } + + if (second_pass) { + /* + * Now redo all the weak group after closing them, + * and also close errored counters. + */ + + /* First close errored or weak retry */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + counter = evlist_cpu_itr.evsel; + + if (!counter->reset_group && !counter->errored) continue; -try_again: + + perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); + } + /* Now reopen weak */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + counter = evlist_cpu_itr.evsel; + + if (!counter->reset_group && !counter->errored) + continue; + if (!counter->reset_group) + continue; +try_again_reset: + pr_debug2("reopening weak %s\n", evsel__name(counter)); if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { - - /* - * Weak group failed. We cannot just undo this here - * because earlier CPUs might be in group mode, and the kernel - * doesn't support mixing group and non group reads. Defer - * it to later. - * Don't close here because we're in the wrong affinity. - */ - if ((errno == EINVAL || errno == EBADF) && - evsel__leader(counter) != counter && - counter->weak_group) { - evlist__reset_weak_group(evsel_list, counter, false); - assert(counter->reset_group); - second_pass = true; - continue; - } + evlist_cpu_itr.cpu_map_idx) < 0) { switch (stat_handle_error(counter)) { case COUNTER_FATAL: return -1; case COUNTER_RETRY: - goto try_again; + goto try_again_reset; case COUNTER_SKIP: continue; default: break; } - } counter->supported = true; } } - - if (second_pass) { - /* - * Now redo all the weak group after closing them, - * and also close errored counters. - */ - - evlist__for_each_cpu(evsel_list, i, cpu) { - affinity__set(&affinity, cpu); - /* First close errored or weak retry */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip_no_inc(counter, cpu)) - continue; - perf_evsel__close_cpu(&counter->core, counter->cpu_iter); - } - /* Now reopen weak */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (!counter->reset_group) - continue; -try_again_reset: - pr_debug2("reopening weak %s\n", evsel__name(counter)); - if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { - - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again_reset; - case COUNTER_SKIP: - continue; - default: - break; - } - } - counter->supported = true; - } - } - } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { @@ -1168,6 +1167,26 @@ static int parse_stat_cgroups(const struct option *opt, return parse_cgroups(opt, str, unset); } +static int parse_hybrid_type(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + struct evlist *evlist = *(struct evlist **)opt->value; + + if (!list_empty(&evlist->core.entries)) { + fprintf(stderr, "Must define cputype before events/metrics\n"); + return -1; + } + + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str); + if (!evlist->hybrid_pmu_name) { + fprintf(stderr, "--cputype %s is not supported!\n", str); + return -1; + } + + return 0; +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1282,6 +1301,10 @@ static struct option stat_options[] = { "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), + OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", + "Only enable events on applying cpu with this type " + "for hybrid platform (e.g. core or atom)", + parse_hybrid_type), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. use 'perf list' to list available events", @@ -1298,70 +1321,75 @@ static struct option stat_options[] = { OPT_END() }; +static const char *const aggr_mode__string[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_GLOBAL] = "global", + [AGGR_NODE] = "node", + [AGGR_NONE] = "none", + [AGGR_SOCKET] = "socket", + [AGGR_THREAD] = "thread", + [AGGR_UNSET] = "unset", +}; + static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_socket(map, cpu, NULL); + return aggr_cpu_id__socket(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_die(map, cpu, NULL); + return aggr_cpu_id__die(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_core(map, cpu, NULL); + return aggr_cpu_id__core(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_node(map, cpu, NULL); + return aggr_cpu_id__node(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, - aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) + aggr_get_id_t get_id, struct perf_cpu cpu) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (idx >= map->nr) - return id; + if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) + config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); - cpu = map->map[idx]; - - if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - - id = config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu.cpu]; return id; } static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); } static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_die, cpu); } static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_core, cpu); } static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } static bool term_percore_set(void) @@ -1376,54 +1404,67 @@ static bool term_percore_set(void) return false; } -static int perf_stat_init_aggr_mode(void) +static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { - int nr; + switch (aggr_mode) { + case AGGR_SOCKET: + return aggr_cpu_id__socket; + case AGGR_DIE: + return aggr_cpu_id__die; + case AGGR_CORE: + return aggr_cpu_id__core; + case AGGR_NODE: + return aggr_cpu_id__node; + case AGGR_NONE: + if (term_percore_set()) + return aggr_cpu_id__core; + + return NULL; + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} - switch (stat_config.aggr_mode) { +static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) +{ + switch (aggr_mode) { case AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_cached; - break; + return perf_stat__get_socket_cached; case AGGR_DIE: - if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_cached; - break; + return perf_stat__get_die_cached; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; - break; + return perf_stat__get_core_cached; case AGGR_NODE: - if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_cached; - break; + return perf_stat__get_node_cached; case AGGR_NONE: if (term_percore_set()) { - if (cpu_map__build_core_map(evsel_list->core.cpus, - &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; + return perf_stat__get_core_cached; } - break; + return NULL; case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; + } +} + +static int perf_stat_init_aggr_mode(void) +{ + int nr; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); + + if (get_id) { + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, + get_id, /*data=*/NULL); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } /* @@ -1431,7 +1472,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - nr = perf_cpu_map__max(evsel_list->core.cpus); + nr = perf_cpu_map__max(evsel_list->core.cpus).cpu; stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } @@ -1459,169 +1500,139 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) -{ - int cpu; - - if (idx > map->nr) - return -1; - - cpu = map->map[idx]; - - if (cpu >= env->nr_cpus_avail) - return -1; - - return cpu; -} - -static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - int cpu = perf_env__get_cpu(env, map, idx); - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) - id.socket = env->cpu[cpu].socket_id; + if (cpu.cpu != -1) + id.socket = env->cpu[cpu.cpu].socket_id; return id; } -static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * die_id is relative to socket, so start * with the socket ID and then add die to * make a unique ID. */ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; } return id; } -static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * core_id is relative to socket and die, * we need a global id. So we set * socket, die id and core id */ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; - id.core = env->cpu[cpu].core_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; } return id; } -static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { - int cpu = perf_env__get_cpu(data, map, idx); - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); id.node = perf_env__numa_node(data, cpu); return id; } -static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); -} - -static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **diep) -{ - return cpu_map__build_map(cpus, diep, perf_env__get_die, env); -} - -static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, perf_env__get_core, env); -} - -static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **nodep) -{ - return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); -} - static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_socket(map, idx, &perf_stat.session->header.env); + return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_die(map, idx, &perf_stat.session->header.env); + return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_core(map, idx, &perf_stat.session->header.env); + return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_node(map, idx, &perf_stat.session->header.env); + return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } -static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { - struct perf_env *env = &st->session->header.env; + switch (aggr_mode) { + case AGGR_SOCKET: + return perf_env__get_socket_aggr_by_cpu; + case AGGR_DIE: + return perf_env__get_die_aggr_by_cpu; + case AGGR_CORE: + return perf_env__get_core_aggr_by_cpu; + case AGGR_NODE: + return perf_env__get_node_aggr_by_cpu; + case AGGR_NONE: + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} - switch (stat_config.aggr_mode) { +static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) +{ + switch (aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_file; - break; + return perf_stat__get_socket_file; case AGGR_DIE: - if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_file; - break; + return perf_stat__get_die_file; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_file; - break; + return perf_stat__get_core_file; case AGGR_NODE: - if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_file; - break; + return perf_stat__get_node_file; case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; } +} + +static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +{ + struct perf_env *env = &st->session->header.env; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + + if (!get_id) + return 0; + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode); return 0; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index df9fc00b4cd6..52b137a184a6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1536,13 +1536,20 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) return fprintf(fp, " ? "); } +static pid_t workload_pid = -1; static bool done = false; static bool interrupted = false; -static void sig_handler(int sig) +static void sighandler_interrupt(int sig __maybe_unused) { - done = true; - interrupted = sig == SIGINT; + done = interrupted = true; +} + +static void sighandler_chld(int sig __maybe_unused, siginfo_t *info, + void *context __maybe_unused) +{ + if (info->si_pid == workload_pid) + done = true; } static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp) @@ -2726,6 +2733,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } val = (uintptr_t)(sample->raw_data + offset); @@ -3936,7 +3945,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) bool draining = false; trace->live = true; - signal(SIGCHLD, sig_handler); if (!trace->raw_augmented_syscalls) { if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) @@ -3962,6 +3970,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__add(evlist, pgfault_min); } + /* Enable ignoring missing threads when -u/-p option is defined. */ + trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; + if (trace->sched && evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; @@ -4013,6 +4024,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) fprintf(trace->output, "Couldn't run the workload!\n"); goto out_delete_evlist; } + workload_pid = evlist->workload.pid; } err = evlist__open(evlist); @@ -4882,10 +4894,16 @@ int cmd_trace(int argc, const char **argv) const char * const trace_subcommands[] = { "record", NULL }; int err = -1; char bf[BUFSIZ]; + struct sigaction sigchld_act; signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); - signal(SIGINT, sig_handler); + signal(SIGINT, sighandler_interrupt); + + memset(&sigchld_act, 0, sizeof(sigchld_act)); + sigchld_act.sa_flags = SA_SIGINFO; + sigchld_act.sa_sigaction = sighandler_chld; + sigaction(SIGCHLD, &sigchld_act, NULL); trace.evlist = evlist__new(); trace.sctbl = syscalltbl__new(); diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 7565a1852c74..b17eb52a0694 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) { - struct filter_data *d = data; - pr_debug("%s API\n", __func__); return do_checks(data, sample, ctx, false); diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json new file mode 100644 index 000000000000..e57cd55937c6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json @@ -0,0 +1,143 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "ASE_INST_SPEC" + }, + { + "ArchStdEvent": "SVE_INST_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_EMPTY_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_FAULT_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT8_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT16_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT32_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT64_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json new file mode 100644 index 000000000000..e522113aeb96 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json @@ -0,0 +1,38 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json new file mode 100644 index 000000000000..20d8365756c5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "REMOTE_ACCESS" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json new file mode 100644 index 000000000000..3116135c59e2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "TRB_WRAP" + }, + { + "ArchStdEvent": "TRCEXTOUT0" + }, + { + "ArchStdEvent": "TRCEXTOUT1" + }, + { + "ArchStdEvent": "TRCEXTOUT2" + }, + { + "ArchStdEvent": "TRCEXTOUT3" + }, + { + "ArchStdEvent": "CTI_TRIGOUT4" + }, + { + "ArchStdEvent": "CTI_TRIGOUT5" + }, + { + "ArchStdEvent": "CTI_TRIGOUT6" + }, + { + "ArchStdEvent": "CTI_TRIGOUT7" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json index 423767510aff..80d7a70829a0 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json @@ -300,6 +300,30 @@ "BriefDescription": "No operation sent for execution on a slot" }, { + "PublicDescription": "Sample Population", + "EventCode": "0x4000", + "EventName": "SAMPLE_POP", + "BriefDescription": "Sample Population" + }, + { + "PublicDescription": "Sample Taken", + "EventCode": "0x4001", + "EventName": "SAMPLE_FEED", + "BriefDescription": "Sample Taken" + }, + { + "PublicDescription": "Sample Taken and not removed by filtering", + "EventCode": "0x4002", + "EventName": "SAMPLE_FILTRATE", + "BriefDescription": "Sample Taken and not removed by filtering" + }, + { + "PublicDescription": "Sample collided with previous sample", + "EventCode": "0x4003", + "EventName": "SAMPLE_COLLISION", + "BriefDescription": "Sample collided with previous sample" + }, + { "PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.", "EventCode": "0x4004", "EventName": "CNT_CYCLES", @@ -330,6 +354,96 @@ "BriefDescription": "Level 3 data cache long-latency read miss" }, { + "PublicDescription": "Trace buffer current write pointer wrapped", + "EventCode": "0x400C", + "EventName": "TRB_WRAP", + "BriefDescription": "Trace buffer current write pointer wrapped" + }, + { + "PublicDescription": "PE Trace Unit external output 0", + "EventCode": "0x4010", + "EventName": "TRCEXTOUT0", + "BriefDescription": "PE Trace Unit external output 0" + }, + { + "PublicDescription": "PE Trace Unit external output 1", + "EventCode": "0x4011", + "EventName": "TRCEXTOUT1", + "BriefDescription": "PE Trace Unit external output 1" + }, + { + "PublicDescription": "PE Trace Unit external output 2", + "EventCode": "0x4012", + "EventName": "TRCEXTOUT2", + "BriefDescription": "PE Trace Unit external output 2" + }, + { + "PublicDescription": "PE Trace Unit external output 3", + "EventCode": "0x4013", + "EventName": "TRCEXTOUT3", + "BriefDescription": "PE Trace Unit external output 3" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 4", + "EventCode": "0x4018", + "EventName": "CTI_TRIGOUT4", + "BriefDescription": "Cross-trigger Interface output trigger 4" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 5 ", + "EventCode": "0x4019", + "EventName": "CTI_TRIGOUT5", + "BriefDescription": "Cross-trigger Interface output trigger 5 " + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 6", + "EventCode": "0x401A", + "EventName": "CTI_TRIGOUT6", + "BriefDescription": "Cross-trigger Interface output trigger 6" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 7", + "EventCode": "0x401B", + "EventName": "CTI_TRIGOUT7", + "BriefDescription": "Cross-trigger Interface output trigger 7" + }, + { + "PublicDescription": "Access with additional latency from alignment", + "EventCode": "0x4020", + "EventName": "LDST_ALIGN_LAT", + "BriefDescription": "Access with additional latency from alignment" + }, + { + "PublicDescription": "Load with additional latency from alignment", + "EventCode": "0x4021", + "EventName": "LD_ALIGN_LAT", + "BriefDescription": "Load with additional latency from alignment" + }, + { + "PublicDescription": "Store with additional latency from alignment", + "EventCode": "0x4022", + "EventName": "ST_ALIGN_LAT", + "BriefDescription": "Store with additional latency from alignment" + }, + { + "PublicDescription": "Checked data memory access", + "EventCode": "0x4024", + "EventName": "MEM_ACCESS_CHECKED", + "BriefDescription": "Checked data memory access" + }, + { + "PublicDescription": "Checked data memory access, read", + "EventCode": "0x4025", + "EventName": "MEM_ACCESS_CHECKED_RD", + "BriefDescription": "Checked data memory access, read" + }, + { + "PublicDescription": "Checked data memory access, write", + "EventCode": "0x4026", + "EventName": "MEM_ACCESS_CHECKED_WR", + "BriefDescription": "Checked data memory access, write" + }, + { "PublicDescription": "SIMD Instruction architecturally executed.", "EventCode": "0x8000", "EventName": "SIMD_INST_RETIRED", @@ -342,6 +456,18 @@ "BriefDescription": "Instruction architecturally executed, SVE." }, { + "PublicDescription": "ASE operations speculatively executed", + "EventCode": "0x8005", + "EventName": "ASE_INST_SPEC", + "BriefDescription": "ASE operations speculatively executed" + }, + { + "PublicDescription": "SVE operations speculatively executed", + "EventCode": "0x8006", + "EventName": "SVE_INST_SPEC", + "BriefDescription": "SVE operations speculatively executed" + }, + { "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", "EventCode": "0x8008", "EventName": "UOP_SPEC", @@ -360,6 +486,24 @@ "BriefDescription": "Floating-point Operations speculatively executed." }, { + "PublicDescription": "Floating-point half-precision operations speculatively executed", + "EventCode": "0x8014", + "EventName": "FP_HP_SPEC", + "BriefDescription": "Floating-point half-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point single-precision operations speculatively executed", + "EventCode": "0x8018", + "EventName": "FP_SP_SPEC", + "BriefDescription": "Floating-point single-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point double-precision operations speculatively executed", + "EventCode": "0x801C", + "EventName": "FP_DP_SPEC", + "BriefDescription": "Floating-point double-precision operations speculatively executed" + }, + { "PublicDescription": "Floating-point FMA Operations speculatively executed.", "EventCode": "0x8028", "EventName": "FP_FMA_SPEC", @@ -390,6 +534,30 @@ "BriefDescription": "SVE predicated Operations speculatively executed." }, { + "PublicDescription": "SVE predicated operations with no active predicates speculatively executed", + "EventCode": "0x8075", + "EventName": "SVE_PRED_EMPTY_SPEC", + "BriefDescription": "SVE predicated operations with no active predicates speculatively executed" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with all active predicates", + "EventCode": "0x8076", + "EventName": "SVE_PRED_FULL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with all active predicates" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates", + "EventCode": "0x8077", + "EventName": "SVE_PRED_PARTIAL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates" + }, + { + "PublicDescription": "SVE predicated operations with empty or partially active predicates", + "EventCode": "0x8079", + "EventName": "SVE_PRED_NOT_FULL_SPEC", + "BriefDescription": "SVE predicated operations with empty or partially active predicates" + }, + { "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", "EventCode": "0x807C", "EventName": "SVE_MOVPRFX_SPEC", @@ -498,6 +666,12 @@ "BriefDescription": "SVE First-fault load Operations speculatively executed." }, { + "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0", + "EventCode": "0x80BD", + "EventName": "SVE_LDFF_FAULT_SPEC", + "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0" + }, + { "PublicDescription": "Scalable floating-point element Operations speculatively executed.", "EventCode": "0x80C0", "EventName": "FP_SCALE_OPS_SPEC", @@ -544,5 +718,29 @@ "EventCode": "0x80C7", "EventName": "FP_DP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed", + "EventCode": "0x80E3", + "EventName": "ASE_SVE_INT8_SPEC", + "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed", + "EventCode": "0x80E7", + "EventName": "ASE_SVE_INT16_SPEC", + "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed", + "EventCode": "0x80EB", + "EventName": "ASE_SVE_INT32_SPEC", + "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed", + "EventCode": "0x80EF", + "EventName": "ASE_SVE_INT64_SPEC", + "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed" } ] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 31d8b57ca9bb..b899db48c12a 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -19,6 +19,7 @@ 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000410fd400,v1,arm/neoverse-v1,core +0x00000000410fd490,v1,arm/neoverse-n2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json index d0a19866563d..210afa856091 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json +++ b/tools/perf/pmu-events/arch/arm64/recommended.json @@ -148,305 +148,305 @@ "EventCode": "0x60", "EventName": "BUS_ACCESS_RD", "BriefDescription": "Bus access read" - }, - { + }, + { "PublicDescription": "Bus access write", "EventCode": "0x61", "EventName": "BUS_ACCESS_WR", "BriefDescription": "Bus access write" - }, - { + }, + { "PublicDescription": "Bus access, Normal, Cacheable, Shareable", "EventCode": "0x62", "EventName": "BUS_ACCESS_SHARED", "BriefDescription": "Bus access, Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, not Normal, Cacheable, Shareable", "EventCode": "0x63", "EventName": "BUS_ACCESS_NOT_SHARED", "BriefDescription": "Bus access, not Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, Normal", "EventCode": "0x64", "EventName": "BUS_ACCESS_NORMAL", "BriefDescription": "Bus access, Normal" - }, - { + }, + { "PublicDescription": "Bus access, peripheral", "EventCode": "0x65", "EventName": "BUS_ACCESS_PERIPH", "BriefDescription": "Bus access, peripheral" - }, - { + }, + { "PublicDescription": "Data memory access, read", "EventCode": "0x66", "EventName": "MEM_ACCESS_RD", "BriefDescription": "Data memory access, read" - }, - { + }, + { "PublicDescription": "Data memory access, write", "EventCode": "0x67", "EventName": "MEM_ACCESS_WR", "BriefDescription": "Data memory access, write" - }, - { + }, + { "PublicDescription": "Unaligned access, read", "EventCode": "0x68", "EventName": "UNALIGNED_LD_SPEC", "BriefDescription": "Unaligned access, read" - }, - { + }, + { "PublicDescription": "Unaligned access, write", "EventCode": "0x69", "EventName": "UNALIGNED_ST_SPEC", "BriefDescription": "Unaligned access, write" - }, - { + }, + { "PublicDescription": "Unaligned access", "EventCode": "0x6a", "EventName": "UNALIGNED_LDST_SPEC", "BriefDescription": "Unaligned access" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX", "EventCode": "0x6c", "EventName": "LDREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass", "EventCode": "0x6d", "EventName": "STREX_PASS_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail", "EventCode": "0x6e", "EventName": "STREX_FAIL_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX", "EventCode": "0x6f", "EventName": "STREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, load", "EventCode": "0x70", "EventName": "LD_SPEC", "BriefDescription": "Operation speculatively executed, load" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, store", "EventCode": "0x71", "EventName": "ST_SPEC", "BriefDescription": "Operation speculatively executed, store" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, load or store", "EventCode": "0x72", "EventName": "LDST_SPEC", "BriefDescription": "Operation speculatively executed, load or store" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, integer data processing", "EventCode": "0x73", "EventName": "DP_SPEC", "BriefDescription": "Operation speculatively executed, integer data processing" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction", "EventCode": "0x74", "EventName": "ASE_SPEC", "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, floating-point instruction", "EventCode": "0x75", "EventName": "VFP_SPEC", "BriefDescription": "Operation speculatively executed, floating-point instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, software change of the PC", "EventCode": "0x76", "EventName": "PC_WRITE_SPEC", "BriefDescription": "Operation speculatively executed, software change of the PC" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Cryptographic instruction", "EventCode": "0x77", "EventName": "CRYPTO_SPEC", "BriefDescription": "Operation speculatively executed, Cryptographic instruction" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, immediate branch", "EventCode": "0x78", "EventName": "BR_IMMED_SPEC", "BriefDescription": "Branch speculatively executed, immediate branch" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, procedure return", "EventCode": "0x79", "EventName": "BR_RETURN_SPEC", "BriefDescription": "Branch speculatively executed, procedure return" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, indirect branch", "EventCode": "0x7a", "EventName": "BR_INDIRECT_SPEC", "BriefDescription": "Branch speculatively executed, indirect branch" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, ISB", "EventCode": "0x7c", "EventName": "ISB_SPEC", "BriefDescription": "Barrier speculatively executed, ISB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DSB", "EventCode": "0x7d", "EventName": "DSB_SPEC", "BriefDescription": "Barrier speculatively executed, DSB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DMB", "EventCode": "0x7e", "EventName": "DMB_SPEC", "BriefDescription": "Barrier speculatively executed, DMB" - }, - { + }, + { "PublicDescription": "Exception taken, Other synchronous", "EventCode": "0x81", "EventName": "EXC_UNDEF", "BriefDescription": "Exception taken, Other synchronous" - }, - { + }, + { "PublicDescription": "Exception taken, Supervisor Call", "EventCode": "0x82", "EventName": "EXC_SVC", "BriefDescription": "Exception taken, Supervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort", "EventCode": "0x83", "EventName": "EXC_PABORT", "BriefDescription": "Exception taken, Instruction Abort" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort and SError", "EventCode": "0x84", "EventName": "EXC_DABORT", "BriefDescription": "Exception taken, Data Abort and SError" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ", "EventCode": "0x86", "EventName": "EXC_IRQ", "BriefDescription": "Exception taken, IRQ" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ", "EventCode": "0x87", "EventName": "EXC_FIQ", "BriefDescription": "Exception taken, FIQ" - }, - { + }, + { "PublicDescription": "Exception taken, Secure Monitor Call", "EventCode": "0x88", "EventName": "EXC_SMC", "BriefDescription": "Exception taken, Secure Monitor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Hypervisor Call", "EventCode": "0x8a", "EventName": "EXC_HVC", "BriefDescription": "Exception taken, Hypervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort not taken locally", "EventCode": "0x8b", "EventName": "EXC_TRAP_PABORT", "BriefDescription": "Exception taken, Instruction Abort not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort or SError not taken locally", "EventCode": "0x8c", "EventName": "EXC_TRAP_DABORT", "BriefDescription": "Exception taken, Data Abort or SError not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Other traps not taken locally", "EventCode": "0x8d", "EventName": "EXC_TRAP_OTHER", "BriefDescription": "Exception taken, Other traps not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ not taken locally", "EventCode": "0x8e", "EventName": "EXC_TRAP_IRQ", "BriefDescription": "Exception taken, IRQ not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ not taken locally", "EventCode": "0x8f", "EventName": "EXC_TRAP_FIQ", "BriefDescription": "Exception taken, FIQ not taken locally" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire", "EventCode": "0x90", "EventName": "RC_LD_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Store-Release", "EventCode": "0x91", "EventName": "RC_ST_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Store-Release" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, read", "EventCode": "0xa0", "EventName": "L3D_CACHE_RD", "BriefDescription": "Attributable Level 3 data or unified cache access, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, write", "EventCode": "0xa1", "EventName": "L3D_CACHE_WR", "BriefDescription": "Attributable Level 3 data or unified cache access, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache refill, read", "EventCode": "0xa2", "EventName": "L3D_CACHE_REFILL_RD", "BriefDescription": "Attributable Level 3 data or unified cache refill, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache refill, write", "EventCode": "0xa3", "EventName": "L3D_CACHE_REFILL_WR", "BriefDescription": "Attributable Level 3 data or unified cache refill, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim", "EventCode": "0xa6", "EventName": "L3D_CACHE_WB_VICTIM", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean", "EventCode": "0xa7", "EventName": "L3D_CACHE_WB_CLEAN", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate", "EventCode": "0xa8", "EventName": "L3D_CACHE_INVAL", "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate" - } + } ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json index 73089c682f80..41bac1c6a008 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json @@ -19,6 +19,22 @@ "EdgeDetect": "0" }, { + "Unit": "CBO", + "EventCode": "0xE0", + "UMask": "0x00", + "EventName": "event-hyphen", + "BriefDescription": "UNC_CBO_HYPHEN", + "PublicDescription": "UNC_CBO_HYPHEN" + }, + { + "Unit": "CBO", + "EventCode": "0xC0", + "UMask": "0x00", + "EventName": "event-two-hyph", + "BriefDescription": "UNC_CBO_TWO_HYPH", + "PublicDescription": "UNC_CBO_TWO_HYPH" + }, + { "EventCode": "0x7", "EventName": "uncore_hisi_l3c.rd_hit_cpipe", "BriefDescription": "Total read hits", diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 2e7c4153875b..1a57c3f81dd4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -672,8 +672,6 @@ static int json_events(const char *fn, addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &je.metric_expr, "", "", val); - for (s = je.metric_expr; *s; s++) - *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); for (s = arch_std; *s; s++) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 803ca426f8e6..af2b37ef7c70 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o +perf-y += sigtrap.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0f73e300f207..56fba08a3037 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -65,7 +65,7 @@ do { \ #define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) -static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { FILE *file; @@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, /* syscall arguments */ __WRITE_ASS(fd, "d", fd); __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu); + __WRITE_ASS(cpu, "d", cpu.cpu); __WRITE_ASS(pid, "d", pid); __WRITE_ASS(flags, "lu", flags); @@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, return 0; } -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { int errno_saved = errno; diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index a36f49fb4dbe..1116fc6bf2ac 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -45,8 +45,10 @@ Following tests are defined (with perf commands): perf record -d kill (test-record-data) perf record -F 100 kill (test-record-freq) perf record -g kill (test-record-graph-default) + perf record -g kill (test-record-graph-default-aarch64) perf record --call-graph dwarf kill (test-record-graph-dwarf) perf record --call-graph fp kill (test-record-graph-fp) + perf record --call-graph fp kill (test-record-graph-fp-aarch64) perf record --group -e cycles,instructions kill (test-record-group) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index 5d8234d50845..f0a18b4ea4f5 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -2,6 +2,8 @@ command = record args = --no-bpf-event -g kill >/dev/null 2>&1 ret = 1 +# arm64 enables registers in the default mode (fp) +arch = !aarch64 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-graph-default-aarch64 b/tools/perf/tests/attr/test-record-graph-default-aarch64 new file mode 100644 index 000000000000..e98d62efb6f7 --- /dev/null +++ b/tools/perf/tests/attr/test-record-graph-default-aarch64 @@ -0,0 +1,9 @@ +[config] +command = record +args = --no-bpf-event -g kill >/dev/null 2>&1 +ret = 1 +arch = aarch64 + +[event:base-record] +sample_type=4391 +sample_regs_user=1073741824 diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index 5630521c0b0f..a6e60e839205 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -2,6 +2,8 @@ command = record args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1 ret = 1 +# arm64 enables registers in fp mode +arch = !aarch64 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-graph-fp-aarch64 b/tools/perf/tests/attr/test-record-graph-fp-aarch64 new file mode 100644 index 000000000000..cbeea9971285 --- /dev/null +++ b/tools/perf/tests/attr/test-record-graph-fp-aarch64 @@ -0,0 +1,9 @@ +[config] +command = record +args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1 +ret = 1 +arch = aarch64 + +[event:base-record] +sample_type=4391 +sample_regs_user=1073741824 diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 384856347236..4965dd666956 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits) bm = bitmap_zalloc(nbits); if (map && bm) { - for (i = 0; i < map->nr; i++) - set_bit(map->map[i], bm); + for (i = 0; i < perf_cpu_map__nr(map); i++) + set_bit(perf_cpu_map__cpu(map, i).cpu, bm); } if (map) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8cb5a1c3489e..fac3717d9ba1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = { &suite__expand_cgroup_events, &suite__perf_time_to_tsc, &suite__dlfilter, + &suite__sigtrap, NULL, }; @@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, continue; st.file = ent->d_name; - pr_info("%2d: %-*s:", i, width, test_suite.desc); + pr_info("%3d: %-*s:", i, width, test_suite.desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) continue; } - pr_info("%2d: %-*s:", i, width, test_description(t, -1)); + pr_info("%3d: %-*s:", i, width, test_description(t, -1)); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) curr, argc, argv)) continue; - pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, + pr_info("%3d.%1d: %-*s:", i, subi + 1, subw, test_description(t, subi)); test_and_print(t, subi); } @@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!perf_test__matches(t.desc, curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, t.desc); + pr_info("%3d: %s\n", i, t.desc); } @@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv) if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(t, -1)); if (has_subtests(t)) { int subn = num_subtests(t); int subi; for (subi = 0; subi < subn; subi++) - pr_info("%2d:%1d: %s\n", i, subi + 1, + pr_info("%3d:%1d: %s\n", i, subi + 1, test_description(t, subi)); } } @@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 89a155092f85..84e87e31f119 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 20); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); + TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i); } perf_cpu_map__put(map); @@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", map->nr == 2); - TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); + TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256); TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); perf_cpu_map__put(map); return 0; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index d01532d40acb..78db4d704e76 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -75,10 +75,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); - TEST_ASSERT_VAL("wrong cpus", map->nr == 3); - TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); - TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3); perf_cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index b17b86391383..4c96829510c9 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -25,14 +25,15 @@ static unsigned long *get_bitmap(const char *str, int nbits) { struct perf_cpu_map *map = perf_cpu_map__new(str); unsigned long *bm = NULL; - int i; bm = bitmap_zalloc(nbits); if (map && bm) { - for (i = 0; i < map->nr; i++) { - set_bit(map->map[i], bm); - } + struct perf_cpu cpu; + int i; + + perf_cpu_map__for_each_cpu(cpu, i, map) + set_bit(cpu.cpu, bm); } if (map) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 90b2feda31ac..c3c17600f29c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -59,11 +59,12 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest } CPU_ZERO(&cpu_set); - CPU_SET(cpus->map[0], &cpu_set); + CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); + perf_cpu_map__cpu(cpus, 0).cpu, + str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index cd3dd463783f..1ab362323d25 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -22,7 +22,8 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - int err = -1, fd, cpu; + int err = -1, fd, idx; + struct perf_cpu cpu; struct perf_cpu_map *cpus; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; @@ -58,23 +59,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb goto out_evsel_delete; } - for (cpu = 0; cpu < cpus->nr; ++cpu) { - unsigned int ncalls = nr_openat_calls + cpu; + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + unsigned int ncalls = nr_openat_calls + idx; /* * XXX eventually lift this restriction in a way that * keeps perf building on older glibc installations * without CPU_ALLOC. 1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpus->map[cpu] >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpus->map[cpu]); + if (cpu.cpu >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpu.cpu); continue; } - CPU_SET(cpus->map[cpu], &cpu_set); + CPU_SET(cpu.cpu, &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[cpu], + cpu.cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -82,37 +83,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpus->map[cpu], &cpu_set); + CPU_CLR(cpu.cpu, &cpu_set); } - /* - * Here we need to explicitly preallocate the counts, as if - * we use the auto allocation it will allocate just for 1 cpu, - * as we start by cpu 0. - */ - if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { - pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); - goto out_close_fd; - } + evsel->core.cpus = perf_cpu_map__get(cpus); err = 0; - for (cpu = 0; cpu < cpus->nr; ++cpu) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { unsigned int expected; - if (cpus->map[cpu] >= CPU_SETSIZE) + if (cpu.cpu >= CPU_SETSIZE) continue; - if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { + if (evsel__read_on_cpu(evsel, idx, 0) < 0) { pr_debug("evsel__read_on_cpu\n"); err = -1; break; } - expected = nr_openat_calls + cpu; - if (perf_counts(evsel->counts, cpu, 0)->val != expected) { + expected = nr_openat_calls + idx; + if (perf_counts(evsel->counts, idx, 0)->val != expected) { pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); + expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val); err = -1; } } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index a508f1dbcb2a..e71efadb24f5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2069,6 +2069,31 @@ static int test_event(struct evlist_test *e) return ret; } +static int test_event_fake_pmu(const char *str) +{ + struct parse_events_error err; + struct evlist *evlist; + int ret; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + parse_events_error__init(&err); + perf_pmu__test_parse_init(); + ret = __parse_events(evlist, str, &err, &perf_pmu__fake); + if (ret) { + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + str, ret, err.str); + parse_events_error__print(&err, str); + } + + parse_events_error__exit(&err); + evlist__delete(evlist); + + return ret; +} + static int test_events(struct evlist_test *events, unsigned cnt) { int ret1, ret2 = 0; @@ -2276,6 +2301,26 @@ static int test_pmu_events_alias(char *event, char *alias) return test_event(&e); } +static int test_pmu_events_alias2(void) +{ + static const char events[][30] = { + "event-hyphen", + "event-two-hyph", + }; + unsigned long i; + int ret = 0; + + for (i = 0; i < ARRAY_SIZE(events); i++) { + ret = test_event_fake_pmu(&events[i][0]); + if (ret) { + pr_err("check_parse_fake %s failed\n", &events[i][0]); + break; + } + } + + return ret; +} + static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { int ret1, ret2 = 0; @@ -2313,6 +2358,10 @@ do { \ return ret; } + ret1 = test_pmu_events_alias2(); + if (!ret2) + ret2 = ret1; + ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); if (!ret2) ret2 = ret1; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index df1c9a3cc05b..1c695fb5a79c 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -143,6 +143,34 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { .matching_pmu = "uncore_cbox_0", }; +static const struct perf_pmu_test_event uncore_hyphen = { + .event = { + .name = "event-hyphen", + .event = "umask=0x00,event=0xe0", + .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN", + .topic = "uncore", + .long_desc = "UNC_CBO_HYPHEN", + .pmu = "uncore_cbox", + }, + .alias_str = "umask=0,event=0xe0", + .alias_long_desc = "UNC_CBO_HYPHEN", + .matching_pmu = "uncore_cbox_0", +}; + +static const struct perf_pmu_test_event uncore_two_hyph = { + .event = { + .name = "event-two-hyph", + .event = "umask=0x00,event=0xc0", + .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH", + .topic = "uncore", + .long_desc = "UNC_CBO_TWO_HYPH", + .pmu = "uncore_cbox", + }, + .alias_str = "umask=0,event=0xc0", + .alias_long_desc = "UNC_CBO_TWO_HYPH", + .matching_pmu = "uncore_cbox_0", +}; + static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = { .name = "uncore_hisi_l3c.rd_hit_cpipe", @@ -188,6 +216,8 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = { static const struct perf_pmu_test_event *uncore_events[] = { &uncore_hisi_ddrc_flux_wcmd, &unc_cbo_xsnp_response_miss_eviction, + &uncore_hyphen, + &uncore_two_hyph, &uncore_hisi_l3c_rd_hit_cpipe, &uncore_imc_free_running_cache_miss, &uncore_imc_cache_hits, @@ -654,6 +684,8 @@ static struct perf_pmu_test_pmu test_pmus[] = { }, .aliases = { &unc_cbo_xsnp_response_miss_eviction, + &uncore_hyphen, + &uncore_two_hyph, }, }, { diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index de24d374ce24..cb35e488809a 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -6,7 +6,7 @@ set -e for m in $(perf list --raw-dump metricgroups); do echo "Testing $m" - perf stat -M "$m" true + perf stat -M "$m" -a true done exit 0 diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c new file mode 100644 index 000000000000..e32ece90e164 --- /dev/null +++ b/tools/perf/tests/sigtrap.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic test for sigtrap support. + * + * Copyright (C) 2021, Google LLC. + */ + +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <linux/hw_breakpoint.h> +#include <linux/string.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "cloexec.h" +#include "debug.h" +#include "event.h" +#include "tests.h" +#include "../perf-sys.h" + +#define NUM_THREADS 5 + +static struct { + int tids_want_signal; /* Which threads still want a signal. */ + int signal_count; /* Sanity check number of signals received. */ + volatile int iterate_on; /* Variable to set breakpoint on. */ + siginfo_t first_siginfo; /* First observed siginfo_t. */ +} ctx; + +#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on)) + +static struct perf_event_attr make_event_attr(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_BREAKPOINT, + .size = sizeof(attr), + .sample_period = 1, + .disabled = 1, + .bp_addr = (unsigned long)&ctx.iterate_on, + .bp_type = HW_BREAKPOINT_RW, + .bp_len = HW_BREAKPOINT_LEN_1, + .inherit = 1, /* Children inherit events ... */ + .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ + .remove_on_exec = 1, /* Required by sigtrap. */ + .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ + .sig_data = TEST_SIG_DATA, + .exclude_kernel = 1, /* To allow */ + .exclude_hv = 1, /* running as !root */ + }; + return attr; +} + +static void +sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused) +{ + if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) + ctx.first_siginfo = *info; + __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED); +} + +static void *test_thread(void *arg) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *)arg; + pid_t tid = syscall(SYS_gettid); + int i; + + pthread_barrier_wait(barrier); + + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + for (i = 0; i < ctx.iterate_on - 1; i++) + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + + return NULL; +} + +static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) +{ + int i; + + pthread_barrier_wait(barrier); + for (i = 0; i < NUM_THREADS; i++) + TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0); + + return TEST_OK; +} + +static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) +{ + int ret; + + ctx.iterate_on = 3000; + + TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0); + TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0); + ret = run_test_threads(threads, barrier); + TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); + + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); + TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); +#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ + TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type, + PERF_TYPE_BREAKPOINT); + TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data, + TEST_SIG_DATA); +#endif + + return ret; +} + +static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + struct perf_event_attr attr = make_event_attr(); + struct sigaction action = {}; + struct sigaction oldact; + pthread_t threads[NUM_THREADS]; + pthread_barrier_t barrier; + char sbuf[STRERR_BUFSIZE]; + int i, fd, ret = TEST_FAIL; + + if (!BP_SIGNAL_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture"); + return TEST_SKIP; + } + + pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1); + + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + if (sigaction(SIGTRAP, &action, &oldact)) { + pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out; + } + + fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); + if (fd < 0) { + pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out_restore_sigaction; + } + + for (i = 0; i < NUM_THREADS; i++) { + if (pthread_create(&threads[i], NULL, test_thread, &barrier)) { + pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out_close_perf_event; + } + } + + ret = run_stress_test(fd, threads, &barrier); + +out_close_perf_event: + close(fd); +out_restore_sigaction: + sigaction(SIGTRAP, &oldact, NULL); +out: + pthread_barrier_destroy(&barrier); + return ret; +} + +DEFINE_SUITE("Sigtrap", sigtrap); diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 2eb096b5e6da..500974040fe3 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub count.run = 300; TEST_ASSERT_VAL("failed to synthesize stat_config", - !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL)); + !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3, + &count, process_stat_event, NULL)); return 0; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8f65098110fc..5bbb8f6a48fc 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing); DECLARE_SUITE(expand_cgroup_events); DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); +DECLARE_SUITE(sigtrap); /* * PowerPC and S390 do not support creation of instruction breakpoints using the diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 869986139146..ee1e3dcbc0bd 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -112,62 +112,88 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { - if (!cpu_map__has(map, i)) + struct perf_cpu cpu = { .cpu = i }; + + if (!perf_cpu_map__has(map, cpu)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, session->header.env.cpu[i].socket_id); } + // Test that CPU ID contains socket, die, core and CPU + for (i = 0; i < perf_cpu_map__nr(map); i++) { + id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); + TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", + perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu); + + TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); + TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); + + TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); + TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); + } + // Test that core ID contains socket, die and core - for (i = 0; i < map->nr; i++) { - id = cpu_map__get_core(map, i, NULL); + for (i = 0; i < perf_cpu_map__nr(map); i++) { + id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == id.core); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die - for (i = 0; i < map->nr; i++) { - id = cpu_map__get_die(map, i, NULL); + for (i = 0; i < perf_cpu_map__nr(map); i++) { + id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } // Test that socket ID contains only socket - for (i = 0; i < map->nr; i++) { - id = cpu_map__get_socket(map, i, NULL); + for (i = 0; i < perf_cpu_map__nr(map); i++) { + id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } // Test that node ID contains only node - for (i = 0; i < map->nr; i++) { - id = cpu_map__get_node(map, i, NULL); + for (i = 0; i < perf_cpu_map__nr(map); i++) { + id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id.node); + cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 3109d7b05e11..3d278785fe57 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,7 +4,7 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*' +regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ sed -r "s/$regex/\2 \1/g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index e81c2493efdf..44ba900828f6 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .opts = opts, }; int ret = -1, err; + int not_annotated = list_empty(¬es->src->source); if (sym == NULL) return -1; @@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, if (ms->map->dso->annotate_warned) return -1; - err = symbol__annotate2(ms, evsel, opts, &browser.arch); - if (err) { - char msg[BUFSIZ]; - ms->map->dso->annotate_warned = true; - symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); - ui__error("Couldn't annotate %s:\n%s", sym->name, msg); - goto out_free_offsets; + if (not_annotated) { + err = symbol__annotate2(ms, evsel, opts, &browser.arch); + if (err) { + char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); + goto out_free_offsets; + } } ui_helpline__push("Press ESC to exit"); @@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ret = annotate_browser__run(&browser, evsel, hbt); - annotated_source__purge(notes->src); + if(not_annotated) + annotated_source__purge(notes->src); out_free_offsets: - zfree(¬es->offsets); + if(not_annotated) + zfree(¬es->offsets); return ret; } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2e5bfbb69960..2a403cefcaf2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,3 +1,4 @@ +perf-y += arm64-frame-pointer-unwind-support.o perf-y += annotate.o perf-y += block-info.o perf-y += block-range.o @@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 7b12bd7a3080..4d216c0dc425 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -11,7 +11,7 @@ static int get_cpu_set_size(void) { - int sz = cpu__max_cpu() + 8 - 1; + int sz = cpu__max_cpu().cpu + 8 - 1; /* * sched_getaffinity doesn't like masks smaller than the kernel. * Hopefully that's big enough. @@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu) clear_bit(cpu, a->sched_cpus); } -void affinity__cleanup(struct affinity *a) +static void __affinity__cleanup(struct affinity *a) { int cpu_set_size = get_cpu_set_size(); @@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a) zfree(&a->sched_cpus); zfree(&a->orig_cpus); } + +void affinity__cleanup(struct affinity *a) +{ + if (a != NULL) + __affinity__cleanup(a); +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 01900689dc00..8190a124b99d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2036,6 +2036,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) memset(&objdump_process, 0, sizeof(objdump_process)); objdump_process.argv = objdump_argv; objdump_process.out = -1; + objdump_process.err = -1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3fc528c9270c..5e390a1a79ab 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: + if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) + decoder->record.latency = payload; break; case ARM_SPE_CONTEXT: decoder->record.context_id = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 46a8556a9e95..69b31084d6be 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -33,6 +33,7 @@ struct arm_spe_record { enum arm_spe_sample_type type; int err; u32 op; + u32 latency; u64 from_ip; u64 to_ip; u64 timestamp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index fccac06b573a..d2b64e3f588b 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -58,6 +58,8 @@ struct arm_spe { u8 sample_branch; u8 sample_remote_access; u8 sample_memory; + u8 sample_instructions; + u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -68,6 +70,7 @@ struct arm_spe { u64 branch_miss_id; u64 remote_access_id; u64 memory_id; + u64 instructions_id; u64 kernel_start; @@ -90,6 +93,7 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; + u64 period_instructions; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; speq->cpu = -1; + speq->period_instructions = 0; /* params set */ params.get_trace = arm_spe_get_trace; @@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; sample.data_src = data_src; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; + sample.weight = record->latency; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, + u64 spe_events_id, u64 data_src) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + /* + * Handles perf instruction sampling period. + */ + speq->period_instructions++; + if (speq->period_instructions < spe->instructions_sample_period) + return 0; + speq->period_instructions = 0; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + sample.addr = record->virt_addr; + sample.phys_addr = record->phys_addr; + sample.data_src = data_src; + sample.period = spe->instructions_sample_period; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } + if (spe->sample_instructions) { + err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); + if (err) + return err; + } + return 0; } @@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else @@ -1107,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) return err; spe->memory_id = id; arm_spe_set_event_name(evlist, id, "memory"); + id += 1; + } + + if (spe->synth_opts.instructions) { + if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { + pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); + goto synth_instructions_out; + } + if (spe->synth_opts.period > 1) + pr_warning("Arm SPE has a hardware-based sample period.\n" + "Additional instruction events will be discarded by --itrace\n"); + + spe->sample_instructions = true; + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = spe->synth_opts.period; + spe->instructions_sample_period = attr.sample_period; + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->instructions_id = id; + arm_spe_set_event_name(evlist, id, "instructions"); } +synth_instructions_out: return 0; } diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c new file mode 100644 index 000000000000..2242a885fbd7 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arm64-frame-pointer-unwind-support.h" +#include "callchain.h" +#include "event.h" +#include "perf_regs.h" // SMPL_REG_MASK +#include "unwind.h" + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +struct entries { + u64 stack[2]; + size_t length; +}; + +static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) +{ + return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs + && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); +} + +static int add_entry(struct unwind_entry *entry, void *arg) +{ + struct entries *entries = arg; + + entries->stack[entries->length++] = entry->ip; + return 0; +} + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx) +{ + int ret; + struct entries entries = {}; + struct regs_dump old_regs = sample->user_regs; + + if (!get_leaf_frame_caller_enabled(sample)) + return 0; + + /* + * If PC and SP are not recorded, get the value of PC from the stack + * and set its mask. SP is not used when doing the unwinding but it + * still needs to be set to prevent failures. + */ + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); + sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; + } + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); + sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; + } + + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + sample->user_regs = old_regs; + + if (ret || entries.length != 2) + return ret; + + return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1]; +} diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h new file mode 100644 index 000000000000..32af9ce94398 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H + +#include "event.h" +#include "thread.h" + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); + +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c679394b898d..825336304a37 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->prev = 0; mm->idx = mp->idx; mm->tid = mp->tid; - mm->cpu = mp->cpu; + mm->cpu = mp->cpu.cpu; if (!mp->len) { mm->base = NULL; @@ -174,13 +174,13 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = evlist->core.cpus->map[idx]; + mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx); if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else mp->tid = -1; } else { - mp->cpu = -1; + mp->cpu.cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); } } @@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, if (!queue->set) { queue->set = true; queue->tid = buffer->tid; - queue->cpu = buffer->cpu; + queue->cpu = buffer->cpu.cpu; } buffer->buffer_nr = queues->next_buffer_nr++; @@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } -static bool filter_cpu(struct perf_session *session, int cpu) +static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu) { unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; - return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); + return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap); } static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, @@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct auxtrace_buffer buffer = { .pid = -1, .tid = event->auxtrace.tid, - .cpu = event->auxtrace.cpu, + .cpu = { event->auxtrace.cpu }, .data_offset = data_offset, .offset = event->auxtrace.offset, .reference = event->auxtrace.reference, diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bbf0d78c6401..19910b9011f3 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,6 +15,7 @@ #include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> +#include <internal/cpumap.h> #include <asm/bitsperlong.h> #include <asm/barrier.h> @@ -240,7 +241,7 @@ struct auxtrace_buffer { size_t size; pid_t pid; pid_t tid; - int cpu; + struct perf_cpu cpu; void *data; off_t data_offset; void *mmap_addr; @@ -350,7 +351,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; - int cpu; + struct perf_cpu cpu; }; /** diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 528aeb0ab79d..16ec605a9fe4 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -424,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n, size_t prologue_cnt = 0; int i, err; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) goto errout; pev = &priv->pev; @@ -573,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) bool need_prologue = false; int err, i; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -645,8 +645,11 @@ int bpf__probe(struct bpf_object *obj) goto out; priv = bpf_program__priv(prog); - if (IS_ERR(priv) || !priv) { - err = PTR_ERR(priv); + if (IS_ERR_OR_NULL(priv)) { + if (!priv) + err = -BPF_LOADER_ERRNO__INTERNAL; + else + err = PTR_ERR(priv); goto out; } @@ -696,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj) struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -754,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj, struct perf_probe_event *pev; int i, fd; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -1217,9 +1220,10 @@ bpf__obj_config_map(struct bpf_object *obj, pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; out: - free(map_name); if (!err) *key_scan_pos += strlen(map_opt); + + free(map_name); return err; } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 5a97fd7d0a71..3ce8d03cb7ec 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel) return 0; } -static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bpf_prog_profiler_bpf *skel; @@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); if (ret) return ret; } @@ -554,7 +554,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) filter_type == BPERF_FILTER_TGID) key = evsel->core.threads->map[i].pid; else if (filter_type == BPERF_FILTER_CPU) - key = evsel->core.cpus->map[i]; + key = evsel->core.cpus->map[i].cpu; else break; @@ -580,12 +580,12 @@ out: return err; } -static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); } /* @@ -598,7 +598,7 @@ static int bperf_sync_counters(struct evsel *evsel) num_cpu = all_cpu_map->nr; for (i = 0; i < num_cpu; i++) { - cpu = all_cpu_map->map[i]; + cpu = all_cpu_map->map[i].cpu; bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); } return 0; @@ -619,15 +619,17 @@ static int bperf__disable(struct evsel *evsel) static int bperf__read(struct evsel *evsel) { struct bperf_follower_bpf *skel = evsel->follower_skel; - __u32 num_cpu_bpf = cpu__max_cpu(); + __u32 num_cpu_bpf = cpu__max_cpu().cpu; struct bpf_perf_event_value values[num_cpu_bpf]; int reading_map_fd, err = 0; - __u32 i, j, num_cpu; + __u32 i; + int j; bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + struct perf_cpu entry; __u32 cpu; err = bpf_map_lookup_elem(reading_map_fd, &i, values); @@ -637,16 +639,15 @@ static int bperf__read(struct evsel *evsel) case BPERF_FILTER_GLOBAL: assert(i == 0); - num_cpu = all_cpu_map->nr; - for (j = 0; j < num_cpu; j++) { - cpu = all_cpu_map->map[j]; + perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) { + cpu = entry.cpu; perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; } break; case BPERF_FILTER_CPU: - cpu = evsel->core.cpus->map[i]; + cpu = evsel->core.cpus->map[i].cpu; perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, i, 0)->run = values[cpu].running; @@ -771,11 +772,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel) evsel->follower_skel == NULL; } -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { if (bpf_counter_skip(evsel)) return 0; - return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); + return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 65ebaa6694fb..4dbf26408b69 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, struct target *target); typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, - int cpu, + int cpu_map_idx, int fd); struct bpf_counter_ops { @@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); #else /* HAVE_BPF_SKEL */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index cbc6c2bca488..ac60c08e8e2a 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist) struct cgroup *cgrp, *leader_cgrp; __u32 i, cpu; __u32 nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; int map_size, map_fd; int prog_fd, err; @@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist) for (cpu = 0; cpu < nr_cpus; cpu++) { int fd = FD(evsel, cpu); __u32 idx = evsel->core.idx * total_cpus + - evlist->core.all_cpus->map[cpu]; + evlist->core.all_cpus->map[cpu].cpu; err = bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); @@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist) int prog_fd = bpf_program__fd(skel->progs.trigger_read); for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; bperf_trigger_reading(prog_fd, cpu); } @@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel) { struct evlist *evlist = evsel->evlist; int i, cpu, nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; struct perf_counts_values *counts; struct bpf_perf_event_value *values; int reading_map_fd, err = 0; @@ -266,13 +266,13 @@ static int bperf_cgrp__read(struct evsel *evsel) idx = evsel->core.idx; err = bpf_map_lookup_elem(reading_map_fd, &idx, values); if (err) { - pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", + pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n", idx, evsel__name(evsel), evsel->cgrp->name); goto out; } for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; counts = perf_counts(evsel->counts, i, 0); counts->val = values[cpu].counter; diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c new file mode 100644 index 000000000000..d756cc66eef3 --- /dev/null +++ b/tools/perf/util/bpf_ftrace.c @@ -0,0 +1,152 @@ +#include <stdio.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> + +#include <linux/err.h> + +#include "util/ftrace.h" +#include "util/cpumap.h" +#include "util/thread_map.h" +#include "util/debug.h" +#include "util/evlist.h" +#include "util/bpf_counter.h" + +#include "util/bpf_skel/func_latency.skel.h" + +static struct func_latency_bpf *skel; + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) +{ + int fd, err; + int i, ncpus = 1, ntasks = 1; + struct filter_entry *func; + + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: %s target function(s).\n", + list_empty(&ftrace->filters) ? "No" : "Too many"); + return -1; + } + + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + + skel = func_latency_bpf__open(); + if (!skel) { + pr_err("Failed to open func latency skeleton\n"); + return -1; + } + + /* don't need to set cpu filter for system-wide mode */ + if (ftrace->target.cpu_list) { + ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus); + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + ntasks = perf_thread_map__nr(ftrace->evlist->core.threads); + bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + } + + set_max_rlimit(); + + err = func_latency_bpf__load(skel); + if (err) { + pr_err("Failed to load func latency skeleton\n"); + goto out; + } + + if (ftrace->target.cpu_list) { + u32 cpu; + u8 val = 1; + + skel->bss->has_cpu = 1; + fd = bpf_map__fd(skel->maps.cpu_filter); + + for (i = 0; i < ncpus; i++) { + cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu; + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); + } + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + + for (i = 0; i < ntasks; i++) { + pid = perf_thread_map__pid(ftrace->evlist->core.threads, i); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + } + + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } + + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + + /* XXX: we don't actually use this fd - just for poll() */ + return open("/dev/null", O_RDONLY); + +out: + return err; +} + +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 1; + return 0; +} + +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 0; + return 0; +} + +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[]) +{ + int i, fd, err; + u32 idx; + u64 *hist; + int ncpus = cpu__max_cpu().cpu; + + fd = bpf_map__fd(skel->maps.latency); + + hist = calloc(ncpus, sizeof(*hist)); + if (hist == NULL) + return -ENOMEM; + + for (idx = 0; idx < NUM_BUCKET; idx++) { + err = bpf_map_lookup_elem(fd, &idx, hist); + if (err) { + buckets[idx] = 0; + continue; + } + + for (i = 0; i < ncpus; i++) + buckets[idx] += hist[i]; + } + + free(hist); + return 0; +} + +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + func_latency_bpf__destroy(skel); + return 0; +} diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c new file mode 100644 index 000000000000..ea94187fe443 --- /dev/null +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Google +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +// This should be in sync with "util/ftrace.h" +#define NUM_BUCKET 22 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 10000); +} functime SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, NUM_BUCKET); +} latency SEC(".maps"); + + +int enabled = 0; +int has_cpu = 0; +int has_task = 0; + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled) + return 0; + + key = bpf_get_current_pid_tgid(); + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u32 pid = key & 0xffffffff; + __u8 *ok; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + + now = bpf_ktime_get_ns(); + + // overwrite timestamp for nested functions + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("kretprobe/func") +int BPF_PROG(func_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + __s64 delta = bpf_ktime_get_ns() - *start; + __u32 key; + __u64 *hist; + + bpf_map_delete_elem(&functime, &tid); + + if (delta < 0) + return 0; + + // calculate index using delta in usec + for (key = 0; key < (NUM_BUCKET - 1); key++) { + if (delta < ((1000UL) << key)) + break; + } + + hist = bpf_map_lookup_elem(&latency, &key); + if (!hist) + return 0; + + *hist += 1; + } + + return 0; +} diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 2285b1eb3128..a9a909db8cc7 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -49,7 +49,9 @@ const char *branch_type_name(int type) "SYSCALL", "SYSRET", "COND_CALL", - "COND_RET" + "COND_RET", + "ERET", + "IRQ" }; if (type >= 0 && type < PERF_BR_MAX) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 8e2777133bd9..131207b91d15 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) map__zput(node->ms.map); } -void callchain_param_setup(u64 sample_type) +void callchain_param_setup(u64 sample_type, const char *arch) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* + * It's necessary to use libunwind to reliably determine the caller of + * a leaf function on aarch64, as otherwise we cannot know whether to + * start from the LR or FP. + * + * Always starting from the LR can result in duplicate or entirely + * erroneous entries. Always skipping the LR and starting from the FP + * can result in missing entries. + */ + if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64")) + dwarf_callchain_users = true; } static bool chain_match(struct callchain_list *base_chain, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 5824134f983b..d95615daed73 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif +void arch__add_leaf_frame_record_opts(struct record_opts *opts); + char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); char *callchain_node__scnprintf_value(struct callchain_node *node, @@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root, u64 *branch_count, u64 *predicted_count, u64 *abort_count, u64 *cycles_count); -void callchain_param_setup(u64 sample_type); +void callchain_param_setup(u64 sample_type, const char *arch); bool callchain_cnode_matched(struct callchain_node *base_cnode, struct callchain_node *pair_cnode); diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 582f3aeaf5e4..7a447d918458 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -4,6 +4,7 @@ #include <string.h> #include "evsel.h" #include "counts.h" +#include <perf/threadmap.h> #include <linux/zalloc.h> struct perf_counts *perf_counts__new(int ncpus, int nthreads) @@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel) perf_counts__reset(evsel->counts); } -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) +int evsel__alloc_counts(struct evsel *evsel) { - evsel->counts = perf_counts__new(ncpus, nthreads); + struct perf_cpu_map *cpus = evsel__cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); + + evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 7ff36bf6d644..5de275194f2b 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -18,21 +18,21 @@ struct perf_counts { static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu, int thread) +perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread) { - return xyarray__entry(counts->values, cpu, thread); + return xyarray__entry(counts->values, cpu_map_idx, thread); } static inline bool -perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread) +perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread) { - return *((bool *) xyarray__entry(counts->loaded, cpu, thread)); + return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)); } static inline void -perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded) +perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded) { - *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded; + *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded; } struct perf_counts *perf_counts__new(int ncpus, int nthreads); @@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts); void perf_counts__reset(struct perf_counts *counts); void evsel__reset_counts(struct evsel *evsel); -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); +int evsel__alloc_counts(struct evsel *evsel); void evsel__free_counts(struct evsel *evsel); #endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 87d3eca9b872..12b2243222b0 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -13,9 +13,13 @@ #include <linux/ctype.h> #include <linux/zalloc.h> -static int max_cpu_num; -static int max_present_cpu_num; +static struct perf_cpu max_cpu_num; +static struct perf_cpu max_present_cpu_num; static int max_node_num; +/** + * The numa node X as read from /sys/devices/system/node/nodeX indexed by the + * CPU number. + */ static int *cpunode_map; static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) @@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) * otherwise it would become 65535. */ if (cpus->cpu[i] == (u16) -1) - map->map[i] = -1; + map->map[i].cpu = -1; else - map->map[i] = (int) cpus->cpu[i]; + map->map[i].cpu = (int) cpus->cpu[i]; } } @@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map int cpu, i = 0; for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++] = cpu; + map->map[i++].cpu = cpu; } return map; @@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i].cpu = -1; refcount_set(&cpus->refcnt, 1); } @@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = cpu_map__empty_aggr_cpu_id(); + cpus->map[i] = aggr_cpu_id__empty(); refcount_set(&cpus->refcnt, 1); } @@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value) return sysfs__read_int(path, value); } -int cpu_map__get_socket_id(int cpu) +int cpu__get_socket_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value); return ret ?: value; } -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, - void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - - id.socket = cpu_map__get_socket_id(cpu); + id.socket = cpu__get_socket_id(cpu); return id; } -static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) +static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; @@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->thread - b->thread; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), - void *data) +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data) { - int nr = cpus->nr; - struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); - int cpu, s2; - struct aggr_cpu_id s1; + int idx; + struct perf_cpu cpu; + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); if (!c) - return -1; + return NULL; /* Reset size as it may only be partially filled */ c->nr = 0; - for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpus, cpu, data); - for (s2 = 0; s2 < c->nr; s2++) { - if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + bool duplicate = false; + struct aggr_cpu_id cpu_id = get_id(cpu, data); + + for (int j = 0; j < c->nr; j++) { + if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) { + duplicate = true; break; + } } - if (s2 == c->nr) { - c->map[c->nr] = s1; + if (!duplicate) { + c->map[c->nr] = cpu_id; c->nr++; } } + /* Trim. */ + if (c->nr != cpus->nr) { + struct cpu_aggr_map *trimmed_c = + realloc(c, + sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr); + + if (trimmed_c) + c = trimmed_c; + } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + + return c; - *res = c; - return 0; } -int cpu_map__get_die_id(int cpu) +int cpu__get_die_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "die_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value); return ret ?: value; } -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) { - int cpu, die; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id; + int die; - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - - die = cpu_map__get_die_id(cpu); + die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ if (die == -1) die = 0; @@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat * with the socket ID and then add die to * make a unique ID. */ - id = cpu_map__get_socket(map, idx, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + id = aggr_cpu_id__socket(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; id.die = die; return id; } -int cpu_map__get_core_id(int cpu) +int cpu__get_core_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "core_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); return ret ?: value; } -int cpu_map__get_node_id(int cpu) -{ - return cpu__get_node(cpu); -} - -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - - if (idx > map->nr) - return id; + struct aggr_cpu_id id; + int core = cpu__get_core_id(cpu); - cpu = map->map[idx]; - - cpu = cpu_map__get_core_id(cpu); - - /* cpu_map__get_die returns a struct with socket and die set*/ - id = cpu_map__get_die(map, idx, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + /* aggr_cpu_id__die returns a struct with socket and die set. */ + id = aggr_cpu_id__die(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; /* * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - id.core = cpu; + id.core = core; return id; + } -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id; - if (idx < 0 || idx >= map->nr) + /* aggr_cpu_id__core returns a struct with socket, die and core set. */ + id = aggr_cpu_id__core(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; - id.node = cpu_map__get_node_id(map->map[idx]); + id.cpu = cpu; return id; -} -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused) { - return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); -} - -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); -} + struct aggr_cpu_id id = aggr_cpu_id__empty(); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) -{ - return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); + id.node = cpu__get_node(cpu); + return id; } /* setup simple routines to easily access node numbers given a cpu number */ @@ -335,8 +319,8 @@ static void set_max_cpu_num(void) int ret = -1; /* set up default */ - max_cpu_num = 4096; - max_present_cpu_num = 4096; + max_cpu_num.cpu = 4096; + max_present_cpu_num.cpu = 4096; mnt = sysfs__mountpoint(); if (!mnt) @@ -349,7 +333,7 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num); + ret = get_max_num(path, &max_cpu_num.cpu); if (ret) goto out; @@ -360,11 +344,11 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num); + ret = get_max_num(path, &max_present_cpu_num.cpu); out: if (ret) - pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num); + pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); } /* Determine highest possible node in the system for sparse allocation */ @@ -403,31 +387,31 @@ int cpu__max_node(void) return max_node_num; } -int cpu__max_cpu(void) +struct perf_cpu cpu__max_cpu(void) { - if (unlikely(!max_cpu_num)) + if (unlikely(!max_cpu_num.cpu)) set_max_cpu_num(); return max_cpu_num; } -int cpu__max_present_cpu(void) +struct perf_cpu cpu__max_present_cpu(void) { - if (unlikely(!max_present_cpu_num)) + if (unlikely(!max_present_cpu_num.cpu)) set_max_cpu_num(); return max_present_cpu_num; } -int cpu__get_node(int cpu) +int cpu__get_node(struct perf_cpu cpu) { if (unlikely(cpunode_map == NULL)) { pr_debug("cpu_map not initialized\n"); return -1; } - return cpunode_map[cpu]; + return cpunode_map[cpu.cpu]; } static int init_cpunode_map(void) @@ -437,13 +421,13 @@ static int init_cpunode_map(void) set_max_cpu_num(); set_max_node_num(); - cpunode_map = calloc(max_cpu_num, sizeof(int)); + cpunode_map = calloc(max_cpu_num.cpu, sizeof(int)); if (!cpunode_map) { pr_err("%s: calloc failed\n", __func__); return -1; } - for (i = 0; i < max_cpu_num; i++) + for (i = 0; i < max_cpu_num.cpu; i++) cpunode_map[i] = -1; return 0; @@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void) return 0; } -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) -{ - return perf_cpu_map__idx(cpus, cpu) != -1; -} - -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx) -{ - return cpus->map[idx]; -} - size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { - int i, cpu, start = -1; + int i, start = -1; bool first = true; size_t ret = 0; #define COMMA first ? "" : "," for (i = 0; i < map->nr + 1; i++) { + struct perf_cpu cpu = { .cpu = INT_MAX }; bool last = i == map->nr; - cpu = last ? INT_MAX : map->map[i]; + if (!last) + cpu = map->map[i]; if (start == -1) { start = i; if (last) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[i]); + map->map[i].cpu); } - } else if (((i - start) != (cpu - map->map[start])) || last) { + } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) { int end = i - 1; if (start == end) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[start]); + map->map[start].cpu); } else { ret += snprintf(buf + ret, size - ret, "%s%d-%d", COMMA, - map->map[start], map->map[end]); + map->map[start].cpu, map->map[end].cpu); } first = false; start = i; @@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) int i, cpu; char *ptr = buf; unsigned char *bitmap; - int last_cpu = cpu_map__cpu(map, map->nr - 1); + struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1); if (buf == NULL) return 0; - bitmap = zalloc(last_cpu / 8 + 1); + bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } for (i = 0; i < map->nr; i++) { - cpu = cpu_map__cpu(map, i); + cpu = perf_cpu_map__cpu(map, i).cpu; bitmap[cpu / 8] |= 1 << (cpu % 8); } - for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { unsigned char bits = bitmap[cpu / 8]; if (cpu % 8) @@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) { - return a.thread == b.thread && - a.node == b.node && - a.socket == b.socket && - a.die == b.die && - a.core == b.core; + return a->thread == b->thread && + a->node == b->node && + a->socket == b->socket && + a->die == b->die && + a->core == b->core && + a->cpu.cpu == b->cpu.cpu; } -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) +bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) { - return a.thread == -1 && - a.node == -1 && - a.socket == -1 && - a.die == -1 && - a.core == -1; + return a->thread == -1 && + a->node == -1 && + a->socket == -1 && + a->die == -1 && + a->core == -1 && + a->cpu.cpu == -1; } -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +struct aggr_cpu_id aggr_cpu_id__empty(void) { struct aggr_cpu_id ret = { .thread = -1, .node = -1, .socket = -1, .die = -1, - .core = -1 + .core = -1, + .cpu = (struct perf_cpu){ .cpu = -1 }, }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a27eeaf086e8..703ae6d3386e 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,71 +2,134 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H -#include <stdio.h> #include <stdbool.h> +#include <stdio.h> #include <internal/cpumap.h> #include <perf/cpumap.h> +/** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { + /** A value in the range 0 to number of threads. */ int thread; + /** The numa node X as read from /sys/devices/system/node/nodeX. */ int node; + /** + * The socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id. + */ int socket; + /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; + /** CPU aggregation, note there is one CPU for each SMT thread. */ + struct perf_cpu cpu; }; +/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { refcount_t refcnt; + /** Number of valid entries. */ int nr; + /** The entries. */ struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); -struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); -int cpu_map__get_socket_id(int cpu); -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_die_id(int cpu); -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_core_id(int cpu); -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_node_id(int cpu); -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ -static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) +int cpu__setup_cpunode_map(void); + +int cpu__max_node(void); +struct perf_cpu cpu__max_cpu(void); +struct perf_cpu cpu__max_present_cpu(void); + +/** + * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1. + */ +static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) { - if (!sock || s > sock->nr || s < 0) - return 0; - return sock->map[s]; + return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1; } -int cpu__setup_cpunode_map(void); +/** + * cpu__get_node - Returns the numa node X as read from + * /sys/devices/system/node/nodeX for the given CPU. + */ +int cpu__get_node(struct perf_cpu cpu); +/** + * cpu__get_socket_id - Returns the socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU. + */ +int cpu__get_socket_id(struct perf_cpu cpu); +/** + * cpu__get_die_id - Returns the die id as read from + * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU. + */ +int cpu__get_die_id(struct perf_cpu cpu); +/** + * cpu__get_core_id - Returns the core id as read from + * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU. + */ +int cpu__get_core_id(struct perf_cpu cpu); -int cpu__max_node(void); -int cpu__max_cpu(void); -int cpu__max_present_cpu(void); -int cpu__get_node(int cpu); +/** + * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry + * being empty. + */ +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + +typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data); + +/** + * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in + * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value + * passed to it. The cpu_aggr_map is sorted with duplicate values removed. + */ +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), - void *data); +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); +bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); +struct aggr_cpu_id aggr_cpu_id__empty(void); -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); +/** + * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with + * the socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated + * with the die and socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket + * populated with the core, die and socket for cpu. The function signature is + * compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__core - Create an aggr_cpu_id with the cpu, core, die and socket + * populated with the cpu, core, die and socket for cpu. The function signature + * is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for + * cpu. The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 51b429c86f98..d275d843c155 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -165,7 +165,8 @@ static bool has_die_topology(void) if (uname(&uts) < 0) return false; - if (strncmp(uts.machine, "x86_64", 6)) + if (strncmp(uts.machine, "x86_64", 6) && + strncmp(uts.machine, "s390x", 5)) return false; scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, @@ -187,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void) struct perf_cpu_map *map; bool has_die = has_die_topology(); - ncpus = cpu__max_present_cpu(); + ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ map = perf_cpu_map__new(NULL); @@ -218,7 +219,7 @@ struct cpu_topology *cpu_topology__new(void) tp->core_cpus_list = addr; for (i = 0; i < nr; i++) { - if (!cpu_map__has(map, i)) + if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i })) continue; ret = build_cpu_topology(tp, i); @@ -324,7 +325,7 @@ struct numa_topology *numa_topology__new(void) if (!node_map) goto out; - nr = (u32) node_map->nr; + nr = (u32) perf_cpu_map__nr(node_map); tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr); if (!tp) @@ -333,7 +334,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], node_map->map[i])) { + if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4f672f7d008c..8b95fb3c4d7b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -50,8 +50,6 @@ struct cs_etm_auxtrace { u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; - u8 sample_branches; - u8 sample_instructions; int num_cpu; u64 latest_kernel_timestamp; @@ -410,8 +408,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, { struct cs_etm_packet *tmp; - if (etm->sample_branches || etm->synth_opts.last_branch || - etm->sample_instructions) { + if (etm->synth_opts.branches || etm->synth_opts.last_branch || + etm->synth_opts.instructions) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. @@ -1365,7 +1363,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; id += 1; @@ -1389,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_instructions = true; etm->instructions_sample_type = attr.sample_type; etm->instructions_id = id; id += 1; @@ -1420,7 +1416,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, tidq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq, tidq); - if (etm->sample_instructions && + if (etm->synth_opts.instructions && tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically @@ -1503,7 +1499,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches) { + if (etm->synth_opts.branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1557,6 +1553,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, goto swap_packet; if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; @@ -1582,7 +1579,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } - if (etm->sample_branches && + if (etm->synth_opts.branches && tidq->prev_packet->sample_type == CS_ETM_RANGE) { err = cs_etm__synth_branch_sample(etmq, tidq); if (err) @@ -1614,6 +1611,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, * the trace. */ if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8f7705bbc2da..9e0aee276df8 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; + if (flags & TEP_FIELD_IS_RELATIVE) + offset += fmtf->offset + fmtf->size; } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index f5d260b1df4d..15a4547d608e 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr) if (!files) return -ENOMEM; - data->dir.version = PERF_DIR_VERSION; - data->dir.files = files; - data->dir.nr = nr; - for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) file->fd = ret; } + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; return 0; out_err: diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..65e6c22f38e4 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op, break; case BINARY_PRINT_CHAR_DATA: printed += color_fprintf(fp, color, "%c", - isprint(ch) ? ch : '.'); + isprint(ch) && isascii(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index b9904896eb97..579e44c59914 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -285,13 +285,13 @@ out_enomem: int perf_env__read_cpu_topology_map(struct perf_env *env) { - int cpu, nr_cpus; + int idx, nr_cpus; if (env->cpu != NULL) return 0; if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; nr_cpus = env->nr_cpus_avail; if (nr_cpus == -1) @@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) if (env->cpu == NULL) return -ENOMEM; - for (cpu = 0; cpu < nr_cpus; ++cpu) { - env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); - env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); - env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); + for (idx = 0; idx < nr_cpus; ++idx) { + struct perf_cpu cpu = { .cpu = idx }; + + env->cpu[idx].core_id = cpu__get_core_id(cpu); + env->cpu[idx].socket_id = cpu__get_socket_id(cpu); + env->cpu[idx].die_id = cpu__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; @@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env) static int perf_env__read_nr_cpus_avail(struct perf_env *env) { if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; return env->nr_cpus_avail ? 0 : -ENOENT; } @@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env) return env->pmu_mappings; } -int perf_env__numa_node(struct perf_env *env, int cpu) +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) { if (!env->nr_numa_map) { struct numa_node *nn; @@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map)); + nr = max(nr, perf_cpu_map__max(nn->map).cpu); } nr++; @@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu) env->nr_numa_map = nr; for (i = 0; i < env->nr_numa_nodes; i++) { - int tmp, j; + struct perf_cpu tmp; + int j; nn = &env->numa_nodes[i]; - perf_cpu_map__for_each_cpu(j, tmp, nn->map) - env->numa_map[j] = i; + perf_cpu_map__for_each_cpu(tmp, j, nn->map) + env->numa_map[tmp.cpu] = i; } } - return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; + return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 163e5ec503a2..a3541f98e1fc 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/rbtree.h> +#include "cpumap.h" #include "rwsem.h" struct perf_cpu_map; @@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); -int perf_env__numa_node(struct perf_env *env, int cpu); +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index 7c554234b43d..57f02beef023 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) events_nr++; - if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 || - matched_cpus->nr < cpus->nr || - matched_cpus->nr < pmu->cpus->nr)) { + if (perf_cpu_map__nr(matched_cpus) > 0 && + (perf_cpu_map__nr(unmatched_cpus) > 0 || + perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) || + perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) { perf_cpu_map__put(evsel->core.cpus); perf_cpu_map__put(evsel->core.own_cpus); evsel->core.cpus = perf_cpu_map__get(matched_cpus); evsel->core.own_cpus = perf_cpu_map__get(matched_cpus); - if (unmatched_cpus->nr > 0) { + if (perf_cpu_map__nr(unmatched_cpus) > 0) { cpu_map__snprint(matched_cpus, buf1, sizeof(buf1)); pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n", buf1, pmu->name, evsel->name); } } - if (matched_cpus->nr == 0) { + if (perf_cpu_map__nr(matched_cpus) == 0) { evlist__remove(evlist, evsel); evsel__delete(evsel); @@ -153,8 +154,8 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) perf_cpu_map__put(matched_cpus); perf_cpu_map__put(unmatched_cpus); } - - ret = (unmatched_count == events_nr) ? -1 : 0; + if (events_nr) + ret = (unmatched_count == events_nr) ? -1 : 0; out: perf_cpu_map__put(cpus); return ret; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5f92319ce258..41a66a48cbdf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -342,36 +342,71 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) return perf_thread_map__nr(evlist->core.threads); } -void evlist__cpu_iter_start(struct evlist *evlist) -{ - struct evsel *pos; - - /* - * Reset the per evsel cpu_iter. This is needed because - * each evsel's cpumap may have a different index space, - * and some operations need the index to modify - * the FD xyarray (e.g. open, close) - */ - evlist__for_each_entry(evlist, pos) - pos->cpu_iter = 0; -} +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) +{ + struct evlist_cpu_iterator itr = { + .container = evlist, + .evsel = NULL, + .cpu_map_idx = 0, + .evlist_cpu_map_idx = 0, + .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), + .cpu = (struct perf_cpu){ .cpu = -1}, + .affinity = affinity, + }; -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) -{ - if (ev->cpu_iter >= ev->core.cpus->nr) - return true; - if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) - return true; - return false; + if (evlist__empty(evlist)) { + /* Ensure the empty list doesn't iterate. */ + itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr; + } else { + itr.evsel = evlist__first(evlist); + if (itr.affinity) { + itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); + affinity__set(itr.affinity, itr.cpu.cpu); + itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance + * through the list. + */ + if (itr.cpu_map_idx == -1) + evlist_cpu_iterator__next(&itr); + } + } + return itr; +} + +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) +{ + while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) { + evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + if (evlist_cpu_itr->cpu_map_idx != -1) + return; + } + evlist_cpu_itr->evlist_cpu_map_idx++; + if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) { + evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container); + evlist_cpu_itr->cpu = + perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus, + evlist_cpu_itr->evlist_cpu_map_idx); + if (evlist_cpu_itr->affinity) + affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance through + * the list. + */ + if (evlist_cpu_itr->cpu_map_idx == -1) + evlist_cpu_iterator__next(evlist_cpu_itr); + } } -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) { - if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) { - ev->cpu_iter++; - return false; - } - return true; + return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; } static int evsel__strcmp(struct evsel *pos, char *evsel_name) @@ -400,37 +435,36 @@ static int evlist__is_enabled(struct evlist *evlist) static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; - struct affinity affinity; - int cpu, i, imm = 0; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity = NULL; bool has_imm = false; - if (affinity__setup(&affinity) < 0) - return; + // See explanation in evlist__close() + if (!cpu_map__is_dummy(evlist->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return; + affinity = &saved_affinity; + } /* Disable 'immediate' events last */ - for (imm = 0; imm <= 1; imm++) { - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) - continue; - if (pos->immediate) - has_imm = true; - if (pos->immediate != imm) - continue; - evsel__disable_cpu(pos, pos->cpu_iter - 1); - } + for (int imm = 0; imm <= 1; imm++) { + evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) + continue; + if (pos->immediate) + has_imm = true; + if (pos->immediate != imm) + continue; + evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } if (!has_imm) break; } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; @@ -462,26 +496,25 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; - struct affinity affinity; - int cpu, i; - - if (affinity__setup(&affinity) < 0) - return; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity = NULL; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); + // See explanation in evlist__close() + if (!cpu_map__is_dummy(evlist->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return; + affinity = &saved_affinity; + } - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (!evsel__is_group_leader(pos) || !pos->core.fd) - continue; - evsel__enable_cpu(pos, pos->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; @@ -800,7 +833,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, - int output, int cpu) + int output, struct perf_cpu cpu) { struct mmap *map = container_of(_map, struct mmap, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); @@ -1264,14 +1297,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) void evlist__close(struct evlist *evlist) { struct evsel *evsel; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i; /* * With perf record core.cpus is usually NULL. * Use the old method to handle this for now. */ - if (!evlist->core.cpus) { + if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) { evlist__for_each_entry_reverse(evlist, evsel) evsel__close(evsel); return; @@ -1279,15 +1312,12 @@ void evlist__close(struct evlist *evlist) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - evlist__for_each_entry_reverse(evlist, evsel) { - if (evsel__cpu_iter_skip(evsel, cpu)) - continue; - perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core, + evlist_cpu_itr.cpu_map_idx); } + affinity__cleanup(&affinity); evlist__for_each_entry_reverse(evlist, evsel) { perf_evsel__free_fd(&evsel->core); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 97bfb8d0be4f..64cba56fbc74 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -64,6 +64,7 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; + const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -110,6 +111,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) int arch_evlist__add_default_attrs(struct evlist *evlist); +struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); @@ -325,17 +327,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); #define evlist__for_each_entry_safe(evlist, tmp, evsel) \ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel) -#define evlist__for_each_cpu(evlist, index, cpu) \ - evlist__cpu_iter_start(evlist); \ - perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) +/** Iterator state for evlist__for_each_cpu */ +struct evlist_cpu_iterator { + /** The list being iterated through. */ + struct evlist *container; + /** The current evsel of the iterator. */ + struct evsel *evsel; + /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */ + int cpu_map_idx; + /** + * The CPU map index corresponding to evlist->core.all_cpus for the + * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may + * contain fewer entries. + */ + int evlist_cpu_map_idx; + /** The number of CPU map entries in evlist->core.all_cpus. */ + int evlist_cpu_map_nr; + /** The current CPU of the iterator. */ + struct perf_cpu cpu; + /** If present, used to set the affinity when switching between CPUs. */ + struct affinity *affinity; +}; + +/** + * evlist__for_each_cpu - without affinity, iterate over the evlist. With + * affinity, iterate over all CPUs and then the evlist + * for each evsel on that CPU. When switching between + * CPUs the affinity is set to the CPU to avoid IPIs + * during syscalls. + * @evlist_cpu_itr: the iterator instance. + * @evlist: evlist instance to iterate. + * @affinity: NULL or used to set the affinity to the current CPU. + */ +#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \ + for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \ + !evlist_cpu_iterator__end(&evlist_cpu_itr); \ + evlist_cpu_iterator__next(&evlist_cpu_itr)) + +/** Returns an iterator set to the first CPU/evsel of evlist. */ +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity); +/** Move to next element in iterator, updating CPU, evsel and the affinity. */ +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr); +/** Returns true when iterator is at the end of the CPUs and evlist. */ +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); struct evsel *evlist__get_tracking_event(struct evlist *evlist); void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); -void evlist__cpu_iter_start(struct evlist *evlist); -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); - struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac0127be0459..22d3267ce294 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1064,6 +1064,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un { } +static void evsel__set_default_freq_period(struct record_opts *opts, + struct perf_event_attr *attr) +{ + if (opts->freq) { + attr->freq = 1; + attr->sample_freq = opts->freq; + } else { + attr->sample_period = opts->default_interval; + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -1130,14 +1141,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period) { - if (opts->freq) { - attr->freq = 1; - attr->sample_freq = opts->freq; - } else { - attr->sample_period = opts->default_interval; - } - } + if ((evsel->is_libpfm_event && !attr->sample_period) || + (!evsel->is_libpfm_event && (!attr->sample_period || + opts->user_freq != UINT_MAX || + opts->user_interval != ULLONG_MAX))) + evsel__set_default_freq_period(opts, attr); + /* * If attr->freq was set (here or earlier), ask for period * to be sampled. @@ -1372,9 +1381,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter) } /* Caller has to clear disabled after going through all CPUs. */ -int evsel__enable_cpu(struct evsel *evsel, int cpu) +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__enable_cpu(&evsel->core, cpu); + return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx); } int evsel__enable(struct evsel *evsel) @@ -1387,9 +1396,9 @@ int evsel__enable(struct evsel *evsel) } /* Caller has to set disabled after going through all CPUs. */ -int evsel__disable_cpu(struct evsel *evsel, int cpu) +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__disable_cpu(&evsel->core, cpu); + return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx); } int evsel__disable(struct evsel *evsel) @@ -1455,7 +1464,7 @@ void evsel__delete(struct evsel *evsel) free(evsel); } -void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, +void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1463,12 +1472,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, if (!evsel->prev_raw_counts) return; - if (cpu == -1) { + if (cpu_map_idx == -1) { tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); - *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; } count->val = count->val - tmp.val; @@ -1476,46 +1485,28 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, count->run = count->run - tmp.run; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled) +static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) { - s8 scaled = 0; + struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread); - if (scale) { - if (count->run == 0) { - scaled = -1; - count->val = 0; - } else if (count->run < count->ena) { - scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run); - } - } - - if (pscaled) - *pscaled = scaled; -} - -static int evsel__read_one(struct evsel *evsel, int cpu, int thread) -{ - struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); - - return perf_evsel__read(&evsel->core, cpu, thread, count); + return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } -static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) +static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, + u64 val, u64 ena, u64 run) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); count->val = val; count->ena = ena; count->run = run; - perf_counts__set_loaded(counter->counts, cpu, thread, true); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } -static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1534,7 +1525,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, v = (struct sample_read_value *) data; - evsel__set_count(leader, cpu, thread, v[0].value, ena, run); + evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1543,13 +1534,13 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, if (!counter) return -EINVAL; - evsel__set_count(counter, cpu, thread, v[i].value, ena, run); + evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run); } return 0; } -static int evsel__read_group(struct evsel *leader, int cpu, int thread) +static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1570,67 +1561,67 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread) ps->group_data = data; } - if (FD(leader, cpu, thread) < 0) + if (FD(leader, cpu_map_idx, thread) < 0) return -EINVAL; - if (readn(FD(leader, cpu, thread), data, size) <= 0) + if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0) return -errno; - return evsel__process_group_data(leader, cpu, thread, data); + return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return evsel__read_group(evsel, cpu, thread); + return evsel__read_group(evsel, cpu_map_idx, thread); - return evsel__read_one(evsel, cpu, thread); + return evsel__read_one(evsel, cpu_map_idx, thread); } -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; - if (FD(evsel, cpu, thread) < 0) + if (FD(evsel, cpu_map_idx, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) + if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) + if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - evsel__compute_deltas(evsel, cpu, thread, &count); + evsel__compute_deltas(evsel, cpu_map_idx, thread, &count); perf_counts_values__scale(&count, scale, NULL); - *perf_counts(evsel->counts, cpu, thread) = count; + *perf_counts(evsel->counts, cpu_map_idx, thread) = count; return 0; } static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, - int cpu) + int cpu_map_idx) { - int cpuid; + struct perf_cpu cpu; - cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu); - return perf_cpu_map__idx(other->core.cpus, cpuid); + cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + return perf_cpu_map__idx(other->core.cpus, cpu); } -static int evsel__hybrid_group_cpu(struct evsel *evsel, int cpu) +static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx) { struct evsel *leader = evsel__leader(evsel); if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) || (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) { - return evsel__match_other_cpu(evsel, leader, cpu); + return evsel__match_other_cpu(evsel, leader, cpu_map_idx); } - return cpu; + return cpu_map_idx; } -static int get_group_fd(struct evsel *evsel, int cpu, int thread) +static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) { struct evsel *leader = evsel__leader(evsel); int fd; @@ -1644,11 +1635,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) */ BUG_ON(!leader->core.fd); - cpu = evsel__hybrid_group_cpu(evsel, cpu); - if (cpu == -1) + cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx); + if (cpu_map_idx == -1) return -1; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); BUG_ON(fd == -1); return fd; @@ -1662,16 +1653,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int } static int update_fds(struct evsel *evsel, - int nr_cpus, int cpu_idx, + int nr_cpus, int cpu_map_idx, int nr_threads, int thread_idx) { struct evsel *pos; - if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads) return -EINVAL; evlist__for_each_entry(evsel->evlist, pos) { - nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx; evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); @@ -1685,10 +1676,10 @@ static int update_fds(struct evsel *evsel, return 0; } -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err) +static bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu_map_idx, + struct perf_thread_map *threads, + int thread, int err) { pid_t ignore_pid = perf_thread_map__pid(threads, thread); @@ -1711,7 +1702,7 @@ bool evsel__ignore_missing_thread(struct evsel *evsel, * We should remove fd for missing_thread first * because thread_map__remove() will decrease threads->nr. */ - if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread)) return false; if (thread_map__remove(threads, thread)) @@ -1800,7 +1791,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, nthreads = threads->nr; if (evsel->core.fd == NULL && - perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0) + perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; evsel->open_flags = PERF_FLAG_FD_CLOEXEC; @@ -1993,9 +1984,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, - int start_cpu, int end_cpu) + int start_cpu_map_idx, int end_cpu_map_idx) { - int cpu, thread, nthreads; + int idx, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; @@ -2022,7 +2013,7 @@ fallback_missing_features: display_attr(&evsel->core.attr); - for (cpu = start_cpu; cpu < end_cpu; cpu++) { + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2033,17 +2024,18 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - group_fd = get_group_fd(evsel, cpu, thread); + group_fd = get_group_fd(evsel, idx, thread); test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[cpu], group_fd, evsel->open_flags); + pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu], + fd = sys_perf_event_open(&evsel->core.attr, pid, + perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); - FD(evsel, cpu, thread) = fd; + FD(evsel, idx, thread) = fd; if (fd < 0) { err = -errno; @@ -2053,10 +2045,11 @@ retry_open: goto try_fallback; } - bpf_counter__install_pe(evsel, cpu, fd); + bpf_counter__install_pe(evsel, idx, fd); if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + test_attr__open(&evsel->core.attr, pid, + perf_cpu_map__cpu(cpus, idx), fd, group_fd, evsel->open_flags); } @@ -2097,7 +2090,8 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { + if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), + idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ nthreads--; @@ -2112,7 +2106,7 @@ try_fallback: if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) goto retry_open; - if (err != -EINVAL || cpu > 0 || thread > 0) + if (err != -EINVAL || idx > 0 || thread > 0) goto out_close; if (evsel__detect_missing_features(evsel)) @@ -2124,12 +2118,12 @@ out_close: old_errno = errno; do { while (--thread >= 0) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; } thread = nthreads; - } while (--cpu >= 0); + } while (--idx >= 0); errno = old_errno; return err; } @@ -2137,7 +2131,7 @@ out_close: int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1); + return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); } void evsel__close(struct evsel *evsel) @@ -2146,13 +2140,12 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) { - if (cpu == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, - cpus ? cpus->nr : 1); + if (cpu_map_idx == -1) + return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); - return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); + return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) @@ -2706,6 +2699,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } return sample->raw_data + offset; @@ -2950,6 +2945,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "wrong clockid (%d).", clockid); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case ENODATA: return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. " @@ -2973,15 +2972,15 @@ struct perf_env *evsel__env(struct evsel *evsel) static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { - int cpu, thread; + int cpu_map_idx, thread; - for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) { + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { - int fd = FD(evsel, cpu, thread); + int fd = FD(evsel, cpu_map_idx, thread); if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, - cpu, thread, fd) < 0) + cpu_map_idx, thread, fd) < 0) return -1; } } @@ -2994,7 +2993,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; - if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr)) + if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr)) return -ENOMEM; return store_evsel_ids(evsel, evlist); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 29d49a8c1e92..041b42d33bf5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,6 +11,7 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include <internal/cpumap.h> +#include <perf/cpumap.h> struct bpf_object; struct cgroup; @@ -121,7 +122,6 @@ struct evsel { bool errored; struct hashmap *per_pkg_mask; int err; - int cpu_iter; struct { evsel__sb_cb_t *cb; void *data; @@ -192,12 +192,9 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) static inline int evsel__nr_cpus(struct evsel *evsel) { - return evsel__cpus(evsel)->nr; + return perf_cpu_map__nr(evsel__cpus(evsel)); } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled); - void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, struct perf_counts_values *count); @@ -288,12 +285,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); int evsel__append_addr_filter(struct evsel *evsel, const char *filter); -int evsel__enable_cpu(struct evsel *evsel, int cpu); +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -int evsel__disable_cpu(struct evsel *evsel, int cpu); +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); @@ -305,10 +302,6 @@ bool evsel__detect_missing_features(struct evsel *evsel); enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err); bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; @@ -337,32 +330,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) (e1->core.attr.config == e2->core.attr.config); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); /** * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread) { - return __evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false); } /** * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread) { - return __evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } int evsel__parse_sample(struct evsel *evsel, union perf_event *event, diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 666b59baeb70..675f318ce7c1 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -405,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data) double expr__get_literal(const char *literal) { static struct cpu_topology *topology; + double result = NAN; - if (!strcmp("#smt_on", literal)) - return smt_on() > 0 ? 1.0 : 0.0; + if (!strcasecmp("#smt_on", literal)) { + result = smt_on() > 0 ? 1.0 : 0.0; + goto out; + } - if (!strcmp("#num_cpus", literal)) - return cpu__max_present_cpu(); + if (!strcmp("#num_cpus", literal)) { + result = cpu__max_present_cpu().cpu; + goto out; + } /* * Assume that topology strings are consistent, such as CPUs "0-1" @@ -422,16 +427,24 @@ double expr__get_literal(const char *literal) topology = cpu_topology__new(); if (!topology) { pr_err("Error creating CPU topology"); - return NAN; + goto out; } } - if (!strcmp("#num_packages", literal)) - return topology->package_cpus_lists; - if (!strcmp("#num_dies", literal)) - return topology->die_cpus_lists; - if (!strcmp("#num_cores", literal)) - return topology->core_cpus_lists; + if (!strcmp("#num_packages", literal)) { + result = topology->package_cpus_lists; + goto out; + } + if (!strcmp("#num_dies", literal)) { + result = topology->die_cpus_lists; + goto out; + } + if (!strcmp("#num_cores", literal)) { + result = topology->core_cpus_lists; + goto out; + } pr_err("Unrecognized literal '%s'", literal); - return NAN; +out: + pr_debug2("literal: %s = %f\n", literal, result); + return result; } diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h new file mode 100644 index 000000000000..887f68a185f7 --- /dev/null +++ b/tools/perf/util/ftrace.h @@ -0,0 +1,81 @@ +#ifndef __PERF_FTRACE_H__ +#define __PERF_FTRACE_H__ + +#include <linux/list.h> + +#include "target.h" + +struct evlist; + +struct perf_ftrace { + struct evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; + unsigned long percpu_buffer_size; + bool inherit; + int graph_depth; + int func_stack_trace; + int func_irq_info; + int graph_nosleep_time; + int graph_noirqs; + int graph_verbose; + int graph_thresh; + unsigned int initial_delay; +}; + +struct filter_entry { + struct list_head list; + char name[]; +}; + +#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ + +#ifdef HAVE_BPF_SKEL + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, + int buckets[]); +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); + +#else /* !HAVE_BPF_SKEL */ + +static inline int +perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[] __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* __PERF_FTRACE_H__ */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e3c1a532d059..6da12e522edc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff, u32 nrc, nra; int ret; - nrc = cpu__max_present_cpu(); + nrc = cpu__max_present_cpu().cpu; nr = sysconf(_SC_NPROCESSORS_ONLN); if (nr < 0) @@ -1163,7 +1163,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) u32 nr, cpu; u16 level; - nr = cpu__max_cpu(); + nr = cpu__max_cpu().cpu; for (cpu = 0; cpu < nr; cpu++) { for (level = 0; level < MAX_CACHE_LVL; level++) { @@ -1195,7 +1195,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) static int write_cache(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL; + u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL; struct cpu_cache_level caches[max_caches]; u32 cnt = 0, i, version = 1; int ret; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b776465e04ef..0a8033b09e28 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); + if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 621f35ae1efa..2a15e22fb89c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,7 +75,8 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, - HISTC_P_STAGE_CYC, + HISTC_LOCAL_P_STAGE_CYC, + HISTC_GLOBAL_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 5acf053fca7d..aa0c5179836d 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -50,41 +50,32 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ + .set .L__sym_size_##name, .-name ASM_NL \ .size name, .-name #endif -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. - */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* SYM_FUNC_START_WEAK -- use for weak functions */ #ifndef SYM_FUNC_START_WEAK #define SYM_FUNC_START_WEAK(name) \ @@ -96,9 +87,32 @@ * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index c397be0c2e32..15f60fd09424 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -23,7 +23,9 @@ #include "unwind.h" #include "libunwind-aarch64.h" +#define perf_event_arm_regs perf_event_arm64_regs #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" /* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb8496df8432..394550003693 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,6 +16,7 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "path.h" #include "srcline.h" #include "symbol.h" #include "sort.h" @@ -34,6 +35,7 @@ #include "bpf-event.h" #include <internal/lib.h> // page_size #include "cgroup.h" +#include "arm64-frame-pointer-unwind-support.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -1415,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); + path__join(path, sizeof(path), dir_name, dent->d_name); if (stat(path, &st)) continue; @@ -2071,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2090,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread, ams->addr = addr; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2710,6 +2714,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } +static u64 get_leaf_frame_caller(struct perf_sample *sample, + struct thread *thread, int usr_idx) +{ + if (machine__normalized_is(thread->maps->machine, "arm64")) + return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); + else + return 0; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, @@ -2723,9 +2736,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries; + int i, j, err, nr_entries, usr_idx; int skip_idx = -1; int first_call = 0; + u64 leaf_frame_caller; if (chain) chain_nr = chain->nr; @@ -2850,6 +2864,34 @@ check_calls: continue; } + /* + * PERF_CONTEXT_USER allows us to locate where the user stack ends. + * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, + * the index will be different in order to add the missing frame + * at the right place. + */ + + usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1; + + if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { + + leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); + + /* + * check if leaf_frame_Caller != ip to not add the same + * value twice. + */ + + if (leaf_frame_caller && leaf_frame_caller != ip) { + + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0); + if (err) + return (err < 0) ? err : 0; + } + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, false, NULL, NULL, 0); @@ -3079,14 +3121,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, } /* - * Compares the raw arch string. N.B. see instead perf_env__arch() if a - * normalized arch is needed. + * Compares the raw arch string. N.B. see instead perf_env__arch() or + * machine__normalized_is() if a normalized arch is needed. */ bool machine__is(struct machine *machine, const char *arch) { return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +bool machine__normalized_is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__arch(machine->env), arch); +} + int machine__nr_cpus_avail(struct machine *machine) { return machine ? perf_env__nr_cpus_avail(machine->env) : 0; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a143087eeb47..c5a45dc8df4c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); +bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 7d22ade082c8..e08817b0c30f 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -18,6 +18,7 @@ struct addr_map_symbol { struct map_symbol ms; u64 addr; u64 al_addr; + char al_level; u64 phys_addr; u64 data_page_size; }; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3167b4628b6d..ed0ab838bcc5 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -309,6 +309,9 @@ static const char * const mem_hops[] = { * to be set with mem_hops field. */ "core, same node", + "node, same socket", + "socket, same board", + "board", }; int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) @@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; u64 hit, miss; - int printed; + int printed = 0; if (mem_info) m = mem_info->data_src.mem_lvl; @@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) l += 7; } - if (mem_info && mem_info->data_src.mem_hops) + /* + * Incase mem_hops field is set, we can skip printing data source via + * PERF_MEM_LVL namespace. + */ + if (mem_info && mem_info->data_src.mem_hops) { l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); - - printed = 0; - for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (printed++) { - strcat(out, " or "); - l += 4; + } else { + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (printed++) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, mem_lvl[i]); } - l += scnprintf(out + l, sz - l, mem_lvl[i]); } if (mem_info && mem_info->data_src.mem_lvl_num) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index fffe02aae3ed..d8492e339521 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe, m->metric_name = pe->metric_name; m->modifier = modifier ? strdup(modifier) : NULL; if (modifier && !m->modifier) { - free(m); expr__ctx_free(m->pctx); + free(m); return NULL; } m->metric_expr = pe->metric_expr; @@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids, */ metric_id = evsel__metric_id(ev); evlist__for_each_entry_continue(metric_evlist, ev) { - if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) + if (!strcmp(evsel__metric_id(ev), metric_id)) ev->metric_leader = metric_events[i]; } } @@ -1115,13 +1115,27 @@ out: return ret; } +/** + * metric_list_cmp - list_sort comparator that sorts metrics with more events to + * the front. duration_time is excluded from the count. + */ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct list_head *r) { const struct metric *left = container_of(l, struct metric, nd); const struct metric *right = container_of(r, struct metric, nd); + struct expr_id_data *data; + int left_count, right_count; + + left_count = hashmap__size(left->pctx->ids); + if (!expr__get_id(left->pctx, "duration_time", &data)) + left_count--; + + right_count = hashmap__size(right->pctx->ids); + if (!expr__get_id(right->pctx, "duration_time", &data)) + right_count--; - return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); + return right_count - left_count; } /** @@ -1299,14 +1313,16 @@ err_out: /** * parse_ids - Build the event string for the ids and parse them creating an * evlist. The encoded metric_ids are decoded. + * @metric_no_merge: is metric sharing explicitly disabled. * @fake_pmu: used when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. * @out_evlist: the created list of events. */ -static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, - const char *modifier, bool has_constraint, struct evlist **out_evlist) +static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, + struct expr_parse_ctx *ids, const char *modifier, + bool has_constraint, struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, int ret; *out_evlist = NULL; - if (hashmap__size(ids->ids) == 0) { + if (!metric_no_merge || hashmap__size(ids->ids) == 0) { char *tmp; /* - * No ids/events in the expression parsing context. Events may - * have been removed because of constant evaluation, e.g.: - * event1 if #smt_on else 0 + * We may fail to share events between metrics because + * duration_time isn't present in one metric. For example, a + * ratio of cache misses doesn't need duration_time but the same + * events may be used for a misses per second. Events without + * sharing implies multiplexing, that is best avoided, so place + * duration_time in every group. + * + * Also, there may be no ids/events in the expression parsing + * context because of constant evaluation, e.g.: + * event1 if #smt_on else 0 * Add a duration_time event to avoid a parse error on an empty * string. */ @@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { - ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + ret = parse_ids(metric_no_merge, fake_pmu, combined, + /*modifier=*/NULL, /*has_constraint=*/true, &combined_evlist); } @@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } } if (!metric_evlist) { - ret = parse_ids(fake_pmu, m->pctx, m->modifier, + ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, m->has_constraint, &m->evlist); if (ret) goto out; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 23ecdba9e670..0e8ff8d1e206 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity) { void *data; size_t mmap_len; @@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, - int cpu __maybe_unused, int affinity __maybe_unused) + struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused) { return 0; } @@ -240,7 +240,8 @@ void mmap__munmap(struct mmap *map) static void build_node_mask(int node, struct mmap_cpu_mask *mask) { - int c, cpu, nr_cpus; + int idx, nr_cpus; + struct perf_cpu cpu; const struct perf_cpu_map *cpu_map = NULL; cpu_map = cpu_map__online(); @@ -248,16 +249,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) return; nr_cpus = perf_cpu_map__nr(cpu_map); - for (c = 0; c < nr_cpus; c++) { - cpu = cpu_map->map[c]; /* map c index to online cpu index */ + for (idx = 0; idx < nr_cpus; idx++) { + cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu, mask->bits); + set_bit(cpu.cpu, mask->bits); } } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { - map->affinity_mask.nbits = cpu__max_cpu(); + map->affinity_mask.nbits = cpu__max_cpu().cpu; map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits); if (!map->affinity_mask.bits) return -1; @@ -265,12 +266,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu, map->affinity_mask.bits); + set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu) { if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 8e259b9610f8..83f6bd4d4082 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/ring_buffer.h> #include <linux/bitops.h> +#include <perf/cpumap.h> #include <stdbool.h> #include <pthread.h> // for cpu_set_t #ifdef HAVE_AIO_SUPPORT @@ -52,7 +53,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu); void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 608b20c72a5c..48aa3217300b 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces) free(namespaces); } +static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path) +{ + FILE *f = NULL; + char *statln = NULL; + size_t linesz = 0; + char *nspid; + + f = fopen(path, "r"); + if (f == NULL) + return -1; + + while (getline(&statln, &linesz, f) != -1) { + /* Use tgid if CONFIG_PID_NS is not defined. */ + if (strstr(statln, "Tgid:") != NULL) { + nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + nsi->nstgid = nsi->tgid; + } + + if (strstr(statln, "NStgid:") != NULL) { + nspid = strrchr(statln, '\t'); + nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); + /* + * If innermost tgid is not the first, process is in a different + * PID namespace. + */ + nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; + break; + } + } + + fclose(f); + free(statln); + return 0; +} + int nsinfo__init(struct nsinfo *nsi) { char oldns[PATH_MAX]; char spath[PATH_MAX]; char *newns = NULL; - char *statln = NULL; - char *nspid; struct stat old_stat; struct stat new_stat; - FILE *f = NULL; - size_t linesz = 0; int rv = -1; if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) @@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi) if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX) goto out; - f = fopen(spath, "r"); - if (f == NULL) - goto out; - - while (getline(&statln, &linesz, f) != -1) { - /* Use tgid if CONFIG_PID_NS is not defined. */ - if (strstr(statln, "Tgid:") != NULL) { - nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), - NULL, 10); - nsi->nstgid = nsi->tgid; - } - - if (strstr(statln, "NStgid:") != NULL) { - nspid = strrchr(statln, '\t'); - nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); - /* If innermost tgid is not the first, process is in a different - * PID namespace. - */ - nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; - break; - } - } - rv = 0; + rv = nsinfo__get_nspid(nsi, spath); out: - if (f != NULL) - (void) fclose(f); - free(statln); free(newns); return rv; } @@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) return ret; } + +bool nsinfo__is_in_root_namespace(void) +{ + struct nsinfo nsi; + + memset(&nsi, 0x0, sizeof(nsi)); + nsinfo__get_nspid(&nsi, "/proc/self/status"); + return !nsi.in_pidns; +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index ad9775db7b9c..9ceea9643507 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc); char *nsinfo__realpath(const char *path, struct nsinfo *nsi); int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); +bool nsinfo__is_in_root_namespace(void); + static inline void __nsinfo__zput(struct nsinfo **nsip) { if (nsip) { diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 9fc86971027b..284f8eabd3b9 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx, static int pmu_cmp(struct parse_events_state *parse_state, struct perf_pmu *pmu) { - if (!parse_state->hybrid_pmu_name) - return 0; + if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) + return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); + + if (parse_state->hybrid_pmu_name) + return strcmp(parse_state->hybrid_pmu_name, pmu->name); - return strcmp(parse_state->hybrid_pmu_name, pmu->name); + return 0; } static int add_hw_hybrid(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ba74fdf74af9..24997925ae00 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1648,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, { struct parse_events_term *term; struct list_head *list = NULL; + struct list_head *orig_head = NULL; struct perf_pmu *pmu = NULL; int ok = 0; char *config; @@ -1674,7 +1675,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, } list_add_tail(&term->list, head); - /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) @@ -1687,16 +1687,27 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { + parse_events_copy_term_list(head, &orig_head); if (!parse_events_add_pmu(parse_state, list, - pmu->name, head, + pmu->name, orig_head, true, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; } + parse_events_terms__delete(orig_head); } } } + + if (parse_state->fake_pmu) { + if (!parse_events_add_pmu(parse_state, list, str, head, + true, true)) { + pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str); + ok++; + } + } + out_err: if (ok) *listp = list; @@ -1824,6 +1835,11 @@ out: return ret; } +__weak struct evsel *arch_evlist__leader(struct list_head *list) +{ + return list_first_entry(list, struct evsel, core.node); +} + void parse_events__set_leader(char *name, struct list_head *list, struct parse_events_state *parse_state) { @@ -1837,9 +1853,10 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __perf_evlist__set_leader(list); - leader = list_entry(list->next, struct evsel, core.node); + leader = arch_evlist__leader(list); + __perf_evlist__set_leader(list, &leader->core); leader->group_name = name ? strdup(name) : NULL; + list_move(&leader->core.node, list); } /* list_event is assumed to point to malloc'ed memory */ @@ -2092,8 +2109,17 @@ static void perf_pmu__parse_init(void) pmu = NULL; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) { - if (strchr(alias->name, '-')) + char *tmp = strchr(alias->name, '-'); + + if (tmp) { + char *tmp2 = NULL; + + tmp2 = strchr(tmp + 1, '-'); len++; + if (tmp2) + len++; + } + len++; } } @@ -2113,8 +2139,20 @@ static void perf_pmu__parse_init(void) list_for_each_entry(alias, &pmu->aliases, list) { struct perf_pmu_event_symbol *p = perf_pmu_events_list + len; char *tmp = strchr(alias->name, '-'); + char *tmp2 = NULL; - if (tmp != NULL) { + if (tmp) + tmp2 = strchr(tmp + 1, '-'); + if (tmp2) { + SET_SYMBOL(strndup(alias->name, tmp - alias->name), + PMU_EVENT_SYMBOL_PREFIX); + p++; + tmp++; + SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX); + p++; + SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2); + len += 3; + } else if (tmp) { SET_SYMBOL(strndup(alias->name, tmp - alias->name), PMU_EVENT_SYMBOL_PREFIX); p++; @@ -2141,23 +2179,38 @@ err: */ int perf_pmu__test_parse_init(void) { - struct perf_pmu_event_symbol *list; + struct perf_pmu_event_symbol *list, *tmp, symbols[] = { + {(char *)"read", PMU_EVENT_SYMBOL}, + {(char *)"event", PMU_EVENT_SYMBOL_PREFIX}, + {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX}, + {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX}, + {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2}, + }; + unsigned long i, j; - list = malloc(sizeof(*list) * 1); + tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols)); if (!list) return -ENOMEM; - list->type = PMU_EVENT_SYMBOL; - list->symbol = strdup("read"); - - if (!list->symbol) { - free(list); - return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { + tmp->type = symbols[i].type; + tmp->symbol = strdup(symbols[i].symbol); + if (!tmp->symbol) + goto err_free; } perf_pmu_events_list = list; - perf_pmu_events_list_num = 1; + perf_pmu_events_list_num = ARRAY_SIZE(symbols); + + qsort(perf_pmu_events_list, ARRAY_SIZE(symbols), + sizeof(struct perf_pmu_event_symbol), comp_pmu); return 0; + +err_free: + for (j = 0, tmp = list; j < i; j++, tmp++) + free(tmp->symbol); + free(list); + return -ENOMEM; } enum perf_pmu_event_symbol_type diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c7fc93f54577..a38b8b160e80 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type { PMU_EVENT_SYMBOL, /* normal style PMU event */ PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */ PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */ + PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */ }; struct perf_pmu_event_symbol { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 4efe9872c667..5b6e4b5249cf 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; + case PMU_EVENT_SYMBOL_SUFFIX2: + return PE_PMU_EVENT_SUF2; case PMU_EVENT_SYMBOL: return parse_state->fake_pmu ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 174158982fae..be8c51770051 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list, %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %type <num> PE_VALUE @@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list, %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> PE_DRV_CFG_TERM %type <str> event_pmu_name %destructor { free ($$); } <str> @@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config $$ = list; } | +PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc +{ + struct list_head *list; + char pmu_name[128]; + snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5); + free($1); + free($3); + free($5); + if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) + YYABORT; + $$ = list; +} +| PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { struct list_head *list; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 020411682a3c..c28dd50bd571 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -11,7 +11,7 @@ typedef void (*setup_probe_fn_t)(struct evsel *evsel); -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str) { struct evlist *evlist; struct evsel *evsel; @@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) evsel = evlist__first(evlist); while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (pid == -1 && errno == EACCES) { pid = 0; @@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) fn(evsel); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (errno == EINVAL) err = -EINVAL; @@ -61,12 +61,13 @@ static bool perf_probe_api(setup_probe_fn_t fn) { const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; struct perf_cpu_map *cpus; - int cpu, ret, i = 0; + struct perf_cpu cpu; + int ret, i = 0; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = cpus->map[0]; + cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); do { @@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void) .exclude_kernel = 1, }; struct perf_cpu_map *cpus; - int cpu, fd; + struct perf_cpu cpu; + int fd; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = cpus->map[0]; + + cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0); if (fd < 0) return false; close(fd); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 47b7531f51da..98af3fa4ea35 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - bit_name(HW_INDEX), + bit_name(TYPE_SAVE), bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 06a7461ba864..a982e40ee5a9 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> +#include <string.h> #include "perf_regs.h" #include "event.h" @@ -20,6 +21,671 @@ uint64_t __weak arch__user_reg_mask(void) } #ifdef HAVE_PERF_REGS_SUPPORT + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +#include "../../arch/arm/include/uapi/asm/perf_regs.h" +#include "../../arch/csky/include/uapi/asm/perf_regs.h" +#include "../../arch/mips/include/uapi/asm/perf_regs.h" +#include "../../arch/powerpc/include/uapi/asm/perf_regs.h" +#include "../../arch/riscv/include/uapi/asm/perf_regs.h" +#include "../../arch/s390/include/uapi/asm/perf_regs.h" +#include "../../arch/x86/include/uapi/asm/perf_regs.h" + +static const char *__perf_reg_name_arm64(int id) +{ + switch (id) { + case PERF_REG_ARM64_X0: + return "x0"; + case PERF_REG_ARM64_X1: + return "x1"; + case PERF_REG_ARM64_X2: + return "x2"; + case PERF_REG_ARM64_X3: + return "x3"; + case PERF_REG_ARM64_X4: + return "x4"; + case PERF_REG_ARM64_X5: + return "x5"; + case PERF_REG_ARM64_X6: + return "x6"; + case PERF_REG_ARM64_X7: + return "x7"; + case PERF_REG_ARM64_X8: + return "x8"; + case PERF_REG_ARM64_X9: + return "x9"; + case PERF_REG_ARM64_X10: + return "x10"; + case PERF_REG_ARM64_X11: + return "x11"; + case PERF_REG_ARM64_X12: + return "x12"; + case PERF_REG_ARM64_X13: + return "x13"; + case PERF_REG_ARM64_X14: + return "x14"; + case PERF_REG_ARM64_X15: + return "x15"; + case PERF_REG_ARM64_X16: + return "x16"; + case PERF_REG_ARM64_X17: + return "x17"; + case PERF_REG_ARM64_X18: + return "x18"; + case PERF_REG_ARM64_X19: + return "x19"; + case PERF_REG_ARM64_X20: + return "x20"; + case PERF_REG_ARM64_X21: + return "x21"; + case PERF_REG_ARM64_X22: + return "x22"; + case PERF_REG_ARM64_X23: + return "x23"; + case PERF_REG_ARM64_X24: + return "x24"; + case PERF_REG_ARM64_X25: + return "x25"; + case PERF_REG_ARM64_X26: + return "x26"; + case PERF_REG_ARM64_X27: + return "x27"; + case PERF_REG_ARM64_X28: + return "x28"; + case PERF_REG_ARM64_X29: + return "x29"; + case PERF_REG_ARM64_SP: + return "sp"; + case PERF_REG_ARM64_LR: + return "lr"; + case PERF_REG_ARM64_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_arm(int id) +{ + switch (id) { + case PERF_REG_ARM_R0: + return "r0"; + case PERF_REG_ARM_R1: + return "r1"; + case PERF_REG_ARM_R2: + return "r2"; + case PERF_REG_ARM_R3: + return "r3"; + case PERF_REG_ARM_R4: + return "r4"; + case PERF_REG_ARM_R5: + return "r5"; + case PERF_REG_ARM_R6: + return "r6"; + case PERF_REG_ARM_R7: + return "r7"; + case PERF_REG_ARM_R8: + return "r8"; + case PERF_REG_ARM_R9: + return "r9"; + case PERF_REG_ARM_R10: + return "r10"; + case PERF_REG_ARM_FP: + return "fp"; + case PERF_REG_ARM_IP: + return "ip"; + case PERF_REG_ARM_SP: + return "sp"; + case PERF_REG_ARM_LR: + return "lr"; + case PERF_REG_ARM_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_csky(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + case PERF_REG_CSKY_A2: + return "a2"; + case PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return "tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_mips(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_powerpc(int id) +{ + switch (id) { + case PERF_REG_POWERPC_R0: + return "r0"; + case PERF_REG_POWERPC_R1: + return "r1"; + case PERF_REG_POWERPC_R2: + return "r2"; + case PERF_REG_POWERPC_R3: + return "r3"; + case PERF_REG_POWERPC_R4: + return "r4"; + case PERF_REG_POWERPC_R5: + return "r5"; + case PERF_REG_POWERPC_R6: + return "r6"; + case PERF_REG_POWERPC_R7: + return "r7"; + case PERF_REG_POWERPC_R8: + return "r8"; + case PERF_REG_POWERPC_R9: + return "r9"; + case PERF_REG_POWERPC_R10: + return "r10"; + case PERF_REG_POWERPC_R11: + return "r11"; + case PERF_REG_POWERPC_R12: + return "r12"; + case PERF_REG_POWERPC_R13: + return "r13"; + case PERF_REG_POWERPC_R14: + return "r14"; + case PERF_REG_POWERPC_R15: + return "r15"; + case PERF_REG_POWERPC_R16: + return "r16"; + case PERF_REG_POWERPC_R17: + return "r17"; + case PERF_REG_POWERPC_R18: + return "r18"; + case PERF_REG_POWERPC_R19: + return "r19"; + case PERF_REG_POWERPC_R20: + return "r20"; + case PERF_REG_POWERPC_R21: + return "r21"; + case PERF_REG_POWERPC_R22: + return "r22"; + case PERF_REG_POWERPC_R23: + return "r23"; + case PERF_REG_POWERPC_R24: + return "r24"; + case PERF_REG_POWERPC_R25: + return "r25"; + case PERF_REG_POWERPC_R26: + return "r26"; + case PERF_REG_POWERPC_R27: + return "r27"; + case PERF_REG_POWERPC_R28: + return "r28"; + case PERF_REG_POWERPC_R29: + return "r29"; + case PERF_REG_POWERPC_R30: + return "r30"; + case PERF_REG_POWERPC_R31: + return "r31"; + case PERF_REG_POWERPC_NIP: + return "nip"; + case PERF_REG_POWERPC_MSR: + return "msr"; + case PERF_REG_POWERPC_ORIG_R3: + return "orig_r3"; + case PERF_REG_POWERPC_CTR: + return "ctr"; + case PERF_REG_POWERPC_LINK: + return "link"; + case PERF_REG_POWERPC_XER: + return "xer"; + case PERF_REG_POWERPC_CCR: + return "ccr"; + case PERF_REG_POWERPC_SOFTE: + return "softe"; + case PERF_REG_POWERPC_TRAP: + return "trap"; + case PERF_REG_POWERPC_DAR: + return "dar"; + case PERF_REG_POWERPC_DSISR: + return "dsisr"; + case PERF_REG_POWERPC_SIER: + return "sier"; + case PERF_REG_POWERPC_MMCRA: + return "mmcra"; + case PERF_REG_POWERPC_MMCR0: + return "mmcr0"; + case PERF_REG_POWERPC_MMCR1: + return "mmcr1"; + case PERF_REG_POWERPC_MMCR2: + return "mmcr2"; + case PERF_REG_POWERPC_MMCR3: + return "mmcr3"; + case PERF_REG_POWERPC_SIER2: + return "sier2"; + case PERF_REG_POWERPC_SIER3: + return "sier3"; + case PERF_REG_POWERPC_PMC1: + return "pmc1"; + case PERF_REG_POWERPC_PMC2: + return "pmc2"; + case PERF_REG_POWERPC_PMC3: + return "pmc3"; + case PERF_REG_POWERPC_PMC4: + return "pmc4"; + case PERF_REG_POWERPC_PMC5: + return "pmc5"; + case PERF_REG_POWERPC_PMC6: + return "pmc6"; + case PERF_REG_POWERPC_SDAR: + return "sdar"; + case PERF_REG_POWERPC_SIAR: + return "siar"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_riscv(int id) +{ + switch (id) { + case PERF_REG_RISCV_PC: + return "pc"; + case PERF_REG_RISCV_RA: + return "ra"; + case PERF_REG_RISCV_SP: + return "sp"; + case PERF_REG_RISCV_GP: + return "gp"; + case PERF_REG_RISCV_TP: + return "tp"; + case PERF_REG_RISCV_T0: + return "t0"; + case PERF_REG_RISCV_T1: + return "t1"; + case PERF_REG_RISCV_T2: + return "t2"; + case PERF_REG_RISCV_S0: + return "s0"; + case PERF_REG_RISCV_S1: + return "s1"; + case PERF_REG_RISCV_A0: + return "a0"; + case PERF_REG_RISCV_A1: + return "a1"; + case PERF_REG_RISCV_A2: + return "a2"; + case PERF_REG_RISCV_A3: + return "a3"; + case PERF_REG_RISCV_A4: + return "a4"; + case PERF_REG_RISCV_A5: + return "a5"; + case PERF_REG_RISCV_A6: + return "a6"; + case PERF_REG_RISCV_A7: + return "a7"; + case PERF_REG_RISCV_S2: + return "s2"; + case PERF_REG_RISCV_S3: + return "s3"; + case PERF_REG_RISCV_S4: + return "s4"; + case PERF_REG_RISCV_S5: + return "s5"; + case PERF_REG_RISCV_S6: + return "s6"; + case PERF_REG_RISCV_S7: + return "s7"; + case PERF_REG_RISCV_S8: + return "s8"; + case PERF_REG_RISCV_S9: + return "s9"; + case PERF_REG_RISCV_S10: + return "s10"; + case PERF_REG_RISCV_S11: + return "s11"; + case PERF_REG_RISCV_T3: + return "t3"; + case PERF_REG_RISCV_T4: + return "t4"; + case PERF_REG_RISCV_T5: + return "t5"; + case PERF_REG_RISCV_T6: + return "t6"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_s390(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: + return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_x86(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + XMM(13) + XMM(14) + XMM(15) +#undef XMM + default: + return NULL; + } + + return NULL; +} + +const char *perf_reg_name(int id, const char *arch) +{ + const char *reg_name = NULL; + + if (!strcmp(arch, "csky")) + reg_name = __perf_reg_name_csky(id); + else if (!strcmp(arch, "mips")) + reg_name = __perf_reg_name_mips(id); + else if (!strcmp(arch, "powerpc")) + reg_name = __perf_reg_name_powerpc(id); + else if (!strcmp(arch, "riscv")) + reg_name = __perf_reg_name_riscv(id); + else if (!strcmp(arch, "s390")) + reg_name = __perf_reg_name_s390(id); + else if (!strcmp(arch, "x86")) + reg_name = __perf_reg_name_x86(id); + else if (!strcmp(arch, "arm")) + reg_name = __perf_reg_name_arm(id); + else if (!strcmp(arch, "arm64")) + reg_name = __perf_reg_name_arm64(id); + + return reg_name ?: "unknown"; +} + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index eeac181ebccf..ce1127af05e4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -11,8 +11,11 @@ struct sample_reg { const char *name; uint64_t mask; }; -#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } -#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } + +#define SMPL_REG_MASK(b) (1ULL << (b)) +#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) } +#define SMPL_REG2_MASK(b) (3ULL << (b)) +#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) } #define SMPL_REG_END { .name = NULL } enum { @@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[]; #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) +const char *perf_reg_name(int id, const char *arch); int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); -static inline const char *perf_reg_name(int id) -{ - const char *reg_name = __perf_reg_name(id); - - return reg_name ?: "unknown"; -} - #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 #define DWARF_MINIMAL_REGS PERF_REGS_MASK -static inline const char *perf_reg_name(int id __maybe_unused) +static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused) { return "unknown"; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b2a02c9ab8ea..a834918a0a0d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + if (sym->type != STT_FUNC) + continue; + /* There can be duplicated symbols in the map */ for (i = 0; i < j; i++) if (sym->start == syms[i]->start) { diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7f782a31bda3..52d8995cfd73 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -636,17 +638,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) { struct pyrf_cpu_map *pcpus = (void *)obj; - return pcpus->cpus->nr; + return perf_cpu_map__nr(pcpus->cpus); } static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= pcpus->cpus->nr) + if (i >= perf_cpu_map__nr(pcpus->cpus)) return NULL; - return Py_BuildValue("i", pcpus->cpus->map[i]); + return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); } static PySequenceMethods pyrf_cpu_map__sequence_methods = { @@ -1057,7 +1059,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; - if (md->core.cpu == cpu) + if (md->core.cpu.cpu == cpu) return md; } @@ -1443,7 +1445,7 @@ error: * Dummy, to avoid dragging all the test_attr infrastructure in the python * binding. */ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index bff669b615ee..007a64681416 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call if (opts->group) evlist__set_leader(evlist); - if (evlist->core.cpus->map[0] < 0) + if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; - int err, fd, cpu; + int err, fd; + struct perf_cpu cpu = { .cpu = 0 }; bool ret = false; pid_t pid = -1; @@ -246,14 +247,16 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); - cpu = cpus ? cpus->map[0] : 0; + if (cpus) + cpu = perf_cpu_map__cpu(cpus, 0); + perf_cpu_map__put(cpus); } else { - cpu = evlist->core.cpus->map[0]; + cpu = perf_cpu_map__cpu(evlist->core.cpus, 0); } while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, perf_event_open_cloexec_flag()); if (fd < 0) { if (pid == -1 && errno == EACCES) { diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 32a721b3e9a5..a5d945415bbc 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c0c010350bc2..e752e1f4a5f0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -36,6 +36,7 @@ #include "../debug.h" #include "../dso.h" #include "../callchain.h" +#include "../env.h" #include "../evsel.h" #include "../event.h" #include "../thread.h" @@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict, _PyUnicode_FromString(decode)); } -static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size) { unsigned int i = 0, r; int printed = 0; @@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) printed += scnprintf(bf + printed, size - printed, "%5s:0x%" PRIx64 " ", - perf_reg_name(r), val); + perf_reg_name(r, arch), val); } } @@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; + const char *arch = perf_env__arch(evsel__env(evsel)); /* * Here value 28 is a constant size which can be used to print @@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict, int size = __sw_hweight64(attr->sample_regs_intr) * 28; char bf[size]; - regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); + regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "iregs", _PyUnicode_FromString(bf)); - regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); + regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); @@ -942,6 +944,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -1553,7 +1557,7 @@ static void get_handler_name(char *str, size_t size, } static void -process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, +process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp, struct perf_counts_values *count) { PyObject *handler, *t; @@ -1573,7 +1577,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu)); PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); @@ -1597,14 +1601,14 @@ static void python_process_stat(struct perf_stat_config *config, int cpu, thread; if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, -1, -1, tstamp, + process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, &counter->counts->aggr); return; } for (thread = 0; thread < threads->nr; thread++) { - for (cpu = 0; cpu < cpus->nr; cpu++) { - process_stat(counter, cpus->map[cpu], + for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { + process_stat(counter, perf_cpu_map__cpu(cpus, cpu), perf_thread_map__pid(threads, thread), tstamp, perf_counts(counter->counts, cpu, thread)); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d8857d1b6d7c..498b05708db5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,7 @@ #include "map_symbol.h" #include "branch.h" #include "debug.h" +#include "env.h" #include "evlist.h" #include "evsel.h" #include "memswap.h" @@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } -static void regs_dump__printf(u64 mask, u64 *regs) +static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) { unsigned rid, i = 0; @@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs) u64 val = regs[i++]; printf(".... %-5s 0x%016" PRIx64 "\n", - perf_reg_name(rid), val); + perf_reg_name(rid, arch), val); } } @@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d) return regs_abi[d->abi]; } -static void regs__printf(const char *type, struct regs_dump *regs) +static void regs__printf(const char *type, struct regs_dump *regs, const char *arch) { u64 mask = regs->mask; @@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs) mask, regs_dump_abi(regs)); - regs_dump__printf(mask, regs->regs); + regs_dump__printf(mask, regs->regs, arch); } -static void regs_user__printf(struct perf_sample *sample) +static void regs_user__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *user_regs = &sample->user_regs; if (user_regs->regs) - regs__printf("user", user_regs); + regs__printf("user", user_regs, arch); } -static void regs_intr__printf(struct perf_sample *sample) +static void regs_intr__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *intr_regs = &sample->intr_regs; if (intr_regs->regs) - regs__printf("intr", intr_regs); + regs__printf("intr", intr_regs, arch); } static void stack_user__printf(struct stack_dump *dump) @@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 size, char *str) } static void dump_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample) + struct perf_sample *sample, const char *arch) { u64 sample_type; char str[PAGE_SIZE_NAME_LEN]; @@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) - regs_user__printf(sample); + regs_user__printf(sample, arch); if (sample_type & PERF_SAMPLE_REGS_INTR) - regs_intr__printf(sample); + regs_intr__printf(sample, arch); if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); @@ -1502,11 +1503,12 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unknown_id; return 0; } - dump_sample(evsel, event, sample); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; + dump_sample(evsel, event, sample, perf_env__arch(NULL)); return 0; } + dump_sample(evsel, event, sample, perf_env__arch(machine->env)); return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -2536,16 +2538,16 @@ int perf_session__cpu_bitmap(struct perf_session *session, return -1; } - for (i = 0; i < map->nr; i++) { - int cpu = map->map[i]; + for (i = 0; i < perf_cpu_map__nr(map); i++) { + struct perf_cpu cpu = perf_cpu_map__cpu(map, i); - if (cpu >= nr_cpus) { + if (cpu.cpu >= nr_cpus) { pr_err("Requested CPU %d too large. " - "Consider raising MAX_NR_CPUS\n", cpu); + "Consider raising MAX_NR_CPUS\n", cpu.cpu); goto out_delete_map; } - set_bit(cpu, cpu_bitmap); + set_bit(cpu.cpu, cpu_bitmap); } err = 0; @@ -2597,7 +2599,7 @@ int perf_event__process_id_index(struct perf_session *session, if (!sid) return -ENOENT; sid->idx = e->idx; - sid->cpu = e->cpu; + sid->cpu.cpu = e->cpu; sid->tid = e->tid; } return 0; diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 34f1b1b1176c..2b0a36ebf27a 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -5,6 +5,56 @@ #include "api/fs/fs.h" #include "smt.h" +/** + * hweight_str - Returns the number of bits set in str. Stops at first non-hex + * or ',' character. + */ +static int hweight_str(char *str) +{ + int result = 0; + + while (*str) { + switch (*str++) { + case '0': + case ',': + break; + case '1': + case '2': + case '4': + case '8': + result++; + break; + case '3': + case '5': + case '6': + case '9': + case 'a': + case 'A': + case 'c': + case 'C': + result += 2; + break; + case '7': + case 'b': + case 'B': + case 'd': + case 'D': + case 'e': + case 'E': + result += 3; + break; + case 'f': + case 'F': + result += 4; + break; + default: + goto done; + } + } +done: + return result; +} + int smt_on(void) { static bool cached; @@ -15,9 +65,12 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) - goto done; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) { + cached = true; + return cached_result; + } + cached_result = 0; ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -26,27 +79,21 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", - cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) continue; } /* Entry is hex, but does not have 0x, so need custom parser */ - siblings = strtoull(str, NULL, 16); + siblings = hweight_str(str); free(str); - if (hweight64(siblings) > 1) { + if (siblings > 1) { cached_result = 1; - cached = true; break; } } - if (!cached) { - cached_result = 0; -done: - cached = true; - } + cached = true; return cached_result; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a111065b484e..2da081ef532b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -46,8 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; -const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; -const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; +static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"}; +static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *from = &he->branch_info->from; return _hist_entry__sym_snprintf(&from->ms, from->al_addr, - he->level, bf, size, width); + from->al_level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *to = &he->branch_info->to; return _hist_entry__sym_snprintf(&to->ms, to->al_addr, - he->level, bf, size, width); + to->al_level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = { }; static int64_t -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { return left->p_stage_cyc - right->p_stage_cyc; } +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, + he->p_stage_cyc * he->stat.nr_events); +} + + static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } -struct sort_entry sort_p_stage_cyc = { - .se_header = "Pipeline Stage Cycle", - .se_cmp = sort__global_p_stage_cyc_cmp, +struct sort_entry sort_local_p_stage_cyc = { + .se_header = "Local Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, .se_snprintf = hist_entry__p_stage_cyc_snprintf, - .se_width_idx = HISTC_P_STAGE_CYC, + .se_width_idx = HISTC_LOCAL_P_STAGE_CYC, +}; + +struct sort_entry sort_global_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, + .se_snprintf = hist_entry__global_p_stage_cyc_snprintf, + .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC, }; struct sort_entry sort_mem_daddr_sym = { @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), - DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), + DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), + DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), }; #undef DIM @@ -2365,6 +2381,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; /* record max width for output */ if (size > hde->dynamic_len) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b7145501933..f994261888e1 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -235,7 +235,8 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, - SORT_PIPELINE_STAGE_CYC, + SORT_LOCAL_PIPELINE_STAGE_CYC, + SORT_GLOBAL_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 588601000f3f..9cbe351b141f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -4,6 +4,7 @@ #include <linux/string.h> #include <linux/time64.h> #include <math.h> +#include <perf/cpumap.h> #include "color.h" #include "counts.h" #include "evlist.h" @@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.core > -1) { + } else if (id.cpu.cpu > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id.core], - config->csv_sep); + id.cpu.cpu, config->csv_sep); } break; case AGGR_THREAD: @@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id) +static int first_shadow_cpu_map_idx(struct perf_stat_config *config, + struct evsel *evsel, const struct aggr_cpu_id *id) { - struct evlist *evlist = evsel->evlist; - int i; + struct perf_cpu_map *cpus = evsel__cpus(evsel); + struct perf_cpu cpu; + int idx; if (config->aggr_mode == AGGR_NONE) - return id.core; + return perf_cpu_map__idx(cpus, id->cpu); if (!config->aggr_get_id) return 0; - for (i = 0; i < evsel__nr_cpus(evsel); i++) { - int cpu2 = evsel__cpus(evsel)->map[i]; + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); - if (cpu_map__compare_aggr_cpu_id( - config->aggr_get_id(config, evlist->core.cpus, cpu2), - id)) { - return cpu2; - } + if (aggr_cpu_id__equal(&cpu_id, id)) + return idx; } return 0; } @@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(config, counter, id), + first_shadow_cpu_map_idx(config, counter, &id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -516,23 +514,26 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s; + int idx, s; + struct perf_cpu cpu; struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; + struct perf_cpu_map *cpus; for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { + cpus = evsel__cpus(counter); val = 0; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, id)) + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + s2 = config->aggr_get_id(config, cpu); + if (!aggr_cpu_id__equal(&s2, &id)) continue; - val += perf_counts(counter->counts, cpu, 0)->val; + val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(config, counter, id), + first_shadow_cpu_map_idx(config, counter, &id), &rt_stat); } } @@ -584,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(evsel__name(alias), evsel__name(counter)) || - alias->scale != counter->scale || - alias->cgrp != counter->cgrp || - strcmp(alias->unit, counter->unit) || - evsel__is_clock(alias) != evsel__is_clock(counter) || - !strcmp(alias->pmu_name, counter->pmu_name)) - break; - alias->merged_stat = true; - cb(config, alias, data, false); + /* Merge events with the same name, etc. but on different PMUs. */ + if (!strcmp(evsel__name(alias), evsel__name(counter)) && + alias->scale == counter->scale && + alias->cgrp == counter->cgrp && + !strcmp(alias->unit, counter->unit) && + evsel__is_clock(alias) == evsel__is_clock(counter) && + strcmp(alias->pmu_name, counter->pmu_name)) { + alias->merged_stat = true; + cb(config, alias, data, false); + } } } @@ -627,25 +629,28 @@ struct aggr_data { u64 ena, run, val; struct aggr_cpu_id id; int nr; - int cpu; + int cpu_map_idx; }; static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu; + int idx; + struct perf_cpu cpu; + struct perf_cpu_map *cpus; struct aggr_cpu_id s2; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { struct perf_counts_values *counts; - s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) + s2 = config->aggr_get_id(config, cpu); + if (!aggr_cpu_id__equal(&s2, &ad->id)) continue; if (first) ad->nr++; - counts = perf_counts(counter->counts, cpu, 0); + counts = perf_counts(counter->counts, idx, 0); /* * When any result is bad, make them all to give * consistent output in interval mode. @@ -665,7 +670,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, int cpu) + bool *first, struct perf_cpu cpu) { struct aggr_data ad; FILE *output = config->output; @@ -695,10 +700,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu != -1) { - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; - } + if (cpu.cpu != -1) + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) @@ -731,8 +735,8 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, -1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) fputc('\n', output); @@ -778,7 +782,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id = aggr_cpu_id__empty(); buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; @@ -866,7 +870,7 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); @@ -878,9 +882,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused, { struct aggr_data *ad = data; - ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; + ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; } /* @@ -893,11 +897,12 @@ static void print_counter(struct perf_stat_config *config, FILE *output = config->output; u64 ena, run, val; double uval; - int cpu; + int idx; + struct perf_cpu cpu; struct aggr_cpu_id id; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu = cpu }; + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { + struct aggr_data ad = { .cpu_map_idx = idx }; if (!collect_data(config, counter, counter_cb, &ad)) return; @@ -909,8 +914,7 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -922,29 +926,32 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - int cpu; - int nrcpus = 0; - struct evsel *counter; - u64 ena, run, val; - double uval; - struct aggr_cpu_id id; + int all_idx; + struct perf_cpu cpu; - nrcpus = evlist->core.cpus->nr; - for (cpu = 0; cpu < nrcpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) { + struct evsel *counter; bool first = true; if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; + u64 ena, run, val; + double uval; + struct aggr_cpu_id id; + int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); + + if (counter_idx < 0) + continue; + + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + val = perf_counts(counter->counts, counter_idx, 0)->val; + ena = perf_counts(counter->counts, counter_idx, 0)->ena; + run = perf_counts(counter->counts, counter_idx, 0)->run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, @@ -1208,19 +1215,23 @@ static void print_percore_thread(struct perf_stat_config *config, { int s; struct aggr_cpu_id s2, id; + struct perf_cpu_map *cpus; bool first = true; + int idx; + struct perf_cpu cpu; - for (int i = 0; i < evsel__nr_cpus(counter); i++) { - s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + s2 = config->aggr_get_id(config, cpu); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; - if (cpu_map__compare_aggr_cpu_id(s2, id)) + if (aggr_cpu_id__equal(&s2, &id)) break; } print_counter_aggrdata(config, counter, s, prefix, false, - &first, i); + &first, cpu); } } @@ -1243,8 +1254,8 @@ static void print_percore(struct perf_stat_config *config, fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, -1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 5c7308efa768..10af7804e482 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -32,7 +32,7 @@ struct saved_value { struct evsel *evsel; enum stat_type type; int ctx; - int cpu; + int cpu_map_idx; struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; @@ -47,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->cpu != b->cpu) - return a->cpu - b->cpu; + if (a->cpu_map_idx != b->cpu_map_idx) + return a->cpu_map_idx - b->cpu_map_idx; /* * Previously the rbtree was used to link generic metrics. @@ -105,7 +105,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct evsel *evsel, - int cpu, + int cpu_map_idx, bool create, enum stat_type type, int ctx, @@ -115,7 +115,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { - .cpu = cpu, + .cpu_map_idx = cpu_map_idx, .evsel = evsel, .type = type, .ctx = ctx, @@ -213,10 +213,10 @@ struct runtime_stat_data { static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int cpu, u64 count, + int cpu_map_idx, u64 count, struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type, rsd->ctx, st, rsd->cgrp); if (v) @@ -229,7 +229,7 @@ static void update_runtime_stat(struct runtime_stat *st, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st) + int cpu_map_idx, struct runtime_stat *st) { u64 count_ns = count; struct saved_value *v; @@ -241,88 +241,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); + update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); + update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); + update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS)) update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT)) update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); + update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); + update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); + update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); + update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st, rsd.cgrp); + cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -464,12 +464,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -477,12 +477,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -490,7 +490,7 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -498,7 +498,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -513,7 +513,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -521,7 +521,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -532,7 +532,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -540,7 +540,7 @@ static void print_branch_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -551,7 +551,7 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -559,7 +559,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -570,7 +570,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -578,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -588,7 +588,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -596,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -606,7 +606,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -614,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -624,7 +624,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -632,7 +632,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -690,61 +690,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int cpu, struct runtime_stat *st, +static double td_total_slots(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd); } -static double td_bad_spec(int cpu, struct runtime_stat *st, +static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd); - total_slots = td_total_slots(cpu, st, rsd); + total_slots = td_total_slots(cpu_map_idx, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int cpu, struct runtime_stat *st, +static double td_retiring(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int cpu, struct runtime_stat *st, +static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int cpu, struct runtime_stat *st, +static double td_be_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(cpu, st, rsd) + - td_bad_spec(cpu, st, rsd) + - td_retiring(cpu, st, rsd)); + double sum = (td_fe_bound(cpu_map_idx, st, rsd) + + td_bad_spec(cpu_map_idx, st, rsd) + + td_retiring(cpu_map_idx, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -755,15 +755,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st, * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int cpu, enum stat_type type, +static double td_metric_ratio(int cpu_map_idx, enum stat_type type, struct runtime_stat *stat, struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); - double d = runtime_stat_avg(stat, type, cpu, rsd); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd); + double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd); if (sum) return d / sum; @@ -775,23 +775,23 @@ static double td_metric_ratio(int cpu, enum stat_type type, * We allow two missing. */ -static bool full_td(int cpu, struct runtime_stat *stat, +static bool full_td(int cpu_map_idx, struct runtime_stat *stat, struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, int cpu, +static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -799,9 +799,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, double smi_num, aperf, cycles, cost = 0.0; const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); - aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); - cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -818,7 +818,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, static int prepare_metric(struct evsel **metric_events, struct metric_ref *metric_refs, struct expr_parse_ctx *pctx, - int cpu, + int cpu_map_idx, struct runtime_stat *st) { double scale; @@ -836,7 +836,7 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; source_count = 1; } else { - v = saved_value_lookup(metric_events[i], cpu, false, + v = saved_value_lookup(metric_events[i], cpu_map_idx, false, STAT_NONE, 0, st, metric_events[i]->cgrp); if (!v) @@ -874,7 +874,7 @@ static void generic_metric(struct perf_stat_config *config, const char *metric_name, const char *metric_unit, int runtime, - int cpu, + int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -889,7 +889,7 @@ static void generic_metric(struct perf_stat_config *config, return; pctx->runtime = runtime; - i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st); if (i < 0) { expr__ctx_free(pctx); return; @@ -934,7 +934,7 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_free(pctx); } -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st) { struct expr_parse_ctx *pctx; double ratio = 0.0; @@ -943,7 +943,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -956,7 +956,7 @@ out: void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int cpu, + double avg, int cpu_map_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st) @@ -975,7 +975,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (config->iostat_run) { iostat_print_metric(config, evsel, out); } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -985,11 +985,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - cpu, &rsd)); + cpu_map_idx, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -999,8 +999,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) - print_branch_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0) + print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -1009,8 +1009,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) - print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0) + print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -1019,8 +1019,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) - print_l1_icache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0) + print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -1029,8 +1029,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) - print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0) + print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1039,8 +1039,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) - print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0) + print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1049,27 +1049,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) - print_ll_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0) + print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -1078,7 +1078,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1088,8 +1088,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (total2 < avg) total2 = avg; @@ -1099,19 +1099,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; @@ -1124,28 +1124,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(cpu, st, &rsd); + double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(cpu, st, &rsd); + double retiring = td_retiring(cpu_map_idx, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(cpu, st, &rsd); + double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(cpu, st, &rsd); + double be_bound = td_be_bound(cpu_map_idx, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1158,14 +1158,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(cpu, st, &rsd) > 0) + if (td_total_slots(cpu_map_idx, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(cpu, st, &rsd)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); if (retiring > 0.7) @@ -1173,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(cpu, st, &rsd)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); if (fe_bound > 0.2) @@ -1182,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(cpu, st, &rsd)) { - double be_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); if (be_bound > 0.2) @@ -1191,8 +1191,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(cpu, st, &rsd)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); if (bad_spec > 0.1) @@ -1200,11 +1200,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); - double heavy_ops = td_metric_ratio(cpu, + double heavy_ops = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_HEAVY_OPS, st, &rsd); double light_ops = retiring - heavy_ops; @@ -1220,11 +1220,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "light operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); - double br_mis = td_metric_ratio(cpu, + double br_mis = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BR_MISPREDICT, st, &rsd); double m_clears = bad_spec - br_mis; @@ -1240,11 +1240,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "machine clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); - double fetch_lat = td_metric_ratio(cpu, + double fetch_lat = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FETCH_LAT, st, &rsd); double fetch_bw = fe_bound - fetch_lat; @@ -1260,11 +1260,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double be_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); - double mem_bound = td_metric_ratio(cpu, + double mem_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_MEM_BOUND, st, &rsd); double core_bound = be_bound - mem_bound; @@ -1281,12 +1281,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { + evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) ratio = convert_unit_double(1000000000.0 * avg / total, &unit); @@ -1294,7 +1294,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, out, st, &rsd); + print_smi_cost(config, cpu_map_idx, out, st, &rsd); } else { num = 0; } @@ -1307,7 +1307,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, mexp->metric_refs, evsel->name, mexp->metric_name, - mexp->metric_unit, mexp->runtime, cpu, out, st); + mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 09ea334586f2..ee6f03481215 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -152,11 +152,13 @@ static void evsel__free_stat_priv(struct evsel *evsel) zfree(&evsel->stats); } -static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) +static int evsel__alloc_prev_raw_counts(struct evsel *evsel) { + int cpu_map_nr = evsel__nr_cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); struct perf_counts *counts; - counts = perf_counts__new(ncpus, nthreads); + counts = perf_counts__new(cpu_map_nr, nthreads); if (counts) evsel->prev_raw_counts = counts; @@ -177,12 +179,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = evsel__nr_cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); - if (evsel__alloc_stat_priv(evsel) < 0 || - evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + evsel__alloc_counts(evsel) < 0 || + (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; return 0; @@ -293,11 +292,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2, return *key1 == *key2; } -static int check_per_pkg(struct evsel *counter, - struct perf_counts_values *vals, int cpu, bool *skip) +static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, + int cpu_map_idx, bool *skip) { struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); + struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx); int s, d, ret = 0; uint64_t *key; @@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).socket; + s = cpu__get_socket_id(cpu); if (s < 0) return -1; @@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter, * On multi-die system, die_id > 0. On no-die system, die_id = 0. * We use hashmap(socket, die) to check the used socket+die pair. */ - d = cpu_map__get_die(cpus, cpu, NULL).die; + d = cpu__get_die_id(cpu); if (d < 0) return -1; @@ -345,9 +345,10 @@ static int check_per_pkg(struct evsel *counter, return -ENOMEM; *key = (uint64_t)d << 32 | s; - if (hashmap__find(mask, (void *)key, NULL)) + if (hashmap__find(mask, (void *)key, NULL)) { *skip = true; - else + free(key); + } else ret = hashmap__add(mask, (void *)key, (void *)1); return ret; @@ -355,14 +356,14 @@ static int check_per_pkg(struct evsel *counter, static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, - int cpu, int thread, + int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, count, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } @@ -378,11 +379,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu_map_idx, thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, - cpu, &rt_stat); + cpu_map_idx, &rt_stat); } if (config->aggr_mode == AGGR_THREAD) { @@ -411,15 +412,15 @@ static int process_counter_maps(struct perf_stat_config *config, { int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, thread; if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(config, counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) + for (idx = 0; idx < ncpus; idx++) { + if (process_counter_values(config, counter, idx, thread, + perf_counts(counter->counts, idx, thread))) return -1; } } @@ -531,7 +532,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu) + int cpu_map_idx) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel__leader(evsel); @@ -585,7 +586,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 32c8527de347..335d19cc3063 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,8 +108,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, - struct perf_cpu_map *m, int cpu); +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { enum aggr_mode aggr_mode; @@ -209,7 +208,7 @@ void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st); + int cpu_map_idx, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -249,10 +248,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu); + int cpu_map_idx); void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st); #endif diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96f941e01681..1e0c731fc539 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -728,20 +728,20 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) int i; int ret = 0; struct perf_cpu_map *m; - int c; + struct perf_cpu c; m = perf_cpu_map__new(s); if (!m) return -1; - for (i = 0; i < m->nr; i++) { - c = m->map[i]; - if (c >= nr_cpus) { + for (i = 0; i < perf_cpu_map__nr(m); i++) { + c = perf_cpu_map__cpu(m, i); + if (c.cpu >= nr_cpus) { ret = -1; break; } - set_bit(c, cpumask_bits(b)); + set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b2ed3140a1fa..dfde9eada224 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols) prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start && prev->end != curr->start) + if (prev->end == prev->start || prev->end != curr->start) arch__symbols__fixup_end(prev, curr); } diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 198982109f0f..b654de0841f8 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, static void synthesize_cpus(struct cpu_map_entries *cpus, struct perf_cpu_map *map) { - int i; + int i, map_nr = perf_cpu_map__nr(map); - cpus->nr = map->nr; + cpus->nr = map_nr; - for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; + for (i = 0; i < map_nr; i++) + cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->nr = BITS_TO_LONGS(max); mask->long_size = sizeof(long); - for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); + for (i = 0; i < perf_cpu_map__nr(map); i++) + set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) { - return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); + return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); } static size_t mask_size(struct perf_cpu_map *map, int *max) @@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { /* bit position of the cpu is + 1 */ - int bit = map->map[i] + 1; + int bit = perf_cpu_map__cpu(map, i).cpu + 1; if (bit > *max) *max = bit; @@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, } int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, + struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine) @@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, event.header.misc = 0; event.id = id; - event.cpu = cpu; + event.cpu = cpu.cpu; event.thread = thread; event.val = count->val; event.ena = count->ena; @@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ } e->idx = sid->idx; - e->cpu = sid->cpu; + e->cpu = sid->cpu.cpu; e->tid = sid->tid; } } @@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize) { + /* + * When perf runs in non-root PID namespace, and the namespace's proc FS + * is not mounted, nsinfo__is_in_root_namespace() returns false. + * In this case, the proc FS is coming for the parent namespace, thus + * perf tool will wrongly gather process info from its parent PID + * namespace. + * + * To avoid the confusion that the perf tool runs in a child PID + * namespace but it synthesizes thread info from its parent PID + * namespace, returns failure with warning. + */ + if (!nsinfo__is_in_root_namespace()) { + pr_err("Perf runs in non-root PID namespace but it tries to "); + pr_err("gather process info from its parent PID namespace.\n"); + pr_err("Please mount the proc file system properly, e.g. "); + pr_err("add the option '--mount-proc' for unshare command.\n"); + return -EPERM; + } + if (target__has_task(target)) return perf_event__synthesize_thread_map(tool, threads, process, machine, needs_mmap, data_mmap); diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c931433bacbf..78a0450db164 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -6,6 +6,7 @@ #include <sys/types.h> // pid_t #include <linux/compiler.h> #include <linux/types.h> +#include <perf/cpumap.h> struct auxtrace_record; struct dso; @@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 27945eeb0cb5..c1ebfc5d2e0c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - top->evlist->core.cpus->nr > 1 ? "s" : "", + perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - top->evlist->core.cpus->nr, - top->evlist->core.cpus->nr > 1 ? "s" : ""); + perf_cpu_map__nr(top->evlist->core.cpus), + perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index df3c4671be72..fb4f6616b5fa 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -416,3 +416,18 @@ char *perf_exe(char *buf, int len) } return strcpy(buf, "perf"); } + +void perf_debuginfod_setup(struct perf_debuginfod *di) +{ + /* + * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod + * processing is not triggered, otherwise we set it to 'di->urls' + * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value. + */ + if (!di->set) + setenv("DEBUGINFOD_URLS", "", 1); + else if (di->urls && strcmp(di->urls, "system")) + setenv("DEBUGINFOD_URLS", di->urls, 1); + + pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS")); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9f0d36ba77f2..7b625cbd2dd8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,6 +11,9 @@ #include <stddef.h> #include <linux/compiler.h> #include <sys/types.h> +#ifndef __cplusplus +#include <internal/cpumap.h> +#endif /* General helper functions */ void usage(const char *err) __noreturn; @@ -66,6 +69,12 @@ extern bool test_attr__enabled; void test_attr__ready(void); void test_attr__init(void); struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags); + +struct perf_debuginfod { + const char *urls; + bool set; +}; +void perf_debuginfod_setup(struct perf_debuginfod *di); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore index 0b319fc8bb17..eada0297ef88 100644 --- a/tools/power/acpi/.gitignore +++ b/tools/power/acpi/.gitignore @@ -2,4 +2,5 @@ /acpidbg /acpidump /ec +/pfrut /include/ diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index a249c50ebf55..5ff1d9c864d0 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -9,18 +9,18 @@ include ../../scripts/Makefile.include .NOTPARALLEL: -all: acpidbg acpidump ec -clean: acpidbg_clean acpidump_clean ec_clean -install: acpidbg_install acpidump_install ec_install -uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall +all: acpidbg acpidump ec pfrut +clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean +install: acpidbg_install acpidump_install ec_install pfrut_install +uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall -acpidbg acpidump ec: FORCE +acpidbg acpidump ec pfrut: FORCE $(call descend,tools/$@,all) -acpidbg_clean acpidump_clean ec_clean: +acpidbg_clean acpidump_clean ec_clean pfrut_clean: $(call descend,tools/$(@:_clean=),clean) -acpidbg_install acpidump_install ec_install: +acpidbg_install acpidump_install ec_install pfrut_install: $(call descend,tools/$(@:_install=),install) -acpidbg_uninstall acpidump_uninstall ec_uninstall: +acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall: $(call descend,tools/$(@:_uninstall=),uninstall) .PHONY: FORCE diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 1d7616f5d0ae..b71aada77688 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/ toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) $(OUTPUT)$(TOOL): $(toolobjs) FORCE $(ECHO) " LD " $(subst $(OUTPUT),,$@) - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@ $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ diff --git a/tools/power/acpi/man/pfrut.8 b/tools/power/acpi/man/pfrut.8 new file mode 100644 index 000000000000..3db574770e8d --- /dev/null +++ b/tools/power/acpi/man/pfrut.8 @@ -0,0 +1,137 @@ +.TH "PFRUT" "8" "October 2021" "pfrut 1.0" "" +.hy +.SH Name +.PP +pfrut \- Platform Firmware Runtime Update and Telemetry tool +.SH SYNOPSIS +.PP +\f[B]pfrut\f[R] [\f[I]Options\f[R]] +.SH DESCRIPTION +.PP +The PFRUT(Platform Firmware Runtime Update and Telemetry) kernel interface is designed +to +.PD 0 +.P +.PD +interact with the platform firmware interface defined in the +.PD 0 +.P +.PD +Management Mode Firmware Runtime +Update (https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf) +.PD 0 +.P +.PD +\f[B]pfrut\f[R] is the tool to interact with the kernel interface. +.PD 0 +.P +.PD +.SH OPTIONS +.TP +.B \f[B]\-h\f[R], \f[B]\-\-help\f[R] +Display helper information. +.TP +.B \f[B]\-l\f[R], \f[B]\-\-load\f[R] +Load the capsule file into the system. +To be more specific, the capsule file will be copied to the +communication buffer. +.TP +.B \f[B]\-s\f[R], \f[B]\-\-stage\f[R] +Stage the capsule image from communication buffer into Management Mode +and perform authentication. +.TP +.B \f[B]\-a\f[R], \f[B]\-\-activate\f[R] +Activate a previous staged capsule image. +.TP +.B \f[B]\-u\f[R], \f[B]\-\-update\f[R] +Perform both stage and activation actions. +.TP +.B \f[B]\-q\f[R], \f[B]\-\-query\f[R] +Query the update capability. +.TP +.B \f[B]\-d\f[R], \f[B]\-\-setrev\f[R] +Set the revision ID of code injection/driver update. +.TP +.B \f[B]\-D\f[R], \f[B]\-\-setrevlog\f[R] +Set the revision ID of telemetry. +.TP +.B \f[B]\-G\f[R], \f[B]\-\-getloginfo\f[R] +Get telemetry log information and print it out. +.TP +.B \f[B]\-T\f[R], \f[B]\-\-type\f[R] +Set the telemetry log data type. +.TP +.B \f[B]\-L\f[R], \f[B]\-\-level\f[R] +Set the telemetry log level. +.TP +.B \f[B]\-R\f[R], \f[B]\-\-read\f[R] +Read all the telemetry data and print it out. +.SH EXAMPLES +.PP +\f[B]pfrut \-G\f[R] +.PP +log_level:4 +.PD 0 +.P +.PD +log_type:0 +.PD 0 +.P +.PD +log_revid:2 +.PD 0 +.P +.PD +max_data_size:65536 +.PD 0 +.P +.PD +chunk1_size:0 +.PD 0 +.P +.PD +chunk2_size:1401 +.PD 0 +.P +.PD +rollover_cnt:0 +.PD 0 +.P +.PD +reset_cnt:4 +.PP +\f[B]pfru \-q\f[R] +.PP +code injection image type:794bf8b2\-6e7b\-454e\-885f\-3fb9bb185402 +.PD 0 +.P +.PD +fw_version:0 +.PD 0 +.P +.PD +code_rt_version:1 +.PD 0 +.P +.PD +driver update image type:0e5f0b14\-f849\-7945\-ad81\-bc7b6d2bb245 +.PD 0 +.P +.PD +drv_rt_version:0 +.PD 0 +.P +.PD +drv_svn:0 +.PD 0 +.P +.PD +platform id:39214663\-b1a8\-4eaa\-9024\-f2bb53ea4723 +.PD 0 +.P +.PD +oem id:a36db54f\-ea2a\-e14e\-b7c4\-b5780e51ba3d +.PP +\f[B]pfrut \-l yours.cap \-u \-T 1 \-L 4\f[R] +.SH AUTHORS +Chen Yu. diff --git a/tools/power/acpi/tools/pfrut/Makefile b/tools/power/acpi/tools/pfrut/Makefile new file mode 100644 index 000000000000..61c1a96fd433 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0+ + +include ../../Makefile.config + +TOOL = pfrut +EXTRA_INSTALL = install-man +EXTRA_UNINSTALL = uninstall-man + +CFLAGS += -Wall -O2 +CFLAGS += -DPFRUT_HEADER='"../../../../../include/uapi/linux/pfrut.h"' +LDFLAGS += -luuid + +TOOL_OBJS = \ + pfrut.o + +include ../../Makefile.rules + +install-man: $(srctree)/man/pfrut.8 + $(ECHO) " INST " pfrut.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/pfrut.8 +uninstall-man: + $(ECHO) " UNINST " pfrut.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/pfrut.8 diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c new file mode 100644 index 000000000000..d79c335594b2 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform Firmware Runtime Update tool to do Management + * Mode code injection/driver update and telemetry retrieval. + * + * This tool uses the interfaces provided by pfr_update and + * pfr_telemetry drivers. These interfaces are exposed via + * /dev/pfr_update and /dev/pfr_telemetry. Write operation + * on the /dev/pfr_update is to load the EFI capsule into + * kernel space. Mmap/read operations on /dev/pfr_telemetry + * could be used to read the telemetry data to user space. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <uuid/uuid.h> +#include PFRUT_HEADER + +char *capsule_name; +int action, query_cap, log_type, log_level, log_read, log_getinfo, + revid, log_revid; +int set_log_level, set_log_type, + set_revid, set_log_revid; + +char *progname; + +#define LOG_ERR 0 +#define LOG_WARN 1 +#define LOG_INFO 2 +#define LOG_VERB 4 +#define LOG_EXEC_IDX 0 +#define LOG_HISTORY_IDX 1 +#define REVID_1 1 +#define REVID_2 2 + +static int valid_log_level(int level) +{ + return level == LOG_ERR || level == LOG_WARN || + level == LOG_INFO || level == LOG_VERB; +} + +static int valid_log_type(int type) +{ + return type == LOG_EXEC_IDX || type == LOG_HISTORY_IDX; +} + +static inline int valid_log_revid(int id) +{ + return id == REVID_1 || id == REVID_2; +} + +static void help(void) +{ + fprintf(stderr, + "usage: %s [OPTIONS]\n" + " code injection:\n" + " -l, --load\n" + " -s, --stage\n" + " -a, --activate\n" + " -u, --update [stage and activate]\n" + " -q, --query\n" + " -d, --revid update\n" + " telemetry:\n" + " -G, --getloginfo\n" + " -T, --type(0:execution, 1:history)\n" + " -L, --level(0, 1, 2, 4)\n" + " -R, --read\n" + " -D, --revid log\n", + progname); +} + +char *option_string = "l:sauqd:GT:L:RD:h"; +static struct option long_options[] = { + {"load", required_argument, 0, 'l'}, + {"stage", no_argument, 0, 's'}, + {"activate", no_argument, 0, 'a'}, + {"update", no_argument, 0, 'u'}, + {"query", no_argument, 0, 'q'}, + {"getloginfo", no_argument, 0, 'G'}, + {"type", required_argument, 0, 'T'}, + {"level", required_argument, 0, 'L'}, + {"read", no_argument, 0, 'R'}, + {"setrev", required_argument, 0, 'd'}, + {"setrevlog", required_argument, 0, 'D'}, + {"help", no_argument, 0, 'h'}, + {} +}; + +static void parse_options(int argc, char **argv) +{ + int option_index = 0; + char *pathname; + int opt; + + pathname = strdup(argv[0]); + progname = basename(pathname); + + while ((opt = getopt_long_only(argc, argv, option_string, + long_options, &option_index)) != -1) { + switch (opt) { + case 'l': + capsule_name = optarg; + break; + case 's': + action = 1; + break; + case 'a': + action = 2; + break; + case 'u': + action = 3; + break; + case 'q': + query_cap = 1; + break; + case 'G': + log_getinfo = 1; + break; + case 'T': + log_type = atoi(optarg); + set_log_type = 1; + break; + case 'L': + log_level = atoi(optarg); + set_log_level = 1; + break; + case 'R': + log_read = 1; + break; + case 'd': + revid = atoi(optarg); + set_revid = 1; + break; + case 'D': + log_revid = atoi(optarg); + set_log_revid = 1; + break; + case 'h': + help(); + exit(0); + default: + break; + } + } +} + +void print_cap(struct pfru_update_cap_info *cap) +{ + char *uuid; + + uuid = malloc(37); + if (!uuid) { + perror("Can not allocate uuid buffer\n"); + exit(1); + } + + uuid_unparse(cap->code_type, uuid); + printf("code injection image type:%s\n", uuid); + printf("fw_version:%d\n", cap->fw_version); + printf("code_rt_version:%d\n", cap->code_rt_version); + + uuid_unparse(cap->drv_type, uuid); + printf("driver update image type:%s\n", uuid); + printf("drv_rt_version:%d\n", cap->drv_rt_version); + printf("drv_svn:%d\n", cap->drv_svn); + + uuid_unparse(cap->platform_id, uuid); + printf("platform id:%s\n", uuid); + uuid_unparse(cap->oem_id, uuid); + printf("oem id:%s\n", uuid); + printf("oem information length:%d\n", cap->oem_info_len); + + free(uuid); +} + +int main(int argc, char *argv[]) +{ + int fd_update, fd_update_log, fd_capsule; + struct pfrt_log_data_info data_info; + struct pfrt_log_info info; + struct pfru_update_cap_info cap; + void *addr_map_capsule; + struct stat st; + char *log_buf; + int ret = 0; + + if (getuid() != 0) { + printf("Please run the tool as root - Exiting.\n"); + return 1; + } + + parse_options(argc, argv); + + fd_update = open("/dev/acpi_pfr_update0", O_RDWR); + if (fd_update < 0) { + printf("PFRU device not supported - Quit...\n"); + return 1; + } + + fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR); + if (fd_update_log < 0) { + printf("PFRT device not supported - Quit...\n"); + return 1; + } + + if (query_cap) { + ret = ioctl(fd_update, PFRU_IOC_QUERY_CAP, &cap); + if (ret) + perror("Query Update Capability info failed."); + else + print_cap(&cap); + + close(fd_update); + close(fd_update_log); + + return ret; + } + + if (log_getinfo) { + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_INFO, &info); + if (ret) { + perror("Get telemetry info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("log_level:%d\n", info.log_level); + printf("log_type:%d\n", info.log_type); + printf("log_revid:%d\n", info.log_revid); + printf("max_data_size:%d\n", data_info.max_data_size); + printf("chunk1_size:%d\n", data_info.chunk1_size); + printf("chunk2_size:%d\n", data_info.chunk2_size); + printf("rollover_cnt:%d\n", data_info.rollover_cnt); + printf("reset_cnt:%d\n", data_info.reset_cnt); + + return 0; + } + + info.log_level = -1; + info.log_type = -1; + info.log_revid = -1; + + if (set_log_level) { + if (!valid_log_level(log_level)) { + printf("Invalid log level %d\n", + log_level); + } else { + info.log_level = log_level; + } + } + + if (set_log_type) { + if (!valid_log_type(log_type)) { + printf("Invalid log type %d\n", + log_type); + } else { + info.log_type = log_type; + } + } + + if (set_log_revid) { + if (!valid_log_revid(log_revid)) { + printf("Invalid log revid %d, unchanged.\n", + log_revid); + } else { + info.log_revid = log_revid; + } + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_SET_INFO, &info); + if (ret) { + perror("Log information set failed.(log_level, log_type, log_revid)"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (set_revid) { + ret = ioctl(fd_update, PFRU_IOC_SET_REV, &revid); + if (ret) { + perror("pfru update revid set failed"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("pfru update revid set to %d\n", revid); + } + + if (capsule_name) { + fd_capsule = open(capsule_name, O_RDONLY); + if (fd_capsule < 0) { + perror("Can not open capsule file..."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (fstat(fd_capsule, &st) < 0) { + perror("Can not fstat capsule file..."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + addr_map_capsule = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, + fd_capsule, 0); + if (addr_map_capsule == MAP_FAILED) { + perror("Failed to mmap capsule file."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = write(fd_update, (char *)addr_map_capsule, st.st_size); + printf("Load %d bytes of capsule file into the system\n", + ret); + + if (ret == -1) { + perror("Failed to load capsule file"); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + munmap(addr_map_capsule, st.st_size); + close(fd_capsule); + printf("Load done.\n"); + } + + if (action) { + if (action == 1) { + ret = ioctl(fd_update, PFRU_IOC_STAGE, NULL); + } else if (action == 2) { + ret = ioctl(fd_update, PFRU_IOC_ACTIVATE, NULL); + } else if (action == 3) { + ret = ioctl(fd_update, PFRU_IOC_STAGE_ACTIVATE, NULL); + } else { + close(fd_update); + close(fd_update_log); + + return 1; + } + printf("Update finished, return %d\n", ret); + } + + close(fd_update); + + if (log_read) { + void *p_mmap; + int max_data_sz; + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update_log); + + return 1; + } + + max_data_sz = data_info.max_data_size; + if (!max_data_sz) { + printf("No telemetry data available.\n"); + close(fd_update_log); + + return 1; + } + + log_buf = malloc(max_data_sz + 1); + if (!log_buf) { + perror("log_buf allocate failed."); + close(fd_update_log); + + return 1; + } + + p_mmap = mmap(NULL, max_data_sz, PROT_READ, MAP_SHARED, fd_update_log, 0); + if (p_mmap == MAP_FAILED) { + perror("mmap error."); + close(fd_update_log); + + return 1; + } + + memcpy(log_buf, p_mmap, max_data_sz); + log_buf[max_data_sz] = '\0'; + printf("%s\n", log_buf); + free(log_buf); + + munmap(p_mmap, max_data_sz); + } + + close(fd_update_log); + + return 0; +} diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 3b1594447f29..e9b6de314654 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -143,9 +143,9 @@ UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h -LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c -LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o +LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h lib/acpi_cppc.h +LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c lib/acpi_cppc.c +LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o lib/acpi_cppc.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) override CFLAGS += -pipe diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/TODO index b196a139a3e4..b196a139a3e4 100644 --- a/tools/power/cpupower/ToDo +++ b/tools/power/cpupower/TODO diff --git a/tools/power/cpupower/lib/acpi_cppc.c b/tools/power/cpupower/lib/acpi_cppc.c new file mode 100644 index 000000000000..c401ac331e9f --- /dev/null +++ b/tools/power/cpupower/lib/acpi_cppc.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "cpupower_intern.h" +#include "acpi_cppc.h" + +/* ACPI CPPC sysfs access ***********************************************/ + +static int acpi_cppc_read_file(unsigned int cpu, const char *fname, + char *buf, size_t buflen) +{ + char path[SYSFS_PATH_MAX]; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/acpi_cppc/%s", + cpu, fname); + return cpupower_read_sysfs(path, buf, buflen); +} + +static const char * const acpi_cppc_value_files[] = { + [HIGHEST_PERF] = "highest_perf", + [LOWEST_PERF] = "lowest_perf", + [NOMINAL_PERF] = "nominal_perf", + [LOWEST_NONLINEAR_PERF] = "lowest_nonlinear_perf", + [LOWEST_FREQ] = "lowest_freq", + [NOMINAL_FREQ] = "nominal_freq", + [REFERENCE_PERF] = "reference_perf", + [WRAPAROUND_TIME] = "wraparound_time" +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, enum acpi_cppc_value which) +{ + unsigned long long value; + unsigned int len; + char linebuf[MAX_LINE_LEN]; + char *endp; + + if (which >= MAX_CPPC_VALUE_FILES) + return 0; + + len = acpi_cppc_read_file(cpu, acpi_cppc_value_files[which], + linebuf, sizeof(linebuf)); + if (len == 0) + return 0; + + value = strtoull(linebuf, &endp, 0); + + if (endp == linebuf || errno == ERANGE) + return 0; + + return value; +} diff --git a/tools/power/cpupower/lib/acpi_cppc.h b/tools/power/cpupower/lib/acpi_cppc.h new file mode 100644 index 000000000000..85ca83080316 --- /dev/null +++ b/tools/power/cpupower/lib/acpi_cppc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_CPPC_H__ +#define __ACPI_CPPC_H__ + +enum acpi_cppc_value { + HIGHEST_PERF, + LOWEST_PERF, + NOMINAL_PERF, + LOWEST_NONLINEAR_PERF, + LOWEST_FREQ, + NOMINAL_FREQ, + REFERENCE_PERF, + WRAPAROUND_TIME, + MAX_CPPC_VALUE_FILES +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, + enum acpi_cppc_value which); + +#endif /* _ACPI_CPPC_H */ diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index c3b56db8b921..1516d23c17c9 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -83,20 +83,21 @@ static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = { [STATS_NUM_TRANSITIONS] = "stats/total_trans" }; - -static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, - enum cpufreq_value which) +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size) { unsigned long value; unsigned int len; char linebuf[MAX_LINE_LEN]; char *endp; - if (which >= MAX_CPUFREQ_VALUE_READ_FILES) + if (!table || index >= size || !table[index]) return 0; - len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which], - linebuf, sizeof(linebuf)); + len = sysfs_cpufreq_read_file(cpu, table[index], linebuf, + sizeof(linebuf)); if (len == 0) return 0; @@ -109,6 +110,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, return value; } +static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu, + enum cpufreq_value which) +{ + return cpufreq_get_sysfs_value_from_table(cpu, cpufreq_value_files, + which, + MAX_CPUFREQ_VALUE_READ_FILES); +} + /* read access to files which contain one string */ enum cpufreq_string { @@ -124,7 +133,7 @@ static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = { static char *sysfs_cpufreq_get_one_string(unsigned int cpu, - enum cpufreq_string which) + enum cpufreq_string which) { char linebuf[MAX_LINE_LEN]; char *result; diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 95f4fd9e2656..2f3c84035806 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -203,6 +203,18 @@ int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency); +/* + * get the sysfs value from specific table + * + * Read the value with the sysfs file name from specific table. Does + * only work if the cpufreq driver has the specific sysfs interfaces. + */ + +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size); + #ifdef __cplusplus } #endif diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 6aa8d239dff9..dd545b499480 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -53,6 +53,9 @@ human\-readable output for the \-f, \-w, \-s and \-y parameters. \fB\-n\fR \fB\-\-no-rounding\fR Output frequencies and latencies without rounding off values. .TP +\fB\-c\fR \fB\-\-perf\fR +Get performances and frequencies capabilities of CPPC, by reading it from hardware (only available on the hardware with CPPC). +.TP .SH "REMARKS" .LP By default only values of core zero are displayed. How to display settings of diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 21916cff7516..8cef3c71e19e 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -4,7 +4,7 @@ cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options .SH "SYNTAX" .LP -cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP The cpupower idle\-set subcommand allows to set cpu idle, also called cpu diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index f9895e31ff5a..0646f615fe2d 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -84,43 +84,6 @@ static void proc_cpufreq_output(void) } static int no_rounding; -static void print_speed(unsigned long speed) -{ - unsigned long tmp; - - if (no_rounding) { - if (speed > 1000000) - printf("%u.%06u GHz", ((unsigned int) speed/1000000), - ((unsigned int) speed%1000000)); - else if (speed > 1000) - printf("%u.%03u MHz", ((unsigned int) speed/1000), - (unsigned int) (speed%1000)); - else - printf("%lu kHz", speed); - } else { - if (speed > 1000000) { - tmp = speed%10000; - if (tmp >= 5000) - speed += 10000; - printf("%u.%02u GHz", ((unsigned int) speed/1000000), - ((unsigned int) (speed%1000000)/10000)); - } else if (speed > 100000) { - tmp = speed%1000; - if (tmp >= 500) - speed += 1000; - printf("%u MHz", ((unsigned int) speed/1000)); - } else if (speed > 1000) { - tmp = speed%100; - if (tmp >= 50) - speed += 100; - printf("%u.%01u MHz", ((unsigned int) speed/1000), - ((unsigned int) (speed%1000)/100)); - } - } - - return; -} - static void print_duration(unsigned long duration) { unsigned long tmp; @@ -183,9 +146,12 @@ static int get_boost_mode_x86(unsigned int cpu) printf(_(" Supported: %s\n"), support ? _("yes") : _("no")); printf(_(" Active: %s\n"), active ? _("yes") : _("no")); - if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && - cpupower_cpu_info.family >= 0x10) || - cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { + return 0; + } else if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.family >= 0x10) || + cpupower_cpu_info.vendor == X86_VENDOR_HYGON) { ret = decode_pstates(cpu, b_states, pstates, &pstate_no); if (ret) return ret; @@ -254,11 +220,11 @@ static int get_boost_mode(unsigned int cpu) if (freqs) { printf(_(" boost frequency steps: ")); while (freqs->next) { - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf(", "); freqs = freqs->next; } - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf("\n"); cpufreq_put_available_frequencies(freqs); } @@ -277,7 +243,7 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human) return -EINVAL; } if (human) { - print_speed(freq); + print_speed(freq, no_rounding); } else printf("%lu", freq); printf(_(" (asserted by call to kernel)\n")); @@ -296,7 +262,7 @@ static int get_freq_hardware(unsigned int cpu, unsigned int human) return -EINVAL; } if (human) { - print_speed(freq); + print_speed(freq, no_rounding); } else printf("%lu", freq); printf(_(" (asserted by call to hardware)\n")); @@ -316,9 +282,9 @@ static int get_hardware_limits(unsigned int cpu, unsigned int human) if (human) { printf(_(" hardware limits: ")); - print_speed(min); + print_speed(min, no_rounding); printf(" - "); - print_speed(max); + print_speed(max, no_rounding); printf("\n"); } else { printf("%lu %lu\n", min, max); @@ -350,9 +316,9 @@ static int get_policy(unsigned int cpu) return -EINVAL; } printf(_(" current policy: frequency should be within ")); - print_speed(policy->min); + print_speed(policy->min, no_rounding); printf(_(" and ")); - print_speed(policy->max); + print_speed(policy->max, no_rounding); printf(".\n "); printf(_("The governor \"%s\" may decide which speed to use\n" @@ -436,7 +402,7 @@ static int get_freq_stats(unsigned int cpu, unsigned int human) struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time); while (stats) { if (human) { - print_speed(stats->frequency); + print_speed(stats->frequency, no_rounding); printf(":%.2f%%", (100.0 * stats->time_in_state) / total_time); } else @@ -472,6 +438,17 @@ static int get_latency(unsigned int cpu, unsigned int human) return 0; } +/* --performance / -c */ + +static int get_perf_cap(unsigned int cpu) +{ + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD && + cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) + amd_pstate_show_perf_and_freq(cpu, no_rounding); + + return 0; +} + static void debug_output_one(unsigned int cpu) { struct cpufreq_available_frequencies *freqs; @@ -486,11 +463,11 @@ static void debug_output_one(unsigned int cpu) if (freqs) { printf(_(" available frequency steps: ")); while (freqs->next) { - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf(", "); freqs = freqs->next; } - print_speed(freqs->frequency); + print_speed(freqs->frequency, no_rounding); printf("\n"); cpufreq_put_available_frequencies(freqs); } @@ -500,6 +477,7 @@ static void debug_output_one(unsigned int cpu) if (get_freq_hardware(cpu, 1) < 0) get_freq_kernel(cpu, 1); get_boost_mode(cpu); + get_perf_cap(cpu); } static struct option info_opts[] = { @@ -518,6 +496,7 @@ static struct option info_opts[] = { {"proc", no_argument, NULL, 'o'}, {"human", no_argument, NULL, 'm'}, {"no-rounding", no_argument, NULL, 'n'}, + {"performance", no_argument, NULL, 'c'}, { }, }; @@ -531,7 +510,7 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts, + ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts, NULL); switch (ret) { case '?': @@ -554,6 +533,7 @@ int cmd_freq_info(int argc, char **argv) case 'e': case 's': case 'y': + case 'c': if (output_param) { output_param = -1; cont = 0; @@ -660,6 +640,9 @@ int cmd_freq_info(int argc, char **argv) case 'y': ret = get_latency(cpu, human); break; + case 'c': + ret = get_perf_cap(cpu); + break; } if (ret) return ret; diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 97f2c857048e..c519cc89c97f 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -8,7 +8,10 @@ #include <pci/pci.h> #include "helpers/helpers.h" +#include "cpufreq.h" +#include "acpi_cppc.h" +/* ACPI P-States Helper Functions for AMD Processors ***************/ #define MSR_AMD_PSTATE_STATUS 0xc0010063 #define MSR_AMD_PSTATE 0xc0010064 #define MSR_AMD_PSTATE_LIMIT 0xc0010061 @@ -146,4 +149,78 @@ int amd_pci_get_num_boost_states(int *active, int *states) pci_cleanup(pci_acc); return 0; } + +/* ACPI P-States Helper Functions for AMD Processors ***************/ + +/* AMD P-State Helper Functions ************************************/ +enum amd_pstate_value { + AMD_PSTATE_HIGHEST_PERF, + AMD_PSTATE_MAX_FREQ, + AMD_PSTATE_LOWEST_NONLINEAR_FREQ, + MAX_AMD_PSTATE_VALUE_READ_FILES, +}; + +static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = { + [AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf", + [AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq", + [AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq", +}; + +static unsigned long amd_pstate_get_data(unsigned int cpu, + enum amd_pstate_value value) +{ + return cpufreq_get_sysfs_value_from_table(cpu, + amd_pstate_value_files, + value, + MAX_AMD_PSTATE_VALUE_READ_FILES); +} + +void amd_pstate_boost_init(unsigned int cpu, int *support, int *active) +{ + unsigned long highest_perf, nominal_perf, cpuinfo_min, + cpuinfo_max, amd_pstate_max; + + highest_perf = amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF); + nominal_perf = acpi_cppc_get_data(cpu, NOMINAL_PERF); + + *support = highest_perf > nominal_perf ? 1 : 0; + if (!(*support)) + return; + + cpufreq_get_hardware_limits(cpu, &cpuinfo_min, &cpuinfo_max); + amd_pstate_max = amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ); + + *active = cpuinfo_max == amd_pstate_max ? 1 : 0; +} + +void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding) +{ + printf(_(" AMD PSTATE Highest Performance: %lu. Maximum Frequency: "), + amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF)); + /* + * If boost isn't active, the cpuinfo_max doesn't indicate real max + * frequency. So we read it back from amd-pstate sysfs entry. + */ + print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "), + acpi_cppc_get_data(cpu, NOMINAL_PERF)); + print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000, + no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "), + acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF)); + print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ), + no_rounding); + printf(".\n"); + + printf(_(" AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "), + acpi_cppc_get_data(cpu, LOWEST_PERF)); + print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding); + printf(".\n"); +} + +/* AMD P-State Helper Functions ************************************/ #endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 72eb43593180..eae91f11d187 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -149,6 +149,19 @@ out: if (ext_cpuid_level >= 0x80000008 && cpuid_ebx(0x80000008) & (1 << 4)) cpu_info->caps |= CPUPOWER_CAP_AMD_RDPRU; + + if (cpupower_amd_pstate_enabled()) { + cpu_info->caps |= CPUPOWER_CAP_AMD_PSTATE; + + /* + * If AMD P-State is enabled, the firmware will treat + * AMD P-State function as high priority. + */ + cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_CPB_MSR; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_HW_PSTATE; + cpu_info->caps &= ~CPUPOWER_CAP_AMD_PSTATEDEF; + } } if (cpu_info->vendor == X86_VENDOR_INTEL) { diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 33ffacee7fcb..96e4bede078b 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -11,6 +11,7 @@ #include <libintl.h> #include <locale.h> +#include <stdbool.h> #include "helpers/bitmask.h" #include <cpupower.h> @@ -73,6 +74,7 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_AMD_HW_PSTATE 0x00000100 #define CPUPOWER_CAP_AMD_PSTATEDEF 0x00000200 #define CPUPOWER_CAP_AMD_CPB_MSR 0x00000400 +#define CPUPOWER_CAP_AMD_PSTATE 0x00000800 #define CPUPOWER_AMD_CPBDIS 0x02000000 @@ -135,6 +137,16 @@ extern int decode_pstates(unsigned int cpu, int boost_states, extern int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int * states); + +/* AMD P-State stuff **************************/ +bool cpupower_amd_pstate_enabled(void); +void amd_pstate_boost_init(unsigned int cpu, + int *support, int *active); +void amd_pstate_show_perf_and_freq(unsigned int cpu, + int no_rounding); + +/* AMD P-State stuff **************************/ + /* * CPUID functions returning a single datum */ @@ -167,6 +179,15 @@ static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, int * states) { return -1; } +static inline bool cpupower_amd_pstate_enabled(void) +{ return false; } +static inline void amd_pstate_boost_init(unsigned int cpu, int *support, + int *active) +{} +static inline void amd_pstate_show_perf_and_freq(unsigned int cpu, + int no_rounding) +{} + /* cpuid and cpuinfo helpers **************************/ static inline unsigned int cpuid_eax(unsigned int op) { return 0; }; @@ -184,5 +205,6 @@ extern struct bitmask *offline_cpus; void get_cpustate(void); void print_online_cpus(void); void print_offline_cpus(void); +void print_speed(unsigned long speed, int no_rounding); #endif /* __CPUPOWERUTILS_HELPERS__ */ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index fc6e34511721..9547b29254a7 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -3,9 +3,11 @@ #include <stdio.h> #include <errno.h> #include <stdlib.h> +#include <string.h> #include "helpers/helpers.h" #include "helpers/sysfs.h" +#include "cpufreq.h" #if defined(__i386__) || defined(__x86_64__) @@ -39,6 +41,8 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, if (ret) return ret; } + } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATE) { + amd_pstate_boost_init(cpu, support, active); } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA) *support = *active = 1; return 0; @@ -83,6 +87,22 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val) return 0; } +bool cpupower_amd_pstate_enabled(void) +{ + char *driver = cpufreq_get_driver(0); + bool ret = false; + + if (!driver) + return ret; + + if (!strcmp(driver, "amd-pstate")) + ret = true; + + cpufreq_put_driver(driver); + + return ret; +} + #endif /* #if defined(__i386__) || defined(__x86_64__) */ /* get_cpustate @@ -144,3 +164,43 @@ void print_offline_cpus(void) printf(_("cpupower set operation was not performed on them\n")); } } + +/* + * print_speed + * + * Print the exact CPU frequency with appropriate unit + */ +void print_speed(unsigned long speed, int no_rounding) +{ + unsigned long tmp; + + if (no_rounding) { + if (speed > 1000000) + printf("%u.%06u GHz", ((unsigned int)speed / 1000000), + ((unsigned int)speed % 1000000)); + else if (speed > 1000) + printf("%u.%03u MHz", ((unsigned int)speed / 1000), + (unsigned int)(speed % 1000)); + else + printf("%lu kHz", speed); + } else { + if (speed > 1000000) { + tmp = speed % 10000; + if (tmp >= 5000) + speed += 10000; + printf("%u.%02u GHz", ((unsigned int)speed / 1000000), + ((unsigned int)(speed % 1000000) / 10000)); + } else if (speed > 100000) { + tmp = speed % 1000; + if (tmp >= 500) + speed += 1000; + printf("%u MHz", ((unsigned int)speed / 1000)); + } else if (speed > 1000) { + tmp = speed % 100; + if (tmp >= 50) + speed += 100; + printf("%u.%01u MHz", ((unsigned int)speed / 1000), + ((unsigned int)(speed % 1000) / 100)); + } + } +} diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py new file mode 100755 index 000000000000..2dea4032ac56 --- /dev/null +++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# -*- coding: utf-8 -*- +# +""" This utility can be used to debug and tune the performance of the +AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State +trace event. + +Prerequisites: + Python version 2.7.x or higher + gnuplot 5.0 or higher + gnuplot-py 1.8 or higher + (Most of the distributions have these required packages. They may be called + gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... ) + + Kernel config for Linux trace is enabled + + see print_help(): for Usage and Output details + +""" +from __future__ import print_function +from datetime import datetime +import subprocess +import os +import time +import re +import signal +import sys +import getopt +import Gnuplot +from numpy import * +from decimal import * +sys.path.append('../intel_pstate_tracer') +#import intel_pstate_tracer +import intel_pstate_tracer as ipt + +__license__ = "GPL version 2" + +MAX_CPUS = 256 +# Define the csv file columns +C_COMM = 15 +C_ELAPSED = 14 +C_SAMPLE = 13 +C_DURATION = 12 +C_LOAD = 11 +C_TSC = 10 +C_APERF = 9 +C_MPERF = 8 +C_FREQ = 7 +C_MAX_PERF = 6 +C_DES_PERF = 5 +C_MIN_PERF = 4 +C_USEC = 3 +C_SEC = 2 +C_CPU = 1 + +global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file + +getcontext().prec = 11 + +sample_num =0 +last_sec_cpu = [0] * MAX_CPUS +last_usec_cpu = [0] * MAX_CPUS + +def plot_per_cpu_freq(cpu_index): + """ Plot per cpu frequency """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_frequency.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:7]') + g_plot('set ytics 0, 1') + g_plot('set ylabel "CPU Frequency (GHz)"') + g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set ylabel "CPU frequency"') + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ)) + +def plot_per_cpu_des_perf(cpu_index): + """ Plot per cpu desired perf """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_des_perf.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:255]') + g_plot('set ylabel "des perf"') + g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF)) + +def plot_per_cpu_load(cpu_index): + """ Plot per cpu load """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_load.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:100]') + g_plot('set ytics 0, 10') + g_plot('set ylabel "CPU load (percent)"') + g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD)) + +def plot_all_cpu_frequency(): + """ Plot all cpu frequencies """ + + output_png = 'all_cpu_frequencies.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set ylabel "CPU Frequency (GHz)"') + g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_all_cpu_des_perf(): + """ Plot all cpu desired perf """ + + output_png = 'all_cpu_des_perf.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set ylabel "des perf"') + g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_all_cpu_load(): + """ Plot all cpu load """ + + output_png = 'all_cpu_load.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:100]') + g_plot('set ylabel "CPU load (percent)"') + g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask): + """ Store master csv file information """ + + global graph_data_present + + if cpu_mask[cpu_int] == 0: + return + + try: + f_handle = open('cpu.csv', 'a') + string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm) + f_handle.write(string_buffer) + f_handle.close() + except: + print('IO error cpu.csv') + return + + graph_data_present = True; + + +def cleanup_data_files(): + """ clean up existing data files """ + + if os.path.exists('cpu.csv'): + os.remove('cpu.csv') + f_handle = open('cpu.csv', 'a') + f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm') + f_handle.write('\n') + f_handle.close() + +def read_trace_data(file_name, cpu_mask): + """ Read and parse trace data """ + + global current_max_cpu + global sample_num, last_sec_cpu, last_usec_cpu, start_time + + try: + data = open(file_name, 'r').read() + except: + print('Error opening ', file_name) + sys.exit(2) + + for line in data.splitlines(): + search_obj = \ + re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)' + , line) + + if search_obj: + cpu = search_obj.group(3) + cpu_int = int(cpu) + cpu = str(cpu_int) + + time_pre_dec = search_obj.group(6) + time_post_dec = search_obj.group(8) + min_perf = search_obj.group(10) + des_perf = search_obj.group(12) + max_perf = search_obj.group(14) + freq = search_obj.group(16) + mperf = search_obj.group(18) + aperf = search_obj.group(20) + tsc = search_obj.group(22) + + common_comm = search_obj.group(2).replace(' ', '') + + if sample_num == 0 : + start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) + sample_num += 1 + + if last_sec_cpu[cpu_int] == 0 : + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + else : + duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int])) + duration_ms = Decimal(duration_us) / Decimal(1000) + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time + load = Decimal(int(mperf)*100)/ Decimal(tsc) + freq_ghz = Decimal(freq)/Decimal(1000000) + store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask) + + if cpu_int > current_max_cpu: + current_max_cpu = cpu_int +# Now separate the main overall csv file into per CPU csv files. + ipt.split_csv(current_max_cpu, cpu_mask) + + +def signal_handler(signal, frame): + print(' SIGINT: Forcing cleanup before exit.') + if interval: + ipt.disable_trace(trace_file) + ipt.clear_trace_file() + ipt.free_trace_buffer() + sys.exit(0) + +trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable" +signal.signal(signal.SIGINT, signal_handler) + +interval = "" +file_name = "" +cpu_list = "" +test_name = "" +memory = "10240" +graph_data_present = False; + +valid1 = False +valid2 = False + +cpu_mask = zeros((MAX_CPUS,), dtype=int) + + +try: + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) +except getopt.GetoptError: + ipt.print_help('amd_pstate') + sys.exit(2) +for opt, arg in opts: + if opt == '-h': + print() + sys.exit() + elif opt in ("-t", "--trace_file"): + valid1 = True + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + file_name = os.path.join(location, arg) + elif opt in ("-i", "--interval"): + valid1 = True + interval = arg + elif opt in ("-c", "--cpu"): + cpu_list = arg + elif opt in ("-n", "--name"): + valid2 = True + test_name = arg + elif opt in ("-m", "--memory"): + memory = arg + +if not (valid1 and valid2): + ipt.print_help('amd_pstate') + sys.exit() + +if cpu_list: + for p in re.split("[,]", cpu_list): + if int(p) < MAX_CPUS : + cpu_mask[int(p)] = 1 +else: + for i in range (0, MAX_CPUS): + cpu_mask[i] = 1 + +if not os.path.exists('results'): + os.mkdir('results') + ipt.fix_ownership('results') + +os.chdir('results') +if os.path.exists(test_name): + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') + sys.exit() +os.mkdir(test_name) +ipt.fix_ownership(test_name) +os.chdir(test_name) + +cur_version = sys.version_info +print('python version (should be >= 2.7):') +print(cur_version) + +cleanup_data_files() + +if interval: + file_name = "/sys/kernel/debug/tracing/trace" + ipt.clear_trace_file() + ipt.set_trace_buffer_size(memory) + ipt.enable_trace(trace_file) + time.sleep(int(interval)) + ipt.disable_trace(trace_file) + +current_max_cpu = 0 + +read_trace_data(file_name, cpu_mask) + +if interval: + ipt.clear_trace_file() + ipt.free_trace_buffer() + +if graph_data_present == False: + print('No valid data to plot') + sys.exit(2) + +for cpu_no in range(0, current_max_cpu + 1): + plot_per_cpu_freq(cpu_no) + plot_per_cpu_des_perf(cpu_no) + plot_per_cpu_load(cpu_no) + +plot_all_cpu_des_perf() +plot_all_cpu_frequency() +plot_all_cpu_load() + +for root, dirs, files in os.walk('.'): + for f in files: + ipt.fix_ownership(f) + +os.chdir('../../') diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build index b61456d75190..81e36bd578b1 100644 --- a/tools/power/x86/intel-speed-select/Build +++ b/tools/power/x86/intel-speed-select/Build @@ -1 +1 @@ -intel-speed-select-y += isst-config.o isst-core.o isst-display.o +intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 12c6939dca2a..d2fba1297d96 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -13,8 +13,8 @@ endif # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r - -override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3 +override LDFLAGS += -lnl-genl-3 -lnl-3 ALL_TARGETS := intel-speed-select ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) @@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h mkdir -p $(OUTPUT)include/linux 2>&1 || true ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@ -prepare: $(OUTPUT)include/linux/isst_if.h +$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@ + +prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h ISST_IN := $(OUTPUT)intel-speed-select-in.o diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c new file mode 100644 index 000000000000..e85676711372 --- /dev/null +++ b/tools/power/x86/intel-speed-select/hfi-events.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Read HFI events for OOB + * Copyright (c) 2022 Intel Corporation. + */ + +/* + * This file incorporates work covered by the following copyright and + * permission notice: + + * WPA Supplicant - driver interaction with Linux nl80211/cfg80211 + * Copyright (c) 2003-2008, Jouni Malinen <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Alternatively, this software may be distributed under the terms of + * BSD license. + * + * Requires + * libnl-genl-3-dev + * + * For Fedora/CenOS + * dnf install libnl3-devel + * For Ubuntu + * apt install libnl-3-dev libnl-genl-3-dev + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/file.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <linux/thermal.h> +#include "isst.h" + +struct hfi_event_data { + struct nl_sock *nl_handle; + struct nl_cb *nl_cb; +}; + +struct hfi_event_data drv; + +static int ack_handler(struct nl_msg *msg, void *arg) +{ + int *err = arg; + *err = 0; + return NL_STOP; +} + +static int finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + *ret = 0; + return NL_SKIP; +} + +static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, + void *arg) +{ + int *ret = arg; + *ret = err->error; + return NL_SKIP; +} + +static int seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int send_and_recv_msgs(struct hfi_event_data *drv, + struct nl_msg *msg, + int (*valid_handler)(struct nl_msg *, void *), + void *valid_data) +{ + struct nl_cb *cb; + int err = -ENOMEM; + + cb = nl_cb_clone(drv->nl_cb); + if (!cb) + goto out; + + err = nl_send_auto_complete(drv->nl_handle, msg); + if (err < 0) + goto out; + + err = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err); + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err); + + if (valid_handler) + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, + valid_handler, valid_data); + + while (err > 0) + nl_recvmsgs(drv->nl_handle, cb); + out: + nl_cb_put(cb); + nlmsg_free(msg); + return err; +} + +struct family_data { + const char *group; + int id; +}; + +static int family_handler(struct nl_msg *msg, void *arg) +{ + struct family_data *res = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int i; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) { + struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1]; + nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp), + nla_len(mcgrp), NULL); + if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] || + !tb2[CTRL_ATTR_MCAST_GRP_ID] || + strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]), + res->group, + nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0) + continue; + res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]); + break; + }; + + return 0; +} + +static int nl_get_multicast_id(struct hfi_event_data *drv, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = -1; + struct family_data res = { group, -ENOENT }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"), + 0, 0, CTRL_CMD_GETFAMILY, 0); + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = send_and_recv_msgs(drv, msg, family_handler, &res); + msg = NULL; + if (ret == 0) + ret = res.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +struct perf_cap { + int cpu; + int perf; + int eff; +}; + +static void process_hfi_event(struct perf_cap *perf_cap) +{ + process_level_change(perf_cap->cpu); +} + +static int handle_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + int ret; + struct perf_cap perf_cap; + + ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret); + if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) { + struct nlattr *cap; + int j, index = 0; + + debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n"); + nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) { + switch (index) { + case 0: + perf_cap.cpu = nla_get_u32(cap); + break; + case 1: + perf_cap.perf = nla_get_u32(cap); + break; + case 2: + perf_cap.eff = nla_get_u32(cap); + break; + default: + break; + } + ++index; + if (index == 3) { + index = 0; + process_hfi_event(&perf_cap); + } + } + } + + return 0; +} + +static int _hfi_exit; + +static int check_hf_suport(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __cpuid(6, eax, ebx, ecx, edx); + if (eax & BIT(19)) + return 1; + + return 0; +} + +int hfi_main(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int err = 0; + int mcast_id; + int no_block = 0; + + if (!check_hf_suport()) { + fprintf(stderr, "CPU Doesn't support HFI\n"); + return -1; + } + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "nl_socket_alloc failed\n"); + return -1; + } + + if (genl_connect(sock)) { + fprintf(stderr, "genl_connect(sk_event) failed\n"); + goto free_sock; + } + + drv.nl_handle = sock; + drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT); + if (drv.nl_cb == NULL) { + printf("Failed to allocate netlink callbacks"); + goto free_sock; + } + + mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME, + THERMAL_GENL_EVENT_GROUP_NAME); + if (mcast_id < 0) { + fprintf(stderr, "nl_get_multicast_id failed\n"); + goto free_sock; + } + + if (nl_socket_add_membership(sock, mcast_id)) { + fprintf(stderr, "nl_socket_add_membership failed"); + goto free_sock; + } + + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL); + + if (no_block) + nl_socket_set_nonblocking(sock); + + debug_printf("hfi is initialized\n"); + + while (!_hfi_exit && !err) { + err = nl_recvmsgs(sock, cb); + debug_printf("nl_recv_message err:%d\n", err); + } + + return 0; + + /* Netlink library doesn't have calls to dealloc cb or disconnect */ +free_sock: + nl_socket_free(sock); + + return -1; +} + +void hfi_exit(void) +{ + _hfi_exit = 1; +} diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index bf9fd3549a1d..060390e88e37 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,8 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.10"; +static const char *version_str = "v1.12"; + static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -368,7 +369,7 @@ int get_topo_max_cpus(void) return topo_max_cpus; } -static void set_cpu_online_offline(int cpu, int state) +void set_cpu_online_offline(int cpu, int state) { char buffer[128]; int fd, ret; @@ -409,12 +410,10 @@ static void force_all_cpus_online(void) unlink("/var/run/isst_cpu_topology.dat"); } -#define MAX_PACKAGE_COUNT 8 -#define MAX_DIE_PER_PACKAGE 2 -static void for_each_online_package_in_set(void (*callback)(int, void *, void *, - void *, void *), - void *arg1, void *arg2, void *arg3, - void *arg4) +void for_each_online_package_in_set(void (*callback)(int, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4) { int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT]; int pkg_index = 0, i; @@ -1599,6 +1598,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); adjust_scaling_min_from_base_freq(i); } @@ -1615,6 +1615,7 @@ static void set_scaling_min_to_cpuinfo_min(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0); } } @@ -2801,7 +2802,9 @@ static void usage(void) printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n"); printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n"); printf("\t[-v|--version] : Print version\n"); - + printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n"); + printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n"); + printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n"); printf("\nResult format\n"); printf("\tResult display uses a common format for each command:\n"); printf("\tResults are formatted in text/JSON with\n"); @@ -2835,6 +2838,9 @@ static void cmdline(int argc, char **argv) int opt, force_cpus_online = 0; int option_index = 0; int ret; + int oob_mode = 0; + int poll_interval = -1; + int no_daemon = 0; static struct option long_options[] = { { "all-cpus-online", no_argument, 0, 'a' }, @@ -2847,6 +2853,9 @@ static void cmdline(int argc, char **argv) { "out", required_argument, 0, 'o' }, { "retry", required_argument, 0, 'r' }, { "version", no_argument, 0, 'v' }, + { "oob", no_argument, 0, 'b' }, + { "no-daemon", no_argument, 0, 'n' }, + { "poll-interval", required_argument, 0, 'w' }, { 0, 0, 0, 0 } }; @@ -2873,7 +2882,7 @@ static void cmdline(int argc, char **argv) } progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options, + while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -2918,12 +2927,26 @@ static void cmdline(int argc, char **argv) case 'v': print_version(); break; + case 'b': + oob_mode = 1; + break; + case 'n': + no_daemon = 1; + break; + case 'w': + ret = strtol(optarg, &ptr, 10); + if (!ret) { + fprintf(stderr, "Invalid poll interval count\n"); + exit(0); + } + poll_interval = ret; + break; default: usage(); } } - if (optind > (argc - 2)) { + if (optind > (argc - 2) && !oob_mode) { usage(); exit(0); } @@ -2934,6 +2957,17 @@ static void cmdline(int argc, char **argv) set_cpu_present_cpu_mask(); set_cpu_target_cpu_mask(); + if (oob_mode) { + create_cpu_map(); + if (debug_flag) + fprintf(stderr, "OOB mode is enabled in debug mode\n"); + + ret = isst_daemon(debug_flag, poll_interval, no_daemon); + if (ret) + fprintf(stderr, "OOB mode enable failed\n"); + goto out; + } + if (!is_clx_n_platform()) { ret = isst_fill_platform_info(); if (ret) diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c new file mode 100644 index 000000000000..dd372924bc82 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-daemon.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Allow speed select to daemonize + * Copyright (c) 2022 Intel Corporation. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/file.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <time.h> + +#include "isst.h" + +static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; +static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; + +static void init_levels(void) +{ + int i, j; + + for (i = 0; i < MAX_PACKAGE_COUNT; ++i) + for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j) + per_package_levels_info[i][j] = -1; +} + +void process_level_change(int cpu) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + int pkg_id = get_physical_package_id(cpu); + int die_id = get_physical_die_id(cpu); + struct isst_pkg_ctdp pkg_dev; + time_t tm; + int ret; + + if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) { + debug_printf("Invalid package/die info for cpu:%d\n", cpu); + return; + } + + tm = time(NULL); + if (tm - per_package_levels_tm[pkg_id][die_id] < 2 ) + return; + + per_package_levels_tm[pkg_id][die_id] = tm; + + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + debug_printf("Can't get tdp levels for cpu:%d\n", cpu); + return; + } + + debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu, + pkg_id, die_id, pkg_dev.current_level); + + if (pkg_dev.locked) { + debug_printf("config TDP s locked \n"); + return; + } + + if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level) + return; + + debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n", + cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id], + pkg_dev.current_level); + + per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level; + + ctdp_level.core_cpumask_size = + alloc_cpu_set(&ctdp_level.core_cpumask); + ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level); + if (ret) { + free_cpu_set(ctdp_level.core_cpumask); + debug_printf("Can't get core_mask:%d\n", cpu); + return; + } + + if (ctdp_level.cpu_count) { + int i, max_cpus = get_topo_max_cpus(); + for (i = 0; i < max_cpus; ++i) { + if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i)) + continue; + if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { + fprintf(stderr, "online cpu %d\n", i); + set_cpu_online_offline(i, 1); + } else { + fprintf(stderr, "offline cpu %d\n", i); + set_cpu_online_offline(i, 0); + } + } + } + + free_cpu_set(ctdp_level.core_cpumask); +} + +static void _poll_for_config_change(int cpu, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + process_level_change(cpu); +} + +static void poll_for_config_change(void) +{ + for_each_online_package_in_set(_poll_for_config_change, NULL, NULL, + NULL, NULL); +} + +static int done = 0; +static int pid_file_handle; + +static void signal_handler(int sig) +{ + switch (sig) { + case SIGINT: + case SIGTERM: + done = 1; + hfi_exit(); + exit(0); + break; + default: + break; + } +} + +static void daemonize(char *rundir, char *pidfile) +{ + int pid, sid, i; + char str[10]; + struct sigaction sig_actions; + sigset_t sig_set; + int ret; + + if (getppid() == 1) + return; + + sigemptyset(&sig_set); + sigaddset(&sig_set, SIGCHLD); + sigaddset(&sig_set, SIGTSTP); + sigaddset(&sig_set, SIGTTOU); + sigaddset(&sig_set, SIGTTIN); + sigprocmask(SIG_BLOCK, &sig_set, NULL); + + sig_actions.sa_handler = signal_handler; + sigemptyset(&sig_actions.sa_mask); + sig_actions.sa_flags = 0; + + sigaction(SIGHUP, &sig_actions, NULL); + sigaction(SIGTERM, &sig_actions, NULL); + sigaction(SIGINT, &sig_actions, NULL); + + pid = fork(); + if (pid < 0) { + /* Could not fork */ + exit(EXIT_FAILURE); + } + if (pid > 0) + exit(EXIT_SUCCESS); + + umask(027); + + sid = setsid(); + if (sid < 0) + exit(EXIT_FAILURE); + + /* close all descriptors */ + for (i = getdtablesize(); i >= 0; --i) + close(i); + + i = open("/dev/null", O_RDWR); + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = chdir(rundir); + if (ret == -1) + exit(EXIT_FAILURE); + + pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600); + if (pid_file_handle == -1) { + /* Couldn't open lock file */ + exit(1); + } + /* Try to lock file */ +#ifdef LOCKF_SUPPORT + if (lockf(pid_file_handle, F_TLOCK, 0) == -1) { +#else + if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) { +#endif + /* Couldn't get lock on lock file */ + fprintf(stderr, "Couldn't get lock file %d\n", getpid()); + exit(1); + } + snprintf(str, sizeof(str), "%d\n", getpid()); + ret = write(pid_file_handle, str, strlen(str)); + if (ret == -1) + exit(EXIT_FAILURE); + + close(i); +} + +int isst_daemon(int debug_mode, int poll_interval, int no_daemon) +{ + int ret; + + if (!no_daemon && poll_interval < 0 && !debug_mode) { + fprintf(stderr, "OOB mode is enabled and will run as daemon\n"); + daemonize((char *) "/tmp/", + (char *)"/tmp/hfi-events.pid"); + } else { + signal(SIGINT, signal_handler); + } + + init_levels(); + + if (poll_interval < 0) { + ret = hfi_main(); + if (ret) { + fprintf(stderr, "HFI initialization failed\n"); + } + fprintf(stderr, "Must specify poll-interval\n"); + return ret; + } + + debug_printf("Starting loop\n"); + while (!done) { + sleep(poll_interval); + poll_for_config_change(); + } + + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 1aa15d5ea57c..0796d8c6a882 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -76,6 +76,9 @@ #define DISP_FREQ_MULTIPLIER 100 +#define MAX_PACKAGE_COUNT 8 +#define MAX_DIE_PER_PACKAGE 2 + struct isst_clos_config { int pkg_id; int die_id; @@ -260,4 +263,14 @@ extern int is_skx_based_platform(void); extern int is_spr_platform(void); extern int is_icx_platform(void); extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl); + +extern void set_cpu_online_offline(int cpu, int state); +extern void for_each_online_package_in_set(void (*callback)(int, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4); +extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon); +extern void process_level_change(int cpu); +extern int hfi_main(void); +extern void hfi_exit(void); #endif diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index e15e20696d17..b46e9eb8f5aa 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -63,7 +63,7 @@ C_USEC = 3 C_SEC = 2 C_CPU = 1 -global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname +global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file # 11 digits covers uptime to 115 days getcontext().prec = 11 @@ -72,17 +72,17 @@ sample_num =0 last_sec_cpu = [0] * MAX_CPUS last_usec_cpu = [0] * MAX_CPUS -def print_help(): - print('intel_pstate_tracer.py:') +def print_help(driver_name): + print('%s_tracer.py:'%driver_name) print(' Usage:') print(' If the trace file is available, then to simply parse and plot, use (sudo not required):') - print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>') + print(' ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name) print(' Or') - print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>') + print(' ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name) print(' To generate trace file, parse and plot, use (sudo required):') - print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>') + print(' sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name) print(' Or') - print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>') + print(' sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name) print(' Optional argument:') print(' cpus: comma separated list of CPUs') print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240') @@ -323,7 +323,7 @@ def set_4_plot_linestyles(g_plot): g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1') g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1') -def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz): +def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask): """ Store master csv file information """ global graph_data_present @@ -342,11 +342,9 @@ def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _t graph_data_present = True; -def split_csv(): +def split_csv(current_max_cpu, cpu_mask): """ seperate the all csv file into per CPU csv files. """ - global current_max_cpu - if os.path.exists('cpu.csv'): for index in range(0, current_max_cpu + 1): if cpu_mask[int(index)] != 0: @@ -381,27 +379,25 @@ def clear_trace_file(): print('IO error clearing trace file ') sys.exit(2) -def enable_trace(): +def enable_trace(trace_file): """ Enable trace """ try: - open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable' - , 'w').write("1") + open(trace_file,'w').write("1") except: print('IO error enabling trace ') sys.exit(2) -def disable_trace(): +def disable_trace(trace_file): """ Disable trace """ try: - open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable' - , 'w').write("0") + open(trace_file, 'w').write("0") except: print('IO error disabling trace ') sys.exit(2) -def set_trace_buffer_size(): +def set_trace_buffer_size(memory): """ Set trace buffer size """ try: @@ -421,7 +417,7 @@ def free_trace_buffer(): print('IO error freeing trace buffer ') sys.exit(2) -def read_trace_data(filename): +def read_trace_data(filename, cpu_mask): """ Read and parse trace data """ global current_max_cpu @@ -481,135 +477,137 @@ def read_trace_data(filename): tsc_ghz = Decimal(0) if duration_ms != Decimal(0) : tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000) - store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz) + store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask) if cpu_int > current_max_cpu: current_max_cpu = cpu_int # End of for each trace line loop # Now seperate the main overall csv file into per CPU csv files. - split_csv() + split_csv(current_max_cpu, cpu_mask) def signal_handler(signal, frame): print(' SIGINT: Forcing cleanup before exit.') if interval: - disable_trace() + disable_trace(trace_file) clear_trace_file() # Free the memory free_trace_buffer() sys.exit(0) -signal.signal(signal.SIGINT, signal_handler) +if __name__ == "__main__": + trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable" + signal.signal(signal.SIGINT, signal_handler) -interval = "" -filename = "" -cpu_list = "" -testname = "" -memory = "10240" -graph_data_present = False; + interval = "" + filename = "" + cpu_list = "" + testname = "" + memory = "10240" + graph_data_present = False; -valid1 = False -valid2 = False + valid1 = False + valid2 = False -cpu_mask = zeros((MAX_CPUS,), dtype=int) + cpu_mask = zeros((MAX_CPUS,), dtype=int) -try: - opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) -except getopt.GetoptError: - print_help() - sys.exit(2) -for opt, arg in opts: - if opt == '-h': - print() + try: + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) + except getopt.GetoptError: + print_help('intel_pstate') + sys.exit(2) + for opt, arg in opts: + if opt == '-h': + print_help('intel_pstate') + sys.exit() + elif opt in ("-t", "--trace_file"): + valid1 = True + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + filename = os.path.join(location, arg) + elif opt in ("-i", "--interval"): + valid1 = True + interval = arg + elif opt in ("-c", "--cpu"): + cpu_list = arg + elif opt in ("-n", "--name"): + valid2 = True + testname = arg + elif opt in ("-m", "--memory"): + memory = arg + + if not (valid1 and valid2): + print_help('intel_pstate') sys.exit() - elif opt in ("-t", "--trace_file"): - valid1 = True - location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) - filename = os.path.join(location, arg) - elif opt in ("-i", "--interval"): - valid1 = True - interval = arg - elif opt in ("-c", "--cpu"): - cpu_list = arg - elif opt in ("-n", "--name"): - valid2 = True - testname = arg - elif opt in ("-m", "--memory"): - memory = arg - -if not (valid1 and valid2): - print_help() - sys.exit() - -if cpu_list: - for p in re.split("[,]", cpu_list): - if int(p) < MAX_CPUS : - cpu_mask[int(p)] = 1 -else: - for i in range (0, MAX_CPUS): - cpu_mask[i] = 1 - -if not os.path.exists('results'): - os.mkdir('results') + + if cpu_list: + for p in re.split("[,]", cpu_list): + if int(p) < MAX_CPUS : + cpu_mask[int(p)] = 1 + else: + for i in range (0, MAX_CPUS): + cpu_mask[i] = 1 + + if not os.path.exists('results'): + os.mkdir('results') + # The regular user needs to own the directory, not root. + fix_ownership('results') + + os.chdir('results') + if os.path.exists(testname): + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') + sys.exit() + os.mkdir(testname) # The regular user needs to own the directory, not root. - fix_ownership('results') - -os.chdir('results') -if os.path.exists(testname): - print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') - sys.exit() -os.mkdir(testname) -# The regular user needs to own the directory, not root. -fix_ownership(testname) -os.chdir(testname) - -# Temporary (or perhaps not) -cur_version = sys.version_info -print('python version (should be >= 2.7):') -print(cur_version) - -# Left as "cleanup" for potential future re-run ability. -cleanup_data_files() - -if interval: - filename = "/sys/kernel/debug/tracing/trace" - clear_trace_file() - set_trace_buffer_size() - enable_trace() - print('Sleeping for ', interval, 'seconds') - time.sleep(int(interval)) - disable_trace() - -current_max_cpu = 0 - -read_trace_data(filename) - -if interval: - clear_trace_file() - # Free the memory - free_trace_buffer() - -if graph_data_present == False: - print('No valid data to plot') - sys.exit(2) - -for cpu_no in range(0, current_max_cpu + 1): - plot_perf_busy_with_sample(cpu_no) - plot_perf_busy(cpu_no) - plot_durations(cpu_no) - plot_loads(cpu_no) - -plot_pstate_cpu_with_sample() -plot_pstate_cpu() -plot_load_cpu() -plot_frequency_cpu() -plot_duration_cpu() -plot_scaled_cpu() -plot_boost_cpu() -plot_ghz_cpu() - -# It is preferrable, but not necessary, that the regular user owns the files, not root. -for root, dirs, files in os.walk('.'): - for f in files: - fix_ownership(f) - -os.chdir('../../') + fix_ownership(testname) + os.chdir(testname) + + # Temporary (or perhaps not) + cur_version = sys.version_info + print('python version (should be >= 2.7):') + print(cur_version) + + # Left as "cleanup" for potential future re-run ability. + cleanup_data_files() + + if interval: + filename = "/sys/kernel/debug/tracing/trace" + clear_trace_file() + set_trace_buffer_size(memory) + enable_trace(trace_file) + print('Sleeping for ', interval, 'seconds') + time.sleep(int(interval)) + disable_trace(trace_file) + + current_max_cpu = 0 + + read_trace_data(filename, cpu_mask) + + if interval: + clear_trace_file() + # Free the memory + free_trace_buffer() + + if graph_data_present == False: + print('No valid data to plot') + sys.exit(2) + + for cpu_no in range(0, current_max_cpu + 1): + plot_perf_busy_with_sample(cpu_no) + plot_perf_busy(cpu_no) + plot_durations(cpu_no) + plot_loads(cpu_no) + + plot_pstate_cpu_with_sample() + plot_pstate_cpu() + plot_load_cpu() + plot_frequency_cpu() + plot_duration_cpu() + plot_scaled_cpu() + plot_boost_cpu() + plot_ghz_cpu() + + # It is preferrable, but not necessary, that the regular user owns the files, not root. + for root, dirs, files in os.walk('.'): + for f in files: + fix_ownership(f) + + os.chdir('../../') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 47d3ba895d6d..bc5ae0872fed 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2323,7 +2323,7 @@ int skx_pkg_cstate_limits[16] = }; int icx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index b0be5f40a3f1..79d102304470 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -90,7 +90,7 @@ EXTRA_WARNINGS += -Wstrict-aliasing=3 else ifneq ($(CROSS_COMPILE),) CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) ifneq ($(GCC_TOOLCHAIN_DIR),) CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 86deba8308a1..1acdf2fc31c5 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +ldflags-y += --wrap=acpi_table_parse_cedt ldflags-y += --wrap=is_acpi_device_node -ldflags-y += --wrap=acpi_get_table -ldflags-y += --wrap=acpi_put_table ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=pci_walk_bus diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index cb32f9e27d5d..736d99006fb7 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -182,6 +182,13 @@ static struct { }, }; +struct acpi_cedt_cfmws *mock_cfmws[4] = { + [0] = &mock_cedt.cfmws0.cfmws, + [1] = &mock_cedt.cfmws1.cfmws, + [2] = &mock_cedt.cfmws2.cfmws, + [3] = &mock_cedt.cfmws3.cfmws, +}; + struct cxl_mock_res { struct list_head list; struct range range; @@ -232,12 +239,6 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size) static int populate_cedt(void) { - struct acpi_cedt_cfmws *cfmws[4] = { - [0] = &mock_cedt.cfmws0.cfmws, - [1] = &mock_cedt.cfmws1.cfmws, - [2] = &mock_cedt.cfmws2.cfmws, - [3] = &mock_cedt.cfmws3.cfmws, - }; struct cxl_mock_res *res; int i; @@ -257,8 +258,8 @@ static int populate_cedt(void) chbs->length = size; } - for (i = 0; i < ARRAY_SIZE(cfmws); i++) { - struct acpi_cedt_cfmws *window = cfmws[i]; + for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + struct acpi_cedt_cfmws *window = mock_cfmws[i]; res = alloc_mock_res(window->window_size); if (!res) @@ -269,21 +270,44 @@ static int populate_cedt(void) return 0; } -static acpi_status mock_acpi_get_table(char *signature, u32 instance, - struct acpi_table_header **out_table) +/* + * WARNING, this hack assumes the format of 'struct + * cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that + * the first struct member is the device being probed by the cxl_acpi + * driver. + */ +struct cxl_cedt_context { + struct device *dev; +}; + +static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg) { - if (instance < U32_MAX || strcmp(signature, ACPI_SIG_CEDT) != 0) - return acpi_get_table(signature, instance, out_table); + struct cxl_cedt_context *ctx = arg; + struct device *dev = ctx->dev; + union acpi_subtable_headers *h; + unsigned long end; + int i; - *out_table = (struct acpi_table_header *) &mock_cedt; - return AE_OK; -} + if (dev != &cxl_acpi->dev) + return acpi_table_parse_cedt(id, handler_arg, arg); -static void mock_acpi_put_table(struct acpi_table_header *table) -{ - if (table == (struct acpi_table_header *) &mock_cedt) - return; - acpi_put_table(table); + if (id == ACPI_CEDT_TYPE_CHBS) + for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) { + h = (union acpi_subtable_headers *)&mock_cedt.chbs[i]; + end = (unsigned long)&mock_cedt.chbs[i + 1]; + handler_arg(h, arg, end); + } + + if (id == ACPI_CEDT_TYPE_CFMWS) + for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + h = (union acpi_subtable_headers *) mock_cfmws[i]; + end = (unsigned long) h + mock_cfmws[i]->header.length; + handler_arg(h, arg, end); + } + + return 0; } static bool is_mock_bridge(struct device *dev) @@ -388,8 +412,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_port = is_mock_port, .is_mock_dev = is_mock_dev, .mock_port = mock_cxl_root_port, - .acpi_get_table = mock_acpi_get_table, - .acpi_put_table = mock_acpi_put_table, + .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, .list = LIST_HEAD_INIT(cxl_mock_ops.list), @@ -574,3 +597,4 @@ static __exit void cxl_test_exit(void) module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 12a8437a9ca0..8c2086c4caef 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -28,8 +28,24 @@ static struct cxl_cel_entry mock_cel[] = { .opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA), .effect = cpu_to_le16(EFFECT(1) | EFFECT(2)), }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), + .effect = cpu_to_le16(0), + }, }; +/* See CXL 2.0 Table 181 Get Health Info Output Payload */ +struct cxl_mbox_health_info { + u8 health_status; + u8 media_status; + u8 ext_status; + u8 life_used; + __le16 temperature; + __le32 dirty_shutdowns; + __le32 volatile_errors; + __le32 pmem_errors; +} __packed; + static struct { struct cxl_mbox_get_supported_logs gsl; struct cxl_gsl_entry entry; @@ -54,7 +70,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd) return 0; } -static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_log *gl = cmd->payload_in; u32 offset = le32_to_cpu(gl->offset); @@ -64,7 +80,7 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) if (cmd->size_in < sizeof(*gl)) return -EINVAL; - if (length > cxlm->payload_size) + if (length > cxlds->payload_size) return -EINVAL; if (offset + length > sizeof(mock_cel)) return -EINVAL; @@ -78,9 +94,9 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { - struct platform_device *pdev = to_platform_device(cxlm->dev); + struct platform_device *pdev = to_platform_device(cxlds->dev); struct cxl_mbox_identify id = { .fw_revision = { "mock fw v1 " }, .lsa_size = cpu_to_le32(LSA_SIZE), @@ -120,10 +136,10 @@ static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlm->dev); + void *lsa = dev_get_drvdata(cxlds->dev); u32 offset, length; if (sizeof(*get_lsa) > cmd->size_in) @@ -139,10 +155,10 @@ static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlm->dev); + void *lsa = dev_get_drvdata(cxlds->dev); u32 offset, length; if (sizeof(*set_lsa) > cmd->size_in) @@ -156,9 +172,39 @@ static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_health_info(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) { - struct device *dev = cxlm->dev; + struct cxl_mbox_health_info health_info = { + /* set flags for maint needed, perf degraded, hw replacement */ + .health_status = 0x7, + /* set media status to "All Data Lost" */ + .media_status = 0x3, + /* + * set ext_status flags for: + * ext_life_used: normal, + * ext_temperature: critical, + * ext_corrected_volatile: warning, + * ext_corrected_persistent: normal, + */ + .ext_status = 0x18, + .life_used = 15, + .temperature = cpu_to_le16(25), + .dirty_shutdowns = cpu_to_le32(10), + .volatile_errors = cpu_to_le32(20), + .pmem_errors = cpu_to_le32(30), + }; + + if (cmd->size_out < sizeof(health_info)) + return -EINVAL; + + memcpy(cmd->payload_out, &health_info, sizeof(health_info)); + return 0; +} + +static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct device *dev = cxlds->dev; int rc = -EIO; switch (cmd->opcode) { @@ -166,16 +212,19 @@ static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) rc = mock_gsl(cmd); break; case CXL_MBOX_OP_GET_LOG: - rc = mock_get_log(cxlm, cmd); + rc = mock_get_log(cxlds, cmd); break; case CXL_MBOX_OP_IDENTIFY: - rc = mock_id(cxlm, cmd); + rc = mock_id(cxlds, cmd); break; case CXL_MBOX_OP_GET_LSA: - rc = mock_get_lsa(cxlm, cmd); + rc = mock_get_lsa(cxlds, cmd); break; case CXL_MBOX_OP_SET_LSA: - rc = mock_set_lsa(cxlm, cmd); + rc = mock_set_lsa(cxlds, cmd); + break; + case CXL_MBOX_OP_GET_HEALTH_INFO: + rc = mock_health_info(cxlds, cmd); break; default: break; @@ -196,7 +245,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; - struct cxl_mem *cxlm; + struct cxl_dev_state *cxlds; void *lsa; int rc; @@ -208,30 +257,30 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) return rc; dev_set_drvdata(dev, lsa); - cxlm = cxl_mem_create(dev); - if (IS_ERR(cxlm)) - return PTR_ERR(cxlm); + cxlds = cxl_dev_state_create(dev); + if (IS_ERR(cxlds)) + return PTR_ERR(cxlds); - cxlm->mbox_send = cxl_mock_mbox_send; - cxlm->payload_size = SZ_4K; + cxlds->mbox_send = cxl_mock_mbox_send; + cxlds->payload_size = SZ_4K; - rc = cxl_mem_enumerate_cmds(cxlm); + rc = cxl_enumerate_cmds(cxlds); if (rc) return rc; - rc = cxl_mem_identify(cxlm); + rc = cxl_dev_state_identify(cxlds); if (rc) return rc; - rc = cxl_mem_create_range_info(cxlm); + rc = cxl_mem_create_range_info(cxlds); if (rc) return rc; - cxlmd = devm_cxl_add_memdev(cxlm); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) + if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) rc = devm_cxl_add_nvdimm(dev, cxlmd); return 0; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index b8c108abcf07..17408f892df4 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -58,36 +58,23 @@ bool __wrap_is_acpi_device_node(const struct fwnode_handle *fwnode) } EXPORT_SYMBOL(__wrap_is_acpi_device_node); -acpi_status __wrap_acpi_get_table(char *signature, u32 instance, - struct acpi_table_header **out_table) +int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg) { - int index; + int index, rc; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - acpi_status status; if (ops) - status = ops->acpi_get_table(signature, instance, out_table); + rc = ops->acpi_table_parse_cedt(id, handler_arg, arg); else - status = acpi_get_table(signature, instance, out_table); + rc = acpi_table_parse_cedt(id, handler_arg, arg); put_cxl_mock_ops(index); - return status; -} -EXPORT_SYMBOL(__wrap_acpi_get_table); - -void __wrap_acpi_put_table(struct acpi_table_header *table) -{ - int index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops) - ops->acpi_put_table(table); - else - acpi_put_table(table); - put_cxl_mock_ops(index); + return rc; } -EXPORT_SYMBOL(__wrap_acpi_put_table); +EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, ACPI); acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, @@ -169,3 +156,4 @@ __wrap_nvdimm_bus_register(struct device *dev, EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 805a94cb3fbe..15ed0fd877e4 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -6,9 +6,9 @@ struct cxl_mock_ops { struct list_head list; bool (*is_mock_adev)(struct acpi_device *dev); - acpi_status (*acpi_get_table)(char *signature, u32 instance, - struct acpi_table_header **out_table); - void (*acpi_put_table)(struct acpi_table_header *table); + int (*acpi_table_parse_cedt)(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg); bool (*is_mock_bridge)(struct device *dev); acpi_status (*acpi_evaluate_integer)(acpi_handle handle, acpi_string pathname, diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 44bbe54f25f1..3c4196cef3ed 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,6 +6,7 @@ # Author: Felix Guo <[email protected]> # Author: Brendan Higgins <[email protected]> +import importlib.abc import importlib.util import logging import subprocess diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py index 4f32133ed77c..13d854afca9d 100755 --- a/tools/testing/kunit/run_checks.py +++ b/tools/testing/kunit/run_checks.py @@ -61,7 +61,7 @@ def main(argv: Sequence[str]) -> None: elif isinstance(ex, subprocess.CalledProcessError): print(f'{name}: FAILED') else: - print('{name}: unexpected exception: {ex}') + print(f'{name}: unexpected exception: {ex}') continue output = ex.output diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 47f9cc9dcd94..c57d9e9d4480 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o endif obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o -obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o -obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o nfit-y := $(ACPI_SRC)/core.o nfit-y += $(ACPI_SRC)/intel.o @@ -67,12 +65,8 @@ device_dax-y += dax-dev.o device_dax-y += device_dax_test.o device_dax-y += config_check.o -dax_pmem-y := $(DAX_SRC)/pmem/pmem.o +dax_pmem-y := $(DAX_SRC)/pmem.o dax_pmem-y += dax_pmem_test.o -dax_pmem_core-y := $(DAX_SRC)/pmem/core.o -dax_pmem_core-y += dax_pmem_core_test.o -dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o -dax_pmem_compat-y += dax_pmem_compat_test.o dax_pmem-y += config_check.o libnvdimm-y := $(NVDIMM_SRC)/core.o diff --git a/tools/testing/nvdimm/dax_pmem_compat_test.c b/tools/testing/nvdimm/dax_pmem_compat_test.c deleted file mode 100644 index 7cd1877f3765..000000000000 --- a/tools/testing/nvdimm/dax_pmem_compat_test.c +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright(c) 2019 Intel Corporation. All rights reserved. - -#include <linux/module.h> -#include <linux/printk.h> -#include "watermark.h" - -nfit_test_watermark(dax_pmem_compat); diff --git a/tools/testing/nvdimm/dax_pmem_core_test.c b/tools/testing/nvdimm/dax_pmem_core_test.c deleted file mode 100644 index a4249cdbeec1..000000000000 --- a/tools/testing/nvdimm/dax_pmem_core_test.c +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright(c) 2019 Intel Corporation. All rights reserved. - -#include <linux/module.h> -#include <linux/printk.h> -#include "watermark.h" - -nfit_test_watermark(dax_pmem_core); diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index ed563bdd88f3..b752ce47ead3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap) { struct dev_pagemap *pgmap = _pgmap; - WARN_ON(!pgmap || !pgmap->ref); - - if (pgmap->ops && pgmap->ops->kill) - pgmap->ops->kill(pgmap); - else - percpu_ref_kill(pgmap->ref); - - if (pgmap->ops && pgmap->ops->cleanup) { - pgmap->ops->cleanup(pgmap); - } else { - wait_for_completion(&pgmap->done); - percpu_ref_exit(pgmap->ref); - } + WARN_ON(!pgmap); + + percpu_ref_kill(&pgmap->ref); + + wait_for_completion(&pgmap->done); + percpu_ref_exit(&pgmap->ref); } static void dev_pagemap_percpu_release(struct percpu_ref *ref) { - struct dev_pagemap *pgmap = - container_of(ref, struct dev_pagemap, internal_ref); + struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref); complete(&pgmap->done); } @@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (!nfit_res) return devm_memremap_pages(dev, pgmap); - if (!pgmap->ref) { - if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup)) - return ERR_PTR(-EINVAL); - - init_completion(&pgmap->done); - error = percpu_ref_init(&pgmap->internal_ref, - dev_pagemap_percpu_release, 0, GFP_KERNEL); - if (error) - return ERR_PTR(error); - pgmap->ref = &pgmap->internal_ref; - } else { - if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) { - WARN(1, "Missing reference count teardown definition\n"); - return ERR_PTR(-EINVAL); - } - } + init_completion(&pgmap->done); + error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0, + GFP_KERNEL); + if (error) + return ERR_PTR(error); error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap); if (error) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 6862915f1fb0..3ca7c32e9362 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -1054,10 +1054,6 @@ static __init int ndtest_init(void) libnvdimm_test(); device_dax_test(); dax_pmem_test(); - dax_pmem_core_test(); -#ifdef CONFIG_DEV_DAX_PMEM_COMPAT - dax_pmem_compat_test(); -#endif nfit_test_setup(ndtest_resource_lookup, NULL); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b1bff5fb0f65..0bc91ffee257 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void) acpi_nfit_test(); device_dax_test(); dax_pmem_test(); - dax_pmem_core_test(); -#ifdef CONFIG_DEV_DAX_PMEM_COMPAT - dax_pmem_compat_test(); -#endif nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 16ec895bbe5f..5bd9e6e80625 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page) __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) -#define preemptible() (1) +#define pagefault_disabled() (0) static inline void *kmap(struct page *page) { @@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) +#define flush_dcache_page(p) #define MAX_ERRNO 4095 diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c852eb40c4f7..d08fe4cfe811 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TARGETS = arm64 +TARGETS += alsa +TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore new file mode 100644 index 000000000000..3bb7c41266a8 --- /dev/null +++ b/tools/testing/selftests/alsa/.gitignore @@ -0,0 +1 @@ +mixer-test diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile new file mode 100644 index 000000000000..f64d9090426d --- /dev/null +++ b/tools/testing/selftests/alsa/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# + +CFLAGS += $(shell pkg-config --cflags alsa) +LDLIBS += $(shell pkg-config --libs alsa) + +TEST_GEN_PROGS := mixer-test + +include ../lib.mk diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c new file mode 100644 index 000000000000..17f158d7a767 --- /dev/null +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -0,0 +1,705 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest for the ALSA mixer API +// +// Original author: Mark Brown <[email protected]> +// Copyright (c) 2021 Arm Limited + +// This test will iterate over all cards detected in the system, exercising +// every mixer control it can find. This may conflict with other system +// software if there is audio activity so is best run on a system with a +// minimal active userspace. + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <getopt.h> +#include <stdarg.h> +#include <ctype.h> +#include <math.h> +#include <errno.h> +#include <assert.h> +#include <alsa/asoundlib.h> +#include <poll.h> +#include <stdint.h> + +#include "../kselftest.h" + +#define TESTS_PER_CONTROL 3 + +struct card_data { + snd_ctl_t *handle; + int card; + int num_ctls; + snd_ctl_elem_list_t *ctls; + struct card_data *next; +}; + +struct ctl_data { + const char *name; + snd_ctl_elem_id_t *id; + snd_ctl_elem_info_t *info; + snd_ctl_elem_value_t *def_val; + int elem; + struct card_data *card; + struct ctl_data *next; +}; + +static const char *alsa_config = +"ctl.hw {\n" +" @args [ CARD ]\n" +" @args.CARD.type string\n" +" type hw\n" +" card $CARD\n" +"}\n" +; + +int num_cards = 0; +int num_controls = 0; +struct card_data *card_list = NULL; +struct ctl_data *ctl_list = NULL; + +#ifdef SND_LIB_VER +#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) +#define LIB_HAS_LOAD_STRING +#endif +#endif + +#ifndef LIB_HAS_LOAD_STRING +int snd_config_load_string(snd_config_t **config, const char *s, size_t size) +{ + snd_input_t *input; + snd_config_t *dst; + int err; + + assert(config && s); + if (size == 0) + size = strlen(s); + err = snd_input_buffer_open(&input, s, size); + if (err < 0) + return err; + err = snd_config_top(&dst); + if (err < 0) { + snd_input_close(input); + return err; + } + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) { + snd_config_delete(dst); + return err; + } + *config = dst; + return 0; +} +#endif + +void find_controls(void) +{ + char name[32]; + int card, ctl, err; + struct card_data *card_data; + struct ctl_data *ctl_data; + snd_config_t *config; + + card = -1; + if (snd_card_next(&card) < 0 || card < 0) + return; + + err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); + if (err < 0) { + ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", + snd_strerror(err)); + ksft_exit_fail(); + } + + while (card >= 0) { + sprintf(name, "hw:%d", card); + + card_data = malloc(sizeof(*card_data)); + if (!card_data) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_open_lconf(&card_data->handle, name, 0, config); + if (err < 0) { + ksft_print_msg("Failed to get hctl for card %d: %s\n", + card, snd_strerror(err)); + goto next_card; + } + + /* Count controls */ + snd_ctl_elem_list_malloc(&card_data->ctls); + snd_ctl_elem_list(card_data->handle, card_data->ctls); + card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls); + + /* Enumerate control information */ + snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls); + snd_ctl_elem_list(card_data->handle, card_data->ctls); + + card_data->card = num_cards++; + card_data->next = card_list; + card_list = card_data; + + num_controls += card_data->num_ctls; + + for (ctl = 0; ctl < card_data->num_ctls; ctl++) { + ctl_data = malloc(sizeof(*ctl_data)); + if (!ctl_data) + ksft_exit_fail_msg("Out of memory\n"); + + ctl_data->card = card_data; + ctl_data->elem = ctl; + ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls, + ctl); + + err = snd_ctl_elem_id_malloc(&ctl_data->id); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_elem_info_malloc(&ctl_data->info); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_elem_value_malloc(&ctl_data->def_val); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + snd_ctl_elem_list_get_id(card_data->ctls, ctl, + ctl_data->id); + snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id); + err = snd_ctl_elem_info(card_data->handle, + ctl_data->info); + if (err < 0) { + ksft_print_msg("%s getting info for %d\n", + snd_strerror(err), + ctl_data->name); + } + + snd_ctl_elem_value_set_id(ctl_data->def_val, + ctl_data->id); + + ctl_data->next = ctl_list; + ctl_list = ctl_data; + } + + next_card: + if (snd_card_next(&card) < 0) { + ksft_print_msg("snd_card_next"); + break; + } + } + + snd_config_delete(config); +} + +bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val, + int index) +{ + long int_val; + long long int64_val; + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_NONE: + ksft_print_msg("%s.%d Invalid control type NONE\n", + ctl->name, index); + return false; + + case SND_CTL_ELEM_TYPE_BOOLEAN: + int_val = snd_ctl_elem_value_get_boolean(val, index); + switch (int_val) { + case 0: + case 1: + break; + default: + ksft_print_msg("%s.%d Invalid boolean value %ld\n", + ctl->name, index, int_val); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + int_val = snd_ctl_elem_value_get_integer(val, index); + + if (int_val < snd_ctl_elem_info_get_min(ctl->info)) { + ksft_print_msg("%s.%d value %ld less than minimum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_min(ctl->info)); + return false; + } + + if (int_val > snd_ctl_elem_info_get_max(ctl->info)) { + ksft_print_msg("%s.%d value %ld more than maximum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_max(ctl->info)); + return false; + } + + /* Only check step size if there is one and we're in bounds */ + if (snd_ctl_elem_info_get_step(ctl->info) && + (int_val - snd_ctl_elem_info_get_min(ctl->info) % + snd_ctl_elem_info_get_step(ctl->info))) { + ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_step(ctl->info), + snd_ctl_elem_info_get_min(ctl->info)); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + int64_val = snd_ctl_elem_value_get_integer64(val, index); + + if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) { + ksft_print_msg("%s.%d value %lld less than minimum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_min64(ctl->info)); + return false; + } + + if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) { + ksft_print_msg("%s.%d value %lld more than maximum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_max(ctl->info)); + return false; + } + + /* Only check step size if there is one and we're in bounds */ + if (snd_ctl_elem_info_get_step64(ctl->info) && + (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) % + snd_ctl_elem_info_get_step64(ctl->info)) { + ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_step64(ctl->info), + snd_ctl_elem_info_get_min64(ctl->info)); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_ENUMERATED: + int_val = snd_ctl_elem_value_get_enumerated(val, index); + + if (int_val < 0) { + ksft_print_msg("%s.%d negative value %ld for enumeration\n", + ctl->name, index, int_val); + return false; + } + + if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) { + ksft_print_msg("%s.%d value %ld more than item count %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_items(ctl->info)); + return false; + } + break; + + default: + /* No tests for other types */ + break; + } + + return true; +} + +/* + * Check that the provided value meets the constraints for the + * provided control. + */ +bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val) +{ + int i; + bool valid = true; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) + if (!ctl_value_index_valid(ctl, val, i)) + valid = false; + + return valid; +} + +/* + * Check that we can read the default value and it is valid. Write + * tests use the read value to restore the default. + */ +void test_ctl_get_value(struct ctl_data *ctl) +{ + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("get_value.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* Can't test reading on an unreadable control */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) { + ksft_print_msg("%s is not readable\n", ctl->name); + ksft_test_result_skip("get_value.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val); + if (err < 0) { + ksft_print_msg("snd_ctl_elem_read() failed: %s\n", + snd_strerror(err)); + goto out; + } + + if (!ctl_value_valid(ctl, ctl->def_val)) + err = -EINVAL; + +out: + ksft_test_result(err >= 0, "get_value.%d.%d\n", + ctl->card->card, ctl->elem); +} + +bool show_mismatch(struct ctl_data *ctl, int index, + snd_ctl_elem_value_t *read_val, + snd_ctl_elem_value_t *expected_val) +{ + long long expected_int, read_int; + + /* + * We factor out the code to compare values representable as + * integers, ensure that check doesn't log otherwise. + */ + expected_int = 0; + read_int = 0; + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_BOOLEAN: + expected_int = snd_ctl_elem_value_get_boolean(expected_val, + index); + read_int = snd_ctl_elem_value_get_boolean(read_val, index); + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + expected_int = snd_ctl_elem_value_get_integer(expected_val, + index); + read_int = snd_ctl_elem_value_get_integer(read_val, index); + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + expected_int = snd_ctl_elem_value_get_integer64(expected_val, + index); + read_int = snd_ctl_elem_value_get_integer64(read_val, + index); + break; + + case SND_CTL_ELEM_TYPE_ENUMERATED: + expected_int = snd_ctl_elem_value_get_enumerated(expected_val, + index); + read_int = snd_ctl_elem_value_get_enumerated(read_val, + index); + break; + + default: + break; + } + + if (expected_int != read_int) { + /* + * NOTE: The volatile attribute means that the hardware + * can voluntarily change the state of control element + * independent of any operation by software. + */ + bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info); + ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n", + ctl->name, index, expected_int, read_int, is_volatile); + return !is_volatile; + } else { + return false; + } +} + +/* + * Write a value then if possible verify that we get the expected + * result. An optional expected value can be provided if we expect + * the write to fail, for verifying that invalid writes don't corrupt + * anything. + */ +int write_and_verify(struct ctl_data *ctl, + snd_ctl_elem_value_t *write_val, + snd_ctl_elem_value_t *expected_val) +{ + int err, i; + bool error_expected, mismatch_shown; + snd_ctl_elem_value_t *read_val, *w_val; + snd_ctl_elem_value_alloca(&read_val); + snd_ctl_elem_value_alloca(&w_val); + + /* + * We need to copy the write value since writing can modify + * the value which causes surprises, and allocate an expected + * value if we expect to read back what we wrote. + */ + snd_ctl_elem_value_copy(w_val, write_val); + if (expected_val) { + error_expected = true; + } else { + error_expected = false; + snd_ctl_elem_value_alloca(&expected_val); + snd_ctl_elem_value_copy(expected_val, write_val); + } + + /* + * Do the write, if we have an expected value ignore the error + * and carry on to validate the expected value. + */ + err = snd_ctl_elem_write(ctl->card->handle, w_val); + if (err < 0 && !error_expected) { + ksft_print_msg("snd_ctl_elem_write() failed: %s\n", + snd_strerror(err)); + return err; + } + + /* Can we do the verification part? */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) + return err; + + snd_ctl_elem_value_set_id(read_val, ctl->id); + + err = snd_ctl_elem_read(ctl->card->handle, read_val); + if (err < 0) { + ksft_print_msg("snd_ctl_elem_read() failed: %s\n", + snd_strerror(err)); + return err; + } + + /* + * Use the libray to compare values, if there's a mismatch + * carry on and try to provide a more useful diagnostic than + * just "mismatch". + */ + if (!snd_ctl_elem_value_compare(expected_val, read_val)) + return 0; + + mismatch_shown = false; + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) + if (show_mismatch(ctl, i, read_val, expected_val)) + mismatch_shown = true; + + if (!mismatch_shown) + ksft_print_msg("%s read and written values differ\n", + ctl->name); + + return -1; +} + +/* + * Make sure we can write the default value back to the control, this + * should validate that at least some write works. + */ +void test_ctl_write_default(struct ctl_data *ctl) +{ + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + if (!snd_ctl_elem_info_is_writable(ctl->info)) { + ksft_print_msg("%s is not writeable\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* No idea what the default was for unreadable controls */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) { + ksft_print_msg("%s couldn't read default\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + err = write_and_verify(ctl, ctl->def_val, NULL); + + ksft_test_result(err >= 0, "write_default.%d.%d\n", + ctl->card->card, ctl->elem); +} + +bool test_ctl_write_valid_boolean(struct ctl_data *ctl) +{ + int err, i, j; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = 0; j < 2; j++) { + snd_ctl_elem_value_set_boolean(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +bool test_ctl_write_valid_integer(struct ctl_data *ctl) +{ + int err; + int i; + long j, step; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + step = snd_ctl_elem_info_get_step(ctl->info); + if (!step) + step = 1; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = snd_ctl_elem_info_get_min(ctl->info); + j <= snd_ctl_elem_info_get_max(ctl->info); j += step) { + + snd_ctl_elem_value_set_integer(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + + return !fail; +} + +bool test_ctl_write_valid_integer64(struct ctl_data *ctl) +{ + int err, i; + long long j, step; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + step = snd_ctl_elem_info_get_step64(ctl->info); + if (!step) + step = 1; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = snd_ctl_elem_info_get_min64(ctl->info); + j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) { + + snd_ctl_elem_value_set_integer64(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +bool test_ctl_write_valid_enumerated(struct ctl_data *ctl) +{ + int err, i, j; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) { + snd_ctl_elem_value_set_enumerated(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +void test_ctl_write_valid(struct ctl_data *ctl) +{ + bool pass; + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + if (!snd_ctl_elem_info_is_writable(ctl->info)) { + ksft_print_msg("%s is not writeable\n", ctl->name); + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_BOOLEAN: + pass = test_ctl_write_valid_boolean(ctl); + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + pass = test_ctl_write_valid_integer(ctl); + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + pass = test_ctl_write_valid_integer64(ctl); + break; + + case SND_CTL_ELEM_TYPE_ENUMERATED: + pass = test_ctl_write_valid_enumerated(ctl); + break; + + default: + /* No tests for this yet */ + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* Restore the default value to minimise disruption */ + err = write_and_verify(ctl, ctl->def_val, NULL); + if (err < 0) + pass = false; + + ksft_test_result(pass, "write_valid.%d.%d\n", + ctl->card->card, ctl->elem); +} + +int main(void) +{ + struct ctl_data *ctl; + + ksft_print_header(); + + find_controls(); + + ksft_set_plan(num_controls * TESTS_PER_CONTROL); + + for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) { + /* + * Must test get_value() before we write anything, the + * test stores the default value for later cleanup. + */ + test_ctl_get_value(ctl); + test_ctl_write_default(ctl); + test_ctl_write_valid(ctl); + } + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d8eeeafb50dc..1e13b7523918 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -18,7 +18,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) extern void do_syscall(int sve_vl); diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index af798b9d232c..4c418b2021e0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -21,8 +21,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -261,7 +259,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers via %s\n", type->name); + "Got FPSIMD registers via %s\n", type->name); if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) goto out; @@ -489,6 +487,8 @@ static int do_parent(pid_t child) unsigned int vq, vl; bool vl_supported; + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + /* Attach to the child */ while (1) { int sig; @@ -557,7 +557,14 @@ static int do_parent(pid_t child) } /* prctl() flags */ - ptrace_set_get_inherit(child, &vec_types[i]); + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_get_inherit(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", + vec_types[i].name); + ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", + vec_types[i].name); + } /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c index a876db1f096a..325bca0de0f6 100644 --- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c +++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c @@ -19,17 +19,6 @@ #include "kselftest.h" #include "mte_common_util.h" -#define PR_SET_TAGGED_ADDR_CTRL 55 -#define PR_GET_TAGGED_ADDR_CTRL 56 -# define PR_TAGGED_ADDR_ENABLE (1UL << 0) -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TAG_SHIFT 3 -# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) - #include "mte_def.h" #define NUM_ITERATIONS 1024 diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 1de7a0abd0ae..f4ae5f87a3b7 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include <assert.h> #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -11,6 +12,7 @@ #include <string.h> #include <ucontext.h> #include <unistd.h> +#include <sys/uio.h> #include <sys/mman.h> #include "kselftest.h" @@ -19,14 +21,28 @@ static size_t page_sz; -static int check_usermem_access_fault(int mem_type, int mode, int mapping) +#define TEST_NAME_MAX 100 + +enum test_type { + READ_TEST, + WRITE_TEST, + READV_TEST, + WRITEV_TEST, + LAST_TEST, +}; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping, + int tag_offset, int tag_len, + enum test_type test_type) { int fd, i, err; char val = 'A'; - size_t len, read_len; + ssize_t len, syscall_len; void *ptr, *ptr_next; + int fileoff, ptroff, size; + int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz}; - err = KSFT_FAIL; + err = KSFT_PASS; len = 2 * page_sz; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); fd = create_temp_file(); @@ -43,9 +59,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) } mte_initialize_current_context(mode, (uintptr_t)ptr, len); /* Copy from file into buffer with valid tag */ - read_len = read(fd, ptr, len); + syscall_len = read(fd, ptr, len); mte_wait_after_trig(); - if (cur_mte_cxt.fault_valid || read_len < len) + if (cur_mte_cxt.fault_valid || syscall_len < len) goto usermem_acc_err; /* Verify same pattern is read */ for (i = 0; i < len; i++) @@ -54,36 +70,136 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (i < len) goto usermem_acc_err; - /* Tag the next half of memory with different value */ - ptr_next = (void *)((unsigned long)ptr + page_sz); + if (!tag_len) + tag_len = len - tag_offset; + /* Tag a part of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + tag_offset); ptr_next = mte_insert_new_tag(ptr_next); - mte_set_tag_address_range(ptr_next, page_sz); + mte_set_tag_address_range(ptr_next, tag_len); - lseek(fd, 0, 0); - /* Copy from file into buffer with invalid tag */ - read_len = read(fd, ptr, len); - mte_wait_after_trig(); - /* - * Accessing user memory in kernel with invalid tag should fail in sync - * mode without fault but may not fail in async mode as per the - * implemented MTE userspace support in Arm64 kernel. - */ - if (mode == MTE_SYNC_ERR && - !cur_mte_cxt.fault_valid && read_len < len) { - err = KSFT_PASS; - } else if (mode == MTE_ASYNC_ERR && - !cur_mte_cxt.fault_valid && read_len == len) { - err = KSFT_PASS; + for (fileoff = 0; fileoff < 16; fileoff++) { + for (ptroff = 0; ptroff < 16; ptroff++) { + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + size = sizes[i]; + lseek(fd, 0, 0); + + /* perform file operation on buffer with invalid tag */ + switch (test_type) { + case READ_TEST: + syscall_len = read(fd, ptr + ptroff, size); + break; + case WRITE_TEST: + syscall_len = write(fd, ptr + ptroff, size); + break; + case READV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = readv(fd, iov, 1); + break; + } + case WRITEV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = writev(fd, iov, 1); + break; + } + case LAST_TEST: + goto usermem_acc_err; + } + + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (cur_mte_cxt.fault_valid) { + goto usermem_acc_err; + } + if (mode == MTE_SYNC_ERR && syscall_len < len) { + /* test passed */ + } else if (mode == MTE_ASYNC_ERR && syscall_len == size) { + /* test passed */ + } else { + goto usermem_acc_err; + } + } + } } + + goto exit; + usermem_acc_err: + err = KSFT_FAIL; +exit: mte_free_memory((void *)ptr, len, mem_type, true); close(fd); return err; } +void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) { + const char* test_type; + const char* mte_type; + const char* map_type; + + switch (type) { + case READ_TEST: + test_type = "read"; + break; + case WRITE_TEST: + test_type = "write"; + break; + case READV_TEST: + test_type = "readv"; + break; + case WRITEV_TEST: + test_type = "writev"; + break; + default: + assert(0); + break; + } + + switch (sync) { + case MTE_SYNC_ERR: + mte_type = "MTE_SYNC_ERR"; + break; + case MTE_ASYNC_ERR: + mte_type = "MTE_ASYNC_ERR"; + break; + default: + assert(0); + break; + } + + switch (map) { + case MAP_SHARED: + map_type = "MAP_SHARED"; + break; + case MAP_PRIVATE: + map_type = "MAP_PRIVATE"; + break; + default: + assert(0); + break; + } + + snprintf(name, name_len, + "test type: %s, %s, %s, tag len: %d, tag offset: %d\n", + test_type, mte_type, map_type, len, offset); +} + int main(int argc, char *argv[]) { int err; + int t, s, m, l, o; + int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR}; + int maps[] = {MAP_SHARED, MAP_PRIVATE}; + int tag_lens[] = {0, MT_GRANULE_SIZE}; + int tag_offsets[] = {page_sz, MT_GRANULE_SIZE}; + char test_name[TEST_NAME_MAX]; page_sz = getpagesize(); if (!page_sz) { @@ -98,17 +214,28 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); /* Set test plan */ - ksft_set_plan(4); + ksft_set_plan(64); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), - "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); - - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), - "Check memory access from kernel in async mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), - "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + for (t = 0; t < LAST_TEST; t++) { + for (s = 0; s < ARRAY_SIZE(mte_sync); s++) { + for (m = 0; m < ARRAY_SIZE(maps); m++) { + for (l = 0; l < ARRAY_SIZE(tag_lens); l++) { + for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) { + int sync = mte_sync[s]; + int map = maps[m]; + int offset = tag_offsets[o]; + int tag_len = tag_lens[l]; + int res = check_usermem_access_fault(USE_MMAP, sync, + map, offset, + tag_len, t); + format_test_name(test_name, TEST_NAME_MAX, + t, sync, map, tag_len, offset); + evaluate_test(res, test_name); + } + } + } + } + } mte_restore_setup(); ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index ebe8694dbef0..f909b70d9e98 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -53,6 +53,7 @@ struct tdescr { char *name; char *descr; unsigned long feats_required; + unsigned long feats_incompatible; /* bitmask of effectively supported feats: populated at run-time */ unsigned long feats_supported; bool initialized; diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 2f8c23af3b5e..5743897984b0 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats) { size_t flen = MAX_FEATS_SZ - 1; + feats_string[0] = '\0'; + for (int i = 0; i < FMAX_END; i++) { if (feats & (1UL << i)) { size_t tlen = strlen(feats_names[i]); @@ -256,7 +258,7 @@ int test_init(struct tdescr *td) td->minsigstksz = MINSIGSTKSZ; fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); - if (td->feats_required) { + if (td->feats_required || td->feats_incompatible) { td->feats_supported = 0; /* * Checking for CPU required features using both the @@ -267,15 +269,29 @@ int test_init(struct tdescr *td) if (getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; if (feats_ok(td)) { - fprintf(stderr, - "Required Features: [%s] supported\n", - feats_to_string(td->feats_required & - td->feats_supported)); + if (td->feats_required & td->feats_supported) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + if (!(td->feats_incompatible & td->feats_supported)) + fprintf(stderr, + "Incompatible Features: [%s] absent\n", + feats_to_string(td->feats_incompatible)); } else { - fprintf(stderr, - "Required Features: [%s] NOT supported\n", - feats_to_string(td->feats_required & - ~td->feats_supported)); + if ((td->feats_required & td->feats_supported) != + td->feats_supported) + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + if (td->feats_incompatible & td->feats_supported) + fprintf(stderr, + "Incompatible Features: [%s] supported\n", + feats_to_string(td->feats_incompatible & + ~td->feats_supported)); + + td->result = KSFT_SKIP; return 0; } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 6772b5c8d274..f3aa99ba67bb 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -18,6 +18,8 @@ void test_result(struct tdescr *td); static inline bool feats_ok(struct tdescr *td) { + if (td->feats_incompatible & td->feats_supported) + return false; return (td->feats_required & td->feats_supported) == td->feats_required; } diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 32fc5b3b5cf6..911345c526e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -10,6 +10,7 @@ #include "test_d_path.skel.h" #include "test_d_path_check_rdonly_mem.skel.h" +#include "test_d_path_check_types.skel.h" static int duration; @@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem(void) test_d_path_check_rdonly_mem__destroy(skel); } +static void test_d_path_check_types(void) +{ + struct test_d_path_check_types *skel; + + skel = test_d_path_check_types__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type"); + + test_d_path_check_types__destroy(skel); +} + void test_d_path(void) { if (test__start_subtest("basic")) @@ -174,4 +185,7 @@ void test_d_path(void) if (test__start_subtest("check_rdonly_mem")) test_d_path_check_rdonly_mem(); + + if (test__start_subtest("check_alloc_mem")) + test_d_path_check_types(); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c new file mode 100644 index 000000000000..f74b82305da8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "timer_crash.skel.h" + +enum { + MODE_ARRAY, + MODE_HASH, +}; + +static void test_timer_crash_mode(int mode) +{ + struct timer_crash *skel; + + skel = timer_crash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) + return; + skel->bss->pid = getpid(); + skel->bss->crash_map = mode; + if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach")) + goto end; + usleep(1); +end: + timer_crash__destroy(skel); +} + +void test_timer_crash(void) +{ + if (test__start_subtest("array")) + test_timer_crash_mode(MODE_ARRAY); + if (test__start_subtest("hash")) + test_timer_crash_mode(MODE_HASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 983ab0b47d30..b2b357f8c74c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -8,46 +8,47 @@ void serial_test_xdp_link(void) { - __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; + __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; + int err; __u32 link_info_len = sizeof(link_info); __u32 prog_info_len = sizeof(prog_info); skel1 = test_xdp_link__open_and_load(); - if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel1, "skel_load")) goto cleanup; prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); skel2 = test_xdp_link__open_and_load(); - if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel2, "skel_load")) goto cleanup; prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info1", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info1")) goto cleanup; id1 = prog_info.id; memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info2", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info2")) goto cleanup; id2 = prog_info.id; /* set initial prog attachment */ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fd_attach")) goto cleanup; /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err); + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) + goto cleanup; /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); @@ -62,7 +63,7 @@ void serial_test_xdp_link(void) /* detach BPF program */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "prog_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "prog_detach")) goto cleanup; /* now BPF link should attach successfully */ @@ -73,24 +74,23 @@ void serial_test_xdp_link(void) /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); - if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_attach_fail")) goto cleanup; /* Can't force-update when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); - if (CHECK(!err, "prog_update_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_update_fail")) goto cleanup; /* Can't force-detach when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); - if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_detach_fail")) goto cleanup; /* BPF link is not allowed to replace another BPF link */ @@ -110,40 +110,39 @@ void serial_test_xdp_link(void) skel2->links.xdp_handler = link; err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id2, "id2_check", - "loaded prog id %u != id2 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) goto cleanup; /* updating program under active BPF link works as expected */ err = bpf_link__update_program(link, skel1->progs.xdp_handler); - if (CHECK(err, "link_upd", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_upd")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_info")) goto cleanup; - CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", - "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); - CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type"); + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); + + /* updating program under active BPF link with different type fails */ + err = bpf_link__update_program(link, skel1->progs.tc_handler); + if (!ASSERT_ERR(err, "link_upd_invalid")) + goto cleanup; err = bpf_link__detach(link); - if (CHECK(err, "link_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "link_detach")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) - goto cleanup; - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); + + ASSERT_OK(err, "link_info"); + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); /* ifindex should be zeroed out */ - CHECK(link_info.xdp.ifindex != 0, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, 0); + ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex"); cleanup: test_xdp_link__destroy(skel1); diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c new file mode 100644 index 000000000000..7e02b7361307 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern const int bpf_prog_active __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +SEC("fentry/security_inode_getattr") +int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + void *active; + u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* FAIL here! 'active' points to 'regular' memory. It + * cannot be submitted to ring buffer. + */ + bpf_ringbuf_submit(active, 0); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 2966564b8497..6c85b00f27b2 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -235,7 +235,7 @@ SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; - int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -249,8 +249,11 @@ int bpf_prog4(struct sk_msg_md *msg) bpf_msg_pull_data(msg, *start, *end, 0); start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_DROP; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); if (start_pop && pop) @@ -263,6 +266,7 @@ int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int err = 0; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -279,8 +283,11 @@ int bpf_prog6(struct sk_msg_md *msg) start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_DROP; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); @@ -338,7 +345,7 @@ SEC("sk_msg5") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; - int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -352,8 +359,11 @@ int bpf_prog10(struct sk_msg_md *msg) bpf_msg_pull_data(msg, *start, *end, 0); start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_PASS; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); if (start_pop && pop) diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index a8233e7f173b..728dbd39eff0 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH @@ -41,11 +41,11 @@ struct { /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[16]; + char prev_comm[TASK_COMM_LEN]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[16]; + char next_comm[TASK_COMM_LEN]; int next_pid; int next_prio; }; diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index ce6974016f53..43bd7a20cc50 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -1,17 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[16]; + char prev_comm[TASK_COMM_LEN]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[16]; + char next_comm[TASK_COMM_LEN]; int next_pid; int next_prio; }; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c index ee7d6ac0f615..64ff32eaae92 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_link.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp) { return 0; } + +SEC("tc") +int tc_handler(struct __sk_buff *skb) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c new file mode 100644 index 000000000000..f8f7944e70da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_crash.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct map_elem { + struct bpf_timer timer; + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct map_elem); +} amap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct map_elem); +} hmap SEC(".maps"); + +int pid = 0; +int crash_map = 0; /* 0 for amap, 1 for hmap */ + +SEC("fentry/do_nanosleep") +int sys_enter(void *ctx) +{ + struct map_elem *e, value = {}; + void *map = crash_map ? (void *)&hmap : (void *)&amap; + + if (bpf_get_current_task_btf()->tgid != pid) + return 0; + + *(void **)&value = (void *)0xdeadcaf3; + + bpf_map_update_elem(map, &(int){0}, &value, 0); + /* For array map, doing bpf_map_update_elem will do a + * check_and_free_timer_in_array, which will trigger the crash if timer + * pointer was overwritten, for hmap we need to use bpf_timer_cancel. + */ + if (crash_map == 1) { + e = bpf_map_lookup_elem(map, &(int){0}); + if (!e) + return 0; + bpf_timer_cancel(&e->timer); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index ebf68dce5504..2893e9f2f1e0 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -28,7 +28,6 @@ // 5. We can read keycode from same /dev/lirc device #include <linux/bpf.h> -#include <linux/lirc.h> #include <linux/input.h> #include <errno.h> #include <stdio.h> diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c new file mode 100644 index 000000000000..b64d33e4833c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -0,0 +1,95 @@ +{ + "ringbuf: invalid reservation offset 1", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "dereference of modified alloc_mem ptr R1", +}, +{ + "ringbuf: invalid reservation offset 2", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "R7 min value is outside of the allowed memory range", +}, +{ + "ringbuf: check passing rb mem to helpers", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + /* pass allocated ring buffer memory to fib lookup */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup), + /* submit the ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 1a8eb9672bd1..8cfc5349d2a8 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", }, { "check corrupted spill/fill", diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 0cf7e90c0052..dbaa7aabbb4a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -583,7 +583,7 @@ int clone_into_cgroup_run_wait(const char *cgroup) return 0; } -int cg_prepare_for_wait(const char *cgroup) +static int __prepare_for_wait(const char *cgroup, const char *filename) { int fd, ret = -1; @@ -591,8 +591,7 @@ int cg_prepare_for_wait(const char *cgroup) if (fd == -1) return fd; - ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"), - IN_MODIFY); + ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY); if (ret == -1) { close(fd); fd = -1; @@ -601,6 +600,16 @@ int cg_prepare_for_wait(const char *cgroup) return fd; } +int cg_prepare_for_wait(const char *cgroup) +{ + return __prepare_for_wait(cgroup, "cgroup.events"); +} + +int memcg_prepare_for_wait(const char *cgroup) +{ + return __prepare_for_wait(cgroup, "memory.events"); +} + int cg_wait_for(int fd) { int ret = -1; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 4f66d10626d2..628738532ac9 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -55,4 +55,5 @@ extern int clone_reap(pid_t pid, int options); extern int clone_into_cgroup_run_wait(const char *cgroup); extern int dirfd_open_opath(const char *dir); extern int cg_prepare_for_wait(const char *cgroup); +extern int memcg_prepare_for_wait(const char *cgroup); extern int cg_wait_for(int fd); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index c19a97dd02d4..36ccf2322e21 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -16,6 +16,7 @@ #include <netinet/in.h> #include <netdb.h> #include <errno.h> +#include <sys/mman.h> #include "../kselftest.h" #include "cgroup_util.h" @@ -628,6 +629,82 @@ cleanup: return ret; } +static int alloc_anon_mlock(const char *cgroup, void *arg) +{ + size_t size = (size_t)arg; + void *buf; + + buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + 0, 0); + if (buf == MAP_FAILED) + return -1; + + mlock(buf, size); + munmap(buf, size); + return 0; +} + +/* + * This test checks that memory.high is able to throttle big single shot + * allocation i.e. large allocation within one kernel entry. + */ +static int test_memcg_high_sync(const char *root) +{ + int ret = KSFT_FAIL, pid, fd = -1; + char *memcg; + long pre_high, pre_max; + long post_high, post_max; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + pre_high = cg_read_key_long(memcg, "memory.events", "high "); + pre_max = cg_read_key_long(memcg, "memory.events", "max "); + if (pre_high < 0 || pre_max < 0) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.high", "30M")) + goto cleanup; + + if (cg_write(memcg, "memory.max", "140M")) + goto cleanup; + + fd = memcg_prepare_for_wait(memcg); + if (fd < 0) + goto cleanup; + + pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200)); + if (pid < 0) + goto cleanup; + + cg_wait_for(fd); + + post_high = cg_read_key_long(memcg, "memory.events", "high "); + post_max = cg_read_key_long(memcg, "memory.events", "max "); + if (post_high < 0 || post_max < 0) + goto cleanup; + + if (pre_high == post_high || pre_max != post_max) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (fd >= 0) + close(fd); + cg_destroy(memcg); + free(memcg); + + return ret; +} + /* * This test checks that memory.max limits the amount of * memory which can be consumed by either anonymous memory @@ -1180,6 +1257,7 @@ struct memcg_test { T(test_memcg_min), T(test_memcg_low), T(test_memcg_high), + T(test_memcg_high_sync), T(test_memcg_max), T(test_memcg_oom_events), T(test_memcg_swap_max), diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index 076cf4325f78..cd4582129c7d 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -126,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected, int main(int argc, char *argv[]) { - pid_t pid; - uid_t uid = getuid(); ksft_print_header(); diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index 31f8c9a76c5f..60ce18ed0666 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -194,5 +194,5 @@ prerequisite # Run requested functions clear_dumps $OUTFILE -do_test >> $OUTFILE.txt +do_test | tee -a $OUTFILE.txt dmesg_dumps $OUTFILE diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 937d36ae9a69..0470c5f3e690 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,5 +6,6 @@ TEST_GEN_FILES += huge_count_read_write TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh +TEST_PROGS += sysfs.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh new file mode 100644 index 000000000000..2e3ae77cb6db --- /dev/null +++ b/tools/testing/selftests/damon/sysfs.sh @@ -0,0 +1,306 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest frmework requirement - SKIP code is 4. +ksft_skip=4 + +ensure_write_succ() +{ + file=$1 + content=$2 + reason=$3 + + if ! echo "$content" > "$file" + then + echo "writing $content to $file failed" + echo "expected success because $reason" + exit 1 + fi +} + +ensure_write_fail() +{ + file=$1 + content=$2 + reason=$3 + + if echo "$content" > "$file" + then + echo "writing $content to $file succeed ($fail_reason)" + echo "expected failure because $reason" + exit 1 + fi +} + +ensure_dir() +{ + dir=$1 + to_ensure=$2 + if [ "$to_ensure" = "exist" ] && [ ! -d "$dir" ] + then + echo "$dir dir is expected but not found" + exit 1 + elif [ "$to_ensure" = "not_exist" ] && [ -d "$dir" ] + then + echo "$dir dir is not expected but found" + exit 1 + fi +} + +ensure_file() +{ + file=$1 + to_ensure=$2 + permission=$3 + if [ "$to_ensure" = "exist" ] + then + if [ ! -f "$file" ] + then + echo "$file is expected but not found" + exit 1 + fi + perm=$(stat -c "%a" "$file") + if [ ! "$perm" = "$permission" ] + then + echo "$file permission: expected $permission but $perm" + exit 1 + fi + elif [ "$to_ensure" = "not_exist" ] && [ -f "$dir" ] + then + echo "$file is not expected but found" + exit 1 + fi +} + +test_range() +{ + range_dir=$1 + ensure_dir "$range_dir" "exist" + ensure_file "$range_dir/min" "exist" 600 + ensure_file "$range_dir/max" "exist" 600 +} + +test_stats() +{ + stats_dir=$1 + ensure_dir "$stats_dir" "exist" + for f in nr_tried sz_tried nr_applied sz_applied qt_exceeds + do + ensure_file "$stats_dir/$f" "exist" "400" + done +} + +test_watermarks() +{ + watermarks_dir=$1 + ensure_dir "$watermarks_dir" "exist" + ensure_file "$watermarks_dir/metric" "exist" "600" + ensure_file "$watermarks_dir/interval_us" "exist" "600" + ensure_file "$watermarks_dir/high" "exist" "600" + ensure_file "$watermarks_dir/mid" "exist" "600" + ensure_file "$watermarks_dir/low" "exist" "600" +} + +test_weights() +{ + weights_dir=$1 + ensure_dir "$weights_dir" "exist" + ensure_file "$weights_dir/sz_permil" "exist" "600" + ensure_file "$weights_dir/nr_accesses_permil" "exist" "600" + ensure_file "$weights_dir/age_permil" "exist" "600" +} + +test_quotas() +{ + quotas_dir=$1 + ensure_dir "$quotas_dir" "exist" + ensure_file "$quotas_dir/ms" "exist" 600 + ensure_file "$quotas_dir/bytes" "exist" 600 + ensure_file "$quotas_dir/reset_interval_ms" "exist" 600 + test_weights "$quotas_dir/weights" +} + +test_access_pattern() +{ + access_pattern_dir=$1 + ensure_dir "$access_pattern_dir" "exist" + test_range "$access_pattern_dir/age" + test_range "$access_pattern_dir/nr_accesses" + test_range "$access_pattern_dir/sz" +} + +test_scheme() +{ + scheme_dir=$1 + ensure_dir "$scheme_dir" "exist" + ensure_file "$scheme_dir/action" "exist" "600" + test_access_pattern "$scheme_dir/access_pattern" + test_quotas "$scheme_dir/quotas" + test_watermarks "$scheme_dir/watermarks" + test_stats "$scheme_dir/stats" +} + +test_schemes() +{ + schemes_dir=$1 + ensure_dir "$schemes_dir" "exist" + ensure_file "$schemes_dir/nr_schemes" "exist" 600 + + ensure_write_succ "$schemes_dir/nr_schemes" "1" "valid input" + test_scheme "$schemes_dir/0" + + ensure_write_succ "$schemes_dir/nr_schemes" "2" "valid input" + test_scheme "$schemes_dir/0" + test_scheme "$schemes_dir/1" + + ensure_write_succ "$schemes_dir/nr_schemes" "0" "valid input" + ensure_dir "$schemes_dir/0" "not_exist" + ensure_dir "$schemes_dir/1" "not_exist" +} + +test_region() +{ + region_dir=$1 + ensure_dir "$region_dir" "exist" + ensure_file "$region_dir/start" "exist" 600 + ensure_file "$region_dir/end" "exist" 600 +} + +test_regions() +{ + regions_dir=$1 + ensure_dir "$regions_dir" "exist" + ensure_file "$regions_dir/nr_regions" "exist" 600 + + ensure_write_succ "$regions_dir/nr_regions" "1" "valid input" + test_region "$regions_dir/0" + + ensure_write_succ "$regions_dir/nr_regions" "2" "valid input" + test_region "$regions_dir/0" + test_region "$regions_dir/1" + + ensure_write_succ "$regions_dir/nr_regions" "0" "valid input" + ensure_dir "$regions_dir/0" "not_exist" + ensure_dir "$regions_dir/1" "not_exist" +} + +test_target() +{ + target_dir=$1 + ensure_dir "$target_dir" "exist" + ensure_file "$target_dir/pid_target" "exist" "600" + test_regions "$target_dir/regions" +} + +test_targets() +{ + targets_dir=$1 + ensure_dir "$targets_dir" "exist" + ensure_file "$targets_dir/nr_targets" "exist" 600 + + ensure_write_succ "$targets_dir/nr_targets" "1" "valid input" + test_target "$targets_dir/0" + + ensure_write_succ "$targets_dir/nr_targets" "2" "valid input" + test_target "$targets_dir/0" + test_target "$targets_dir/1" + + ensure_write_succ "$targets_dir/nr_targets" "0" "valid input" + ensure_dir "$targets_dir/0" "not_exist" + ensure_dir "$targets_dir/1" "not_exist" +} + +test_intervals() +{ + intervals_dir=$1 + ensure_dir "$intervals_dir" "exist" + ensure_file "$intervals_dir/aggr_us" "exist" "600" + ensure_file "$intervals_dir/sample_us" "exist" "600" + ensure_file "$intervals_dir/update_us" "exist" "600" +} + +test_monitoring_attrs() +{ + monitoring_attrs_dir=$1 + ensure_dir "$monitoring_attrs_dir" "exist" + test_intervals "$monitoring_attrs_dir/intervals" + test_range "$monitoring_attrs_dir/nr_regions" +} + +test_context() +{ + context_dir=$1 + ensure_dir "$context_dir" "exist" + ensure_file "$context_dir/operations" "exist" 600 + test_monitoring_attrs "$context_dir/monitoring_attrs" + test_targets "$context_dir/targets" + test_schemes "$context_dir/schemes" +} + +test_contexts() +{ + contexts_dir=$1 + ensure_dir "$contexts_dir" "exist" + ensure_file "$contexts_dir/nr_contexts" "exist" 600 + + ensure_write_succ "$contexts_dir/nr_contexts" "1" "valid input" + test_context "$contexts_dir/0" + + ensure_write_fail "$contexts_dir/nr_contexts" "2" "only 0/1 are supported" + test_context "$contexts_dir/0" + + ensure_write_succ "$contexts_dir/nr_contexts" "0" "valid input" + ensure_dir "$contexts_dir/0" "not_exist" +} + +test_kdamond() +{ + kdamond_dir=$1 + ensure_dir "$kdamond_dir" "exist" + ensure_file "$kdamond_dir/state" "exist" "600" + ensure_file "$kdamond_dir/pid" "exist" 400 + test_contexts "$kdamond_dir/contexts" +} + +test_kdamonds() +{ + kdamonds_dir=$1 + ensure_dir "$kdamonds_dir" "exist" + + ensure_file "$kdamonds_dir/nr_kdamonds" "exist" "600" + + ensure_write_succ "$kdamonds_dir/nr_kdamonds" "1" "valid input" + test_kdamond "$kdamonds_dir/0" + + ensure_write_succ "$kdamonds_dir/nr_kdamonds" "2" "valid input" + test_kdamond "$kdamonds_dir/0" + test_kdamond "$kdamonds_dir/1" + + ensure_write_succ "$kdamonds_dir/nr_kdamonds" "0" "valid input" + ensure_dir "$kdamonds_dir/0" "not_exist" + ensure_dir "$kdamonds_dir/1" "not_exist" +} + +test_damon_sysfs() +{ + damon_sysfs=$1 + if [ ! -d "$damon_sysfs" ] + then + echo "$damon_sysfs not found" + exit $ksft_skip + fi + + test_kdamonds "$damon_sysfs/kdamonds" +} + +check_dependencies() +{ + if [ $EUID -ne 0 ] + then + echo "Run as root" + exit $ksft_skip + fi +} + +check_dependencies +test_damon_sysfs "/sys/kernel/mm/damon/admin" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index bcb110e830ce..dea33dc93790 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -50,8 +50,8 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' [$profile] overflow $target" fi + RET_FIN=$(( RET_FIN || RET )) done - RET_FIN=$(( RET_FIN || RET )) done done current_test="" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 3e3e06ea5703..86e787895f78 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -60,7 +60,8 @@ __tc_police_test() tc_police_rules_create $count $should_fail - offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l) + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") ((offload_count == count)) check_err_fail $should_fail $? "tc police offload count" } diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index dd61118df66e..a89ba6de7987 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,13 +3,14 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 -TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe +TEST_PROGS := binfmt_script +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular +TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth +TEST_GEN_PROGS += null-argv EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ $(OUTPUT)/S_I*.test diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c new file mode 100644 index 000000000000..c19726e710d1 --- /dev/null +++ b/tools/testing/selftests/exec/null-argv.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test that empty argvs are swapped out for a single empty string. */ +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "../kselftest.h" + +#define FORK(exec) \ +do { \ + pid = fork(); \ + if (pid == 0) { \ + /* Child */ \ + exec; /* Some kind of exec */ \ + perror("# " #exec); \ + return 1; \ + } \ + check_result(pid, #exec); \ +} while (0) + +void check_result(pid_t pid, const char *msg) +{ + int wstatus; + + if (pid == (pid_t)-1) { + perror("# fork"); + ksft_test_result_fail("fork failed: %s\n", msg); + return; + } + if (waitpid(pid, &wstatus, 0) < 0) { + perror("# waitpid"); + ksft_test_result_fail("waitpid failed: %s\n", msg); + return; + } + if (!WIFEXITED(wstatus)) { + ksft_test_result_fail("child did not exit: %s\n", msg); + return; + } + if (WEXITSTATUS(wstatus) != 0) { + ksft_test_result_fail("non-zero exit: %s\n", msg); + return; + } + ksft_test_result_pass("%s\n", msg); +} + +int main(int argc, char *argv[], char *envp[]) +{ + pid_t pid; + static char * const args[] = { NULL }; + static char * const str[] = { "", NULL }; + + /* argc counting checks */ + if (argc < 1) { + fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n"); + return 1; + } + if (argc != 1) { + fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc); + return 1; + } + if (argv[0][0] == '\0') { + /* Good, we found a NULL terminated string at argv[0]! */ + return 0; + } + + /* Test runner. */ + ksft_print_header(); + ksft_set_plan(5); + + FORK(execve(argv[0], str, NULL)); + FORK(execve(argv[0], NULL, NULL)); + FORK(execve(argv[0], NULL, envp)); + FORK(execve(argv[0], args, NULL)); + FORK(execve(argv[0], args, envp)); + + ksft_exit(ksft_cnt.ksft_pass == ksft_plan); +} diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index e96e279e0533..25432b8cd5bd 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -19,7 +19,7 @@ fail() { # mesg FILTER=set_ftrace_filter FUNC1="schedule" -FUNC2="do_softirq" +FUNC2="scheduler_tick" ALL_FUNCS="#### all functions enabled ####" diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 12631f0076a1..11e157d7533b 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -11,7 +11,7 @@ all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ if [ -e $$DIR/$(TEST_PROGS) ]; then \ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ fi \ @@ -32,6 +32,6 @@ override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore index a4969f7ee020..ededb077a3a6 100644 --- a/tools/testing/selftests/gpio/.gitignore +++ b/tools/testing/selftests/gpio/.gitignore @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only gpio-mockup-cdev +gpio-chip-info +gpio-line-name diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index d7b312b44a62..71b306602368 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := gpio-mockup.sh +TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh -TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev +TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name CFLAGS += -O2 -g -Wall -I../../../../usr/include/ include ../lib.mk diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config index ce100342c20b..409a8532facc 100644 --- a/tools/testing/selftests/gpio/config +++ b/tools/testing/selftests/gpio/config @@ -1,3 +1,4 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_MOCKUP=m +CONFIG_GPIO_SIM=m diff --git a/tools/testing/selftests/gpio/gpio-chip-info.c b/tools/testing/selftests/gpio/gpio-chip-info.c new file mode 100644 index 000000000000..fdc07e742fba --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-chip-info.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GPIO character device helper for reading chip information. + * + * Copyright (C) 2021 Bartosz Golaszewski <[email protected]> + */ + +#include <fcntl.h> +#include <linux/gpio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> + +static void print_usage(void) +{ + printf("usage:\n"); + printf(" gpio-chip-info <chip path> [name|label|num-lines]\n"); +} + +int main(int argc, char **argv) +{ + struct gpiochip_info info; + int fd, ret; + + if (argc != 3) { + print_usage(); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDWR); + if (fd < 0) { + perror("unable to open the GPIO chip"); + return EXIT_FAILURE; + } + + memset(&info, 0, sizeof(info)); + ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &info); + if (ret) { + perror("chip info ioctl failed"); + return EXIT_FAILURE; + } + + if (strcmp(argv[2], "name") == 0) { + printf("%s\n", info.name); + } else if (strcmp(argv[2], "label") == 0) { + printf("%s\n", info.label); + } else if (strcmp(argv[2], "num-lines") == 0) { + printf("%u\n", info.lines); + } else { + fprintf(stderr, "unknown command: %s\n", argv[2]); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/gpio/gpio-line-name.c b/tools/testing/selftests/gpio/gpio-line-name.c new file mode 100644 index 000000000000..e635cfadbded --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-line-name.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GPIO character device helper for reading line names. + * + * Copyright (C) 2021 Bartosz Golaszewski <[email protected]> + */ + +#include <fcntl.h> +#include <linux/gpio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> + +static void print_usage(void) +{ + printf("usage:\n"); + printf(" gpio-line-name <chip path> <line offset>\n"); +} + +int main(int argc, char **argv) +{ + struct gpio_v2_line_info info; + int fd, ret; + char *endp; + + if (argc != 3) { + print_usage(); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDWR); + if (fd < 0) { + perror("unable to open the GPIO chip"); + return EXIT_FAILURE; + } + + memset(&info, 0, sizeof(info)); + info.offset = strtoul(argv[2], &endp, 10); + if (*endp != '\0') { + print_usage(); + return EXIT_FAILURE; + } + + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &info); + if (ret) { + perror("line info ioctl failed"); + return EXIT_FAILURE; + } + + printf("%s\n", info.name); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh new file mode 100755 index 000000000000..341e3de00896 --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -0,0 +1,396 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 Bartosz Golaszewski <[email protected]> + +BASE_DIR=`dirname $0` +CONFIGFS_DIR="/sys/kernel/config/gpio-sim" +MODULE="gpio-sim" + +fail() { + echo "$*" >&2 + echo "GPIO $MODULE test FAIL" + exit 1 +} + +skip() { + echo "$*" >&2 + echo "GPIO $MODULE test SKIP" + exit 4 +} + +remove_chip() { + local CHIP=$1 + + for FILE in $CONFIGFS_DIR/$CHIP/*; do + BANK=`basename $FILE` + if [ "$BANK" = "live" -o "$BANK" = "dev_name" ]; then + continue + fi + + LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | egrep ^line` + if [ "$?" = 0 ]; then + for LINE in $LINES; do + if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then + rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog || \ + fail "Unable to remove the hog" + fi + + rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE || \ + fail "Unable to remove the line" + done + fi + + rmdir $CONFIGFS_DIR/$CHIP/$BANK + done + + rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" +} + +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + remove_chip $CHIP + done +} + +create_chip() { + local CHIP=$1 + + mkdir $CONFIGFS_DIR/$CHIP +} + +create_bank() { + local CHIP=$1 + local BANK=$2 + + mkdir $CONFIGFS_DIR/$CHIP/$BANK +} + +set_label() { + local CHIP=$1 + local BANK=$2 + local LABEL=$3 + + echo $LABEL > $CONFIGFS_DIR/$CHIP/$BANK/label || fail "Unable to set the chip label" +} + +set_num_lines() { + local CHIP=$1 + local BANK=$2 + local NUM_LINES=$3 + + echo $NUM_LINES > $CONFIGFS_DIR/$CHIP/$BANK/num_lines || \ + fail "Unable to set the number of lines" +} + +set_line_name() { + local CHIP=$1 + local BANK=$2 + local OFFSET=$3 + local NAME=$4 + local LINE_DIR=$CONFIGFS_DIR/$CHIP/$BANK/line$OFFSET + + test -d $LINE_DIR || mkdir $LINE_DIR + echo $NAME > $LINE_DIR/name || fail "Unable to set the line name" +} + +enable_chip() { + local CHIP=$1 + + echo 1 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to enable the chip" +} + +disable_chip() { + local CHIP=$1 + + echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" +} + +configfs_chip_name() { + local CHIP=$1 + local BANK=$2 + + cat $CONFIGFS_DIR/$CHIP/$BANK/chip_name 2> /dev/null || \ + fail "unable to read the chip name from configfs" +} + +configfs_dev_name() { + local CHIP=$1 + + cat $CONFIGFS_DIR/$CHIP/dev_name 2> /dev/null || \ + fail "unable to read the device name from configfs" +} + +get_chip_num_lines() { + local CHIP=$1 + local BANK=$2 + + $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` num-lines || \ + fail "unable to read the number of lines from the character device" +} + +get_chip_label() { + local CHIP=$1 + local BANK=$2 + + $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` label || \ + fail "unable to read the chip label from the character device" +} + +get_line_name() { + local CHIP=$1 + local BANK=$2 + local OFFSET=$3 + + $BASE_DIR/gpio-line-name /dev/`configfs_chip_name $CHIP $BANK` $OFFSET || \ + fail "unable to read the line name from the character device" +} + +sysfs_set_pull() { + local DEV=$1 + local BANK=$2 + local OFFSET=$3 + local PULL=$4 + local DEVNAME=`configfs_dev_name $DEV` + local CHIPNAME=`configfs_chip_name $DEV $BANK` + local SYSFSPATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull" + + echo $PULL > $SYSFSPATH || fail "Unable to set line pull in sysfs" +} + +# Load the gpio-sim module. This will pull in configfs if needed too. +modprobe gpio-sim || skip "unable to load the gpio-sim module" +# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed. +for IDX in `seq 5`; do + if [ "$IDX" -eq "5" ]; then + skip "configfs not mounted at /sys/kernel/config" + fi + + mountpoint -q /sys/kernel/config && break + sleep 0.1 +done +# If the module was already loaded: remove all previous chips +configfs_cleanup + +trap "exit 1" SIGTERM SIGINT +trap configfs_cleanup EXIT + +echo "1. chip_name and dev_name attributes" + +echo "1.1. Chip name is communicated to user" +create_chip chip +create_bank chip bank +enable_chip chip +test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +remove_chip chip + +echo "1.2. chip_name returns 'none' if the chip is still pending" +create_chip chip +create_bank chip bank +test "`cat $CONFIGFS_DIR/chip/bank/chip_name`" = "none" || \ + fail "chip_name doesn't return 'none' for a pending chip" +remove_chip chip + +echo "1.3. Device name is communicated to user" +create_chip chip +create_bank chip bank +enable_chip chip +test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +remove_chip chip + +echo "2. Creating and configuring simulated chips" + +echo "2.1. Default number of lines is 1" +create_chip chip +create_bank chip bank +enable_chip chip +test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +remove_chip chip + +echo "2.2. Number of lines can be specified" +create_chip chip +create_bank chip bank +set_num_lines chip bank 16 +enable_chip chip +test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +remove_chip chip + +echo "2.3. Label can be set" +create_chip chip +create_bank chip bank +set_label chip bank foobar +enable_chip chip +test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +remove_chip chip + +echo "2.4. Label can be left empty" +create_chip chip +create_bank chip bank +enable_chip chip +test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +remove_chip chip + +echo "2.5. Line names can be configured" +create_chip chip +create_bank chip bank +set_num_lines chip bank 16 +set_line_name chip bank 0 foo +set_line_name chip bank 2 bar +enable_chip chip +test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" +test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +remove_chip chip + +echo "2.6. Line config can remain unused if offset is greater than number of lines" +create_chip chip +create_bank chip bank +set_num_lines chip bank 2 +set_line_name chip bank 5 foobar +enable_chip chip +test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" +test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +remove_chip chip + +echo "2.7. Line configfs directory names are sanitized" +create_chip chip +create_bank chip bank +mkdir $CONFIGFS_DIR/chip/bank/line12foobar 2> /dev/null && \ + fail "invalid configfs line name accepted" +mkdir $CONFIGFS_DIR/chip/bank/line_no_offset 2> /dev/null && \ + fail "invalid configfs line name accepted" +remove_chip chip + +echo "2.8. Multiple chips can be created" +CHIPS="chip0 chip1 chip2" +for CHIP in $CHIPS; do + create_chip $CHIP + create_bank $CHIP bank + enable_chip $CHIP +done +for CHIP in $CHIPS; do + remove_chip $CHIP +done + +echo "2.9. Can't modify settings when chip is live" +create_chip chip +create_bank chip bank +enable_chip chip +echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ + fail "Setting label of a live chip should fail" +echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ + fail "Setting number of lines of a live chip should fail" +remove_chip chip + +echo "2.10. Can't create line items when chip is live" +create_chip chip +create_bank chip bank +enable_chip chip +mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +remove_chip chip + +echo "2.11. Probe errors are propagated to user-space" +create_chip chip +create_bank chip bank +set_num_lines chip bank 99999 +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Probe error was not propagated" +remove_chip chip + +echo "2.12. Cannot enable a chip without any GPIO banks" +create_chip chip +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Chip enabled without any GPIO banks" +remove_chip chip + +echo "2.13. Duplicate chip labels are not allowed" +create_chip chip +create_bank chip bank0 +set_label chip bank0 foobar +create_bank chip bank1 +set_label chip bank1 foobar +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Duplicate chip labels were not rejected" +remove_chip chip + +echo "2.14. Lines can be hogged" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog +enable_chip chip +$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ + fail "Setting the value of a hogged line shouldn't succeed" +remove_chip chip + +echo "3. Controlling simulated chips" + +echo "3.1. Pull can be set over sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +sysfs_set_pull chip bank 0 pull-up +$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0 +test "$?" = "1" || fail "pull set incorrectly" +sysfs_set_pull chip bank 0 pull-down +$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 +test "$?" = "0" || fail "pull set incorrectly" +remove_chip chip + +echo "3.2. Pull can be read from sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull +test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" +sysfs_set_pull chip bank 0 pull-up +test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +remove_chip chip + +echo "3.3. Incorrect input in sysfs is rejected" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" +echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +remove_chip chip + +echo "3.4. Can't write to value" +create_chip chip +create_bank chip bank +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" +echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +remove_chip chip + +echo "4. Simulated GPIO chips are functional" + +echo "4.1. Values can be read from sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" +test `cat $SYSFS_PATH` = "0" || fail "incorrect value read from sysfs" +$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & +sleep 0.1 # FIXME Any better way? +test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" +kill $! +remove_chip chip + +echo "4.2. Bias settings work correctly" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 +test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +remove_chip chip + +echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index 06256c96df12..f4a15cbdd5ea 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -29,6 +29,16 @@ #define SYSFS_PATH_MAX 256 #define DNAME_PATH_MAX 256 +/* + * Support ancient lirc.h which does not have these values. Can be removed + * once RHEL 8 is no longer a relevant testing platform. + */ +#if RC_PROTO_MAX < 26 +#define RC_PROTO_RCMM12 24 +#define RC_PROTO_RCMM24 25 +#define RC_PROTO_RCMM32 26 +#endif + static const struct { enum rc_proto proto; const char *name; diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index aa91d2063249..806a150648c3 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -4,7 +4,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -ifeq ($(ARCH),x86) +ifeq ($(ARCH),$(filter $(ARCH),x86 ppc64le)) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh index 43017cfe88f7..0e114b34d5d7 100755 --- a/tools/testing/selftests/kexec/kexec_common_lib.sh +++ b/tools/testing/selftests/kexec/kexec_common_lib.sh @@ -91,6 +91,27 @@ get_efi_var_secureboot_mode() return 0; } +# On powerpc platform, check device-tree property +# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing +# to detect secureboot state. +get_ppc64_secureboot_mode() +{ + local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing" + # Check for secure boot file existence + if [ -f $secure_boot_file ]; then + log_info "Secureboot is enabled (Device tree)" + return 1; + fi + log_info "Secureboot is not enabled (Device tree)" + return 0; +} + +# Return the architecture of the system +get_arch() +{ + echo $(arch) +} + # Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID). # The secure boot mode can be accessed either as the last integer # of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from @@ -100,14 +121,19 @@ get_efi_var_secureboot_mode() get_secureboot_mode() { local secureboot_mode=0 + local system_arch=$(get_arch) - get_efivarfs_secureboot_mode - secureboot_mode=$? - - # fallback to using the efi_var files - if [ $secureboot_mode -eq 0 ]; then - get_efi_var_secureboot_mode + if [ "$system_arch" == "ppc64le" ]; then + get_ppc64_secureboot_mode + secureboot_mode=$? + else + get_efivarfs_secureboot_mode secureboot_mode=$? + # fallback to using the efi_var files + if [ $secureboot_mode -eq 0 ]; then + get_efi_var_secureboot_mode + secureboot_mode=$? + fi fi if [ $secureboot_mode -eq 0 ]; then @@ -138,15 +164,20 @@ kconfig_enabled() return 0 } -# Attempt to get the kernel config first via proc, and then by -# extracting it from the kernel image or the configs.ko using -# scripts/extract-ikconfig. +# Attempt to get the kernel config first by checking the modules directory +# then via proc, and finally by extracting it from the kernel image or the +# configs.ko using scripts/extract-ikconfig. # Return 1 for found. get_kconfig() { local proc_config="/proc/config.gz" local module_dir="/lib/modules/`uname -r`" - local configs_module="$module_dir/kernel/kernel/configs.ko" + local configs_module="$module_dir/kernel/kernel/configs.ko*" + + if [ -f $module_dir/config ]; then + IKCONFIG=$module_dir/config + return 1 + fi if [ ! -f $proc_config ]; then modprobe configs > /dev/null 2>&1 diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh index 2ff600388c30..c9ccb3c93d72 100755 --- a/tools/testing/selftests/kexec/test_kexec_file_load.sh +++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh @@ -97,10 +97,11 @@ check_for_imasig() check_for_modsig() { local module_sig_string="~Module signature appended~" - local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)" local ret=0 - if [ "$sig" == "$module_sig_string" ]; then + tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \ + grep -q "$module_sig_string" + if [ $? -eq 0 ]; then ret=1 log_info "kexec kernel image modsig signed" else @@ -225,8 +226,12 @@ get_secureboot_mode secureboot=$? # Are there pe and ima signatures -check_for_pesig -pe_signed=$? +if [ "$(get_arch)" == 'ppc64le' ]; then + pe_signed=0 +else + check_for_pesig + pe_signed=$? +fi check_for_imasig ima_signed=$? diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 471eaa7b3a3f..11779405dc80 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -877,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t) @@ -987,6 +988,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255); diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 3cb5ac5da087..dce7de7755e6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -4,14 +4,16 @@ /aarch64/get-reg-list /aarch64/psci_cpu_on_test /aarch64/vgic_init +/aarch64/vgic_irq /s390x/memop /s390x/resets /s390x/sync_regs_test +/x86_64/amx_test +/x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test /x86_64/emulator_error_test -/x86_64/get_cpuid_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test /x86_64/kvm_pv_test @@ -21,6 +23,7 @@ /x86_64/mmio_warning_test /x86_64/mmu_role_test /x86_64/platform_info_test +/x86_64/pmu_event_filter_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/sev_migrate_tests @@ -35,6 +38,7 @@ /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_exception_with_invalid_guest_state /x86_64/vmx_invalid_nested_guest_state /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 17342b575e85..17c3f0749f05 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -32,17 +32,22 @@ endif ifeq ($(ARCH),s390) UNAME_M := s390x endif +# Set UNAME_M riscv compile/install to work +ifeq ($(ARCH),riscv) + UNAME_M := riscv +endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c +LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c -TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test -TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features @@ -51,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test @@ -64,6 +70,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test @@ -75,8 +82,9 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests +TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test @@ -96,6 +104,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test TEST_GEN_PROGS_aarch64 += aarch64/vgic_init +TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test @@ -119,6 +128,13 @@ TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += demand_paging_test +TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += kvm_create_max_vcpus +TEST_GEN_PROGS_riscv += kvm_page_table_test +TEST_GEN_PROGS_riscv += set_memory_region_test +TEST_GEN_PROGS_riscv += kvm_binary_stats_test + TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) @@ -133,7 +149,7 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(UNAME_M) -I.. + -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS) no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index bf6a45b0b8dc..b08d30bf71c5 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -366,6 +366,7 @@ static struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; + int ret; int nr_vcpus = test_args.nr_vcpus; vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); @@ -382,7 +383,11 @@ static struct kvm_vm *test_vm_create(void) ucall_init(vm, NULL); test_init_timer_irq(vm); - vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA); + ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + if (ret < 0) { + print_skip("Failed to create vgic-v3"); + exit(KSFT_SKIP); + } /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index cc898181faab..f769fc6cd927 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1014,6 +1014,22 @@ static __u64 sve_rejects_set[] = { KVM_REG_ARM64_SVE_VLS, }; +static __u64 pauth_addr_regs[] = { + ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */ +}; + +static __u64 pauth_generic_regs[] = { + ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */ + ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -1025,6 +1041,21 @@ static __u64 sve_rejects_set[] = { { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), } +#define PAUTH_SUBLIST \ + { \ + .name = "pauth_address", \ + .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \ + .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \ + .regs = pauth_addr_regs, \ + .regs_n = ARRAY_SIZE(pauth_addr_regs), \ + }, \ + { \ + .name = "pauth_generic", \ + .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \ + .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \ + .regs = pauth_generic_regs, \ + .regs_n = ARRAY_SIZE(pauth_generic_regs), \ + } static struct vcpu_config vregs_config = { .sublists = { @@ -1056,11 +1087,30 @@ static struct vcpu_config sve_pmu_config = { {0}, }, }; +static struct vcpu_config pauth_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; +static struct vcpu_config pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; static struct vcpu_config *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, &sve_config, &sve_pmu_config, + &pauth_config, + &pauth_pmu_config, }; static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c new file mode 100644 index 000000000000..7eca97799917 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -0,0 +1,857 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_irq.c - Test userspace injection of IRQs + * + * This test validates the injection of IRQs from userspace using various + * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the + * host to inject a specific intid via a GUEST_SYNC call, and then checks that + * it received it. + */ + +#include <asm/kvm.h> +#include <asm/kvm_para.h> +#include <sys/eventfd.h> +#include <linux/sizes.h> + +#include "processor.h" +#include "test_util.h" +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "vgic.h" + +#define GICD_BASE_GPA 0x08000000ULL +#define GICR_BASE_GPA 0x080A0000ULL +#define VCPU_ID 0 + +/* + * Stores the user specified args; it's passed to the guest and to every test + * function. + */ +struct test_args { + uint32_t nr_irqs; /* number of KVM supported IRQs. */ + bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */ + bool level_sensitive; /* 1 is level, 0 is edge */ + int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ + bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ +}; + +/* + * KVM implements 32 priority levels: + * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8 + * + * Note that these macros will still be correct in the case that KVM implements + * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2. + */ +#define KVM_NUM_PRIOS 32 +#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */ +#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */ +#define LOWEST_PRIO (KVM_NUM_PRIOS - 1) +#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */ +#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) +#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ + +static void *dist = (void *)GICD_BASE_GPA; +static void *redist = (void *)GICR_BASE_GPA; + +/* + * The kvm_inject_* utilities are used by the guest to ask the host to inject + * interrupts (e.g., using the KVM_IRQ_LINE ioctl). + */ + +typedef enum { + KVM_INJECT_EDGE_IRQ_LINE = 1, + KVM_SET_IRQ_LINE, + KVM_SET_IRQ_LINE_HIGH, + KVM_SET_LEVEL_INFO_HIGH, + KVM_INJECT_IRQFD, + KVM_WRITE_ISPENDR, + KVM_WRITE_ISACTIVER, +} kvm_inject_cmd; + +struct kvm_inject_args { + kvm_inject_cmd cmd; + uint32_t first_intid; + uint32_t num; + int level; + bool expect_failure; +}; + +/* Used on the guest side to perform the hypercall. */ +static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, + uint32_t num, int level, bool expect_failure); + +/* Used on the host side to get the hypercall info. */ +static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, + struct kvm_inject_args *args); + +#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \ + kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure) + +#define KVM_INJECT_MULTI(cmd, intid, num) \ + _KVM_INJECT_MULTI(cmd, intid, num, false) + +#define _KVM_INJECT(cmd, intid, expect_failure) \ + _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure) + +#define KVM_INJECT(cmd, intid) \ + _KVM_INJECT_MULTI(cmd, intid, 1, false) + +#define KVM_ACTIVATE(cmd, intid) \ + kvm_inject_call(cmd, intid, 1, 1, false); + +struct kvm_inject_desc { + kvm_inject_cmd cmd; + /* can inject PPIs, PPIs, and/or SPIs. */ + bool sgi, ppi, spi; +}; + +static struct kvm_inject_desc inject_edge_fns[] = { + /* sgi ppi spi */ + { KVM_INJECT_EDGE_IRQ_LINE, false, false, true }, + { KVM_INJECT_IRQFD, false, false, true }, + { KVM_WRITE_ISPENDR, true, false, true }, + { 0, }, +}; + +static struct kvm_inject_desc inject_level_fns[] = { + /* sgi ppi spi */ + { KVM_SET_IRQ_LINE_HIGH, false, true, true }, + { KVM_SET_LEVEL_INFO_HIGH, false, true, true }, + { KVM_INJECT_IRQFD, false, false, true }, + { KVM_WRITE_ISPENDR, false, true, true }, + { 0, }, +}; + +static struct kvm_inject_desc set_active_fns[] = { + /* sgi ppi spi */ + { KVM_WRITE_ISACTIVER, true, true, true }, + { 0, }, +}; + +#define for_each_inject_fn(t, f) \ + for ((f) = (t); (f)->cmd; (f)++) + +#define for_each_supported_inject_fn(args, t, f) \ + for_each_inject_fn(t, f) \ + if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD) + +#define for_each_supported_activate_fn(args, t, f) \ + for_each_supported_inject_fn((args), (t), (f)) + +/* Shared between the guest main thread and the IRQ handlers. */ +volatile uint64_t irq_handled; +volatile uint32_t irqnr_received[MAX_SPI + 1]; + +static void reset_stats(void) +{ + int i; + + irq_handled = 0; + for (i = 0; i <= MAX_SPI; i++) + irqnr_received[i] = 0; +} + +static uint64_t gic_read_ap1r0(void) +{ + uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1); + + dsb(sy); + return reg; +} + +static void gic_write_ap1r0(uint64_t val) +{ + write_sysreg_s(val, SYS_ICV_AP1R0_EL1); + isb(); +} + +static void guest_set_irq_line(uint32_t intid, uint32_t level); + +static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) +{ + uint32_t intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(gic_irq_get_active(intid)); + + if (!level_sensitive) + GUEST_ASSERT(!gic_irq_get_pending(intid)); + + if (level_sensitive) + guest_set_irq_line(intid, 0); + + GUEST_ASSERT(intid < MAX_SPI); + irqnr_received[intid] += 1; + irq_handled += 1; + + gic_set_eoi(intid); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + if (eoi_split) + gic_set_dir(intid); + + GUEST_ASSERT(!gic_irq_get_active(intid)); + GUEST_ASSERT(!gic_irq_get_pending(intid)); +} + +static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, + uint32_t num, int level, bool expect_failure) +{ + struct kvm_inject_args args = { + .cmd = cmd, + .first_intid = first_intid, + .num = num, + .level = level, + .expect_failure = expect_failure, + }; + GUEST_SYNC(&args); +} + +#define GUEST_ASSERT_IAR_EMPTY() \ +do { \ + uint32_t _intid; \ + _intid = gic_get_and_ack_irq(); \ + GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ +} while (0) + +#define CAT_HELPER(a, b) a ## b +#define CAT(a, b) CAT_HELPER(a, b) +#define PREFIX guest_irq_handler_ +#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev)) +#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \ +static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \ +{ \ + guest_irq_generic_handler(split, lev); \ +} + +GENERATE_GUEST_IRQ_HANDLER(0, 0); +GENERATE_GUEST_IRQ_HANDLER(0, 1); +GENERATE_GUEST_IRQ_HANDLER(1, 0); +GENERATE_GUEST_IRQ_HANDLER(1, 1); + +static void (*guest_irq_handlers[2][2])(struct ex_regs *) = { + {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),}, + {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),}, +}; + +static void reset_priorities(struct test_args *args) +{ + int i; + + for (i = 0; i < args->nr_irqs; i++) + gic_set_priority(i, IRQ_DEFAULT_PRIO_REG); +} + +static void guest_set_irq_line(uint32_t intid, uint32_t level) +{ + kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false); +} + +static void test_inject_fail(struct test_args *args, + uint32_t intid, kvm_inject_cmd cmd) +{ + reset_stats(); + + _KVM_INJECT(cmd, intid, true); + /* no IRQ to handle on entry */ + + GUEST_ASSERT_EQ(irq_handled, 0); + GUEST_ASSERT_IAR_EMPTY(); +} + +static void guest_inject(struct test_args *args, + uint32_t first_intid, uint32_t num, + kvm_inject_cmd cmd) +{ + uint32_t i; + + reset_stats(); + + /* Cycle over all priorities to make things more interesting. */ + for (i = first_intid; i < num + first_intid; i++) + gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3); + + asm volatile("msr daifset, #2" : : : "memory"); + KVM_INJECT_MULTI(cmd, first_intid, num); + + while (irq_handled < num) { + asm volatile("wfi\n" + "msr daifclr, #2\n" + /* handle IRQ */ + "msr daifset, #2\n" + : : : "memory"); + } + asm volatile("msr daifclr, #2" : : : "memory"); + + GUEST_ASSERT_EQ(irq_handled, num); + for (i = first_intid; i < num + first_intid; i++) + GUEST_ASSERT_EQ(irqnr_received[i], 1); + GUEST_ASSERT_IAR_EMPTY(); + + reset_priorities(args); +} + +/* + * Restore the active state of multiple concurrent IRQs (given by + * concurrent_irqs). This does what a live-migration would do on the + * destination side assuming there are some active IRQs that were not + * deactivated yet. + */ +static void guest_restore_active(struct test_args *args, + uint32_t first_intid, uint32_t num, + kvm_inject_cmd cmd) +{ + uint32_t prio, intid, ap1r; + int i; + + /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + * in descending order, so intid+1 can preempt intid. + */ + for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { + GUEST_ASSERT(prio >= 0); + intid = i + first_intid; + gic_set_priority(intid, prio); + } + + /* In a real migration, KVM would restore all GIC state before running + * guest code. + */ + for (i = 0; i < num; i++) { + intid = i + first_intid; + KVM_ACTIVATE(cmd, intid); + ap1r = gic_read_ap1r0(); + ap1r |= 1U << i; + gic_write_ap1r0(ap1r); + } + + /* This is where the "migration" would occur. */ + + /* finish handling the IRQs starting with the highest priority one. */ + for (i = 0; i < num; i++) { + intid = num - i - 1 + first_intid; + gic_set_eoi(intid); + if (args->eoi_split) + gic_set_dir(intid); + } + + for (i = 0; i < num; i++) + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + GUEST_ASSERT_IAR_EMPTY(); +} + +/* + * Polls the IAR until it's not a spurious interrupt. + * + * This function should only be used in test_inject_preemption (with IRQs + * masked). + */ +static uint32_t wait_for_and_activate_irq(void) +{ + uint32_t intid; + + do { + asm volatile("wfi" : : : "memory"); + intid = gic_get_and_ack_irq(); + } while (intid == IAR_SPURIOUS); + + return intid; +} + +/* + * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and + * handle them without handling the actual exceptions. This is done by masking + * interrupts for the whole test. + */ +static void test_inject_preemption(struct test_args *args, + uint32_t first_intid, int num, + kvm_inject_cmd cmd) +{ + uint32_t intid, prio, step = KVM_PRIO_STEPS; + int i; + + /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + * in descending order, so intid+1 can preempt intid. + */ + for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) { + GUEST_ASSERT(prio >= 0); + intid = i + first_intid; + gic_set_priority(intid, prio); + } + + local_irq_disable(); + + for (i = 0; i < num; i++) { + uint32_t tmp; + intid = i + first_intid; + KVM_INJECT(cmd, intid); + /* Each successive IRQ will preempt the previous one. */ + tmp = wait_for_and_activate_irq(); + GUEST_ASSERT_EQ(tmp, intid); + if (args->level_sensitive) + guest_set_irq_line(intid, 0); + } + + /* finish handling the IRQs starting with the highest priority one. */ + for (i = 0; i < num; i++) { + intid = num - i - 1 + first_intid; + gic_set_eoi(intid); + if (args->eoi_split) + gic_set_dir(intid); + } + + local_irq_enable(); + + for (i = 0; i < num; i++) + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); + GUEST_ASSERT_IAR_EMPTY(); + + reset_priorities(args); +} + +static void test_injection(struct test_args *args, struct kvm_inject_desc *f) +{ + uint32_t nr_irqs = args->nr_irqs; + + if (f->sgi) { + guest_inject(args, MIN_SGI, 1, f->cmd); + guest_inject(args, 0, 16, f->cmd); + } + + if (f->ppi) + guest_inject(args, MIN_PPI, 1, f->cmd); + + if (f->spi) { + guest_inject(args, MIN_SPI, 1, f->cmd); + guest_inject(args, nr_irqs - 1, 1, f->cmd); + guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd); + } +} + +static void test_injection_failure(struct test_args *args, + struct kvm_inject_desc *f) +{ + uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; + int i; + + for (i = 0; i < ARRAY_SIZE(bad_intid); i++) + test_inject_fail(args, bad_intid[i], f->cmd); +} + +static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) +{ + /* + * Test up to 4 levels of preemption. The reason is that KVM doesn't + * currently implement the ability to have more than the number-of-LRs + * number of concurrently active IRQs. The number of LRs implemented is + * IMPLEMENTATION DEFINED, however, it seems that most implement 4. + */ + if (f->sgi) + test_inject_preemption(args, MIN_SGI, 4, f->cmd); + + if (f->ppi) + test_inject_preemption(args, MIN_PPI, 4, f->cmd); + + if (f->spi) + test_inject_preemption(args, MIN_SPI, 4, f->cmd); +} + +static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) +{ + /* Test up to 4 active IRQs. Same reason as in test_preemption. */ + if (f->sgi) + guest_restore_active(args, MIN_SGI, 4, f->cmd); + + if (f->ppi) + guest_restore_active(args, MIN_PPI, 4, f->cmd); + + if (f->spi) + guest_restore_active(args, MIN_SPI, 4, f->cmd); +} + +static void guest_code(struct test_args args) +{ + uint32_t i, nr_irqs = args.nr_irqs; + bool level_sensitive = args.level_sensitive; + struct kvm_inject_desc *f, *inject_fns; + + gic_init(GIC_V3, 1, dist, redist); + + for (i = 0; i < nr_irqs; i++) + gic_irq_enable(i); + + for (i = MIN_SPI; i < nr_irqs; i++) + gic_irq_set_config(i, !args.level_sensitive); + + gic_set_eoi_split(args.eoi_split); + + reset_priorities(&args); + gic_set_priority_mask(CPU_PRIO_MASK); + + inject_fns = level_sensitive ? inject_level_fns + : inject_edge_fns; + + local_irq_enable(); + + /* Start the tests. */ + for_each_supported_inject_fn(&args, inject_fns, f) { + test_injection(&args, f); + test_preemption(&args, f); + test_injection_failure(&args, f); + } + + /* Restore the active state of IRQs. This would happen when live + * migrating IRQs in the middle of being handled. + */ + for_each_supported_activate_fn(&args, set_active_fns, f) + test_restore_active(&args, f); + + GUEST_DONE(); +} + +static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, + struct test_args *test_args, bool expect_failure) +{ + int ret; + + if (!expect_failure) { + kvm_arm_irq_line(vm, intid, level); + } else { + /* The interface doesn't allow larger intid's. */ + if (intid > KVM_ARM_IRQ_NUM_MASK) + return; + + ret = _kvm_arm_irq_line(vm, intid, level); + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %i did not cause KVM_IRQ_LINE " + "error: rc: %i errno: %i", intid, ret, errno); + } +} + +void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, + bool expect_failure) +{ + if (!expect_failure) { + kvm_irq_set_level_info(gic_fd, intid, level); + } else { + int ret = _kvm_irq_set_level_info(gic_fd, intid, level); + /* + * The kernel silently fails for invalid SPIs and SGIs (which + * are not level-sensitive). It only checks for intid to not + * spill over 1U << 10 (the max reserved SPI). Also, callers + * are supposed to mask the intid with 0x3ff (1023). + */ + if (intid > VGIC_MAX_RESERVED) + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO " + "error: rc: %i errno: %i", intid, ret, errno); + else + TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO " + "for intid %i failed, rc: %i errno: %i", + intid, ret, errno); + } +} + +static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, + uint32_t intid, uint32_t num, uint32_t kvm_max_routes, + bool expect_failure) +{ + struct kvm_irq_routing *routing; + int ret; + uint64_t i; + + assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES); + + routing = kvm_gsi_routing_create(); + for (i = intid; i < (uint64_t)intid + num; i++) + kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI); + + if (!expect_failure) { + kvm_gsi_routing_write(vm, routing); + } else { + ret = _kvm_gsi_routing_write(vm, routing); + /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */ + if (intid >= KVM_IRQCHIP_NUM_PINS) + TEST_ASSERT(ret != 0 && errno == EINVAL, + "Bad intid %u did not cause KVM_SET_GSI_ROUTING " + "error: rc: %i errno: %i", intid, ret, errno); + else + TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING " + "for intid %i failed, rc: %i errno: %i", + intid, ret, errno); + } +} + +static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, + uint32_t vcpu, bool expect_failure) +{ + /* + * Ignore this when expecting failure as invalid intids will lead to + * either trying to inject SGIs when we configured the test to be + * level_sensitive (or the reverse), or inject large intids which + * will lead to writing above the ISPENDR register space (and we + * don't want to do that either). + */ + if (!expect_failure) + kvm_irq_write_ispendr(gic_fd, intid, vcpu); +} + +static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, + uint32_t intid, uint32_t num, uint32_t kvm_max_routes, + bool expect_failure) +{ + int fd[MAX_SPI]; + uint64_t val; + int ret, f; + uint64_t i; + + /* + * There is no way to try injecting an SGI or PPI as the interface + * starts counting from the first SPI (above the private ones), so just + * exit. + */ + if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid)) + return; + + kvm_set_gsi_routing_irqchip_check(vm, intid, num, + kvm_max_routes, expect_failure); + + /* + * If expect_failure, then just to inject anyway. These + * will silently fail. And in any case, the guest will check + * that no actual interrupt was injected for those cases. + */ + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + fd[f] = eventfd(0, 0); + TEST_ASSERT(fd[f] != -1, + "eventfd failed, errno: %i\n", errno); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + struct kvm_irqfd irqfd = { + .fd = fd[f], + .gsi = i - MIN_SPI, + }; + assert(i <= (uint64_t)UINT_MAX); + vm_ioctl(vm, KVM_IRQFD, &irqfd); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + val = 1; + ret = write(fd[f], &val, sizeof(uint64_t)); + TEST_ASSERT(ret == sizeof(uint64_t), + "Write to KVM_IRQFD failed with ret: %d\n", ret); + } + + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + close(fd[f]); +} + +/* handles the valid case: intid=0xffffffff num=1 */ +#define for_each_intid(first, num, tmp, i) \ + for ((tmp) = (i) = (first); \ + (tmp) < (uint64_t)(first) + (uint64_t)(num); \ + (tmp)++, (i)++) + +static void run_guest_cmd(struct kvm_vm *vm, int gic_fd, + struct kvm_inject_args *inject_args, + struct test_args *test_args) +{ + kvm_inject_cmd cmd = inject_args->cmd; + uint32_t intid = inject_args->first_intid; + uint32_t num = inject_args->num; + int level = inject_args->level; + bool expect_failure = inject_args->expect_failure; + uint64_t tmp; + uint32_t i; + + /* handles the valid case: intid=0xffffffff num=1 */ + assert(intid < UINT_MAX - num || num == 1); + + switch (cmd) { + case KVM_INJECT_EDGE_IRQ_LINE: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 1, test_args, + expect_failure); + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 0, test_args, + expect_failure); + break; + case KVM_SET_IRQ_LINE: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, level, test_args, + expect_failure); + break; + case KVM_SET_IRQ_LINE_HIGH: + for_each_intid(intid, num, tmp, i) + kvm_irq_line_check(vm, i, 1, test_args, + expect_failure); + break; + case KVM_SET_LEVEL_INFO_HIGH: + for_each_intid(intid, num, tmp, i) + kvm_irq_set_level_info_check(gic_fd, i, 1, + expect_failure); + break; + case KVM_INJECT_IRQFD: + kvm_routing_and_irqfd_check(vm, intid, num, + test_args->kvm_max_routes, + expect_failure); + break; + case KVM_WRITE_ISPENDR: + for (i = intid; i < intid + num; i++) + kvm_irq_write_ispendr_check(gic_fd, i, + VCPU_ID, expect_failure); + break; + case KVM_WRITE_ISACTIVER: + for (i = intid; i < intid + num; i++) + kvm_irq_write_isactiver(gic_fd, i, VCPU_ID); + break; + default: + break; + } +} + +static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, + struct kvm_inject_args *args) +{ + struct kvm_inject_args *kvm_args_hva; + vm_vaddr_t kvm_args_gva; + + kvm_args_gva = uc->args[1]; + kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva); + memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args)); +} + +static void print_args(struct test_args *args) +{ + printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n", + args->nr_irqs, args->level_sensitive, + args->eoi_split); +} + +static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) +{ + struct ucall uc; + int gic_fd; + struct kvm_vm *vm; + struct kvm_inject_args inject_args; + + struct test_args args = { + .nr_irqs = nr_irqs, + .level_sensitive = level_sensitive, + .eoi_split = eoi_split, + .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING), + .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD), + }; + + print_args(&args); + + vm = vm_create_default(VCPU_ID, 0, guest_code); + ucall_init(vm, NULL); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + /* Setup the guest args page (so it gets the args). */ + vcpu_args_set(vm, 0, 1, args); + + gic_fd = vgic_v3_setup(vm, 1, nr_irqs, + GICD_BASE_GPA, GICR_BASE_GPA); + if (gic_fd < 0) { + print_skip("Failed to create vgic-v3, skipping"); + exit(KSFT_SKIP); + } + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, + guest_irq_handlers[args.eoi_split][args.level_sensitive]); + + while (1) { + vcpu_run(vm, VCPU_ID); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + kvm_inject_get_call(vm, &uc, &inject_args); + run_guest_cmd(vm, gic_fd, &inject_args, &args); + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + close(gic_fd); + kvm_vm_free(vm); +} + +static void help(const char *name) +{ + printf( + "\n" + "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name); + printf(" -n: specify number of IRQs to setup the vgic with. " + "It has to be a multiple of 32 and between 64 and 1024.\n"); + printf(" -e: if 1 then EOI is split into a write to DIR on top " + "of writing EOI.\n"); + printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0)."); + puts(""); + exit(1); +} + +int main(int argc, char **argv) +{ + uint32_t nr_irqs = 64; + bool default_args = true; + bool level_sensitive = false; + int opt; + bool eoi_split = false; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { + switch (opt) { + case 'n': + nr_irqs = atoi(optarg); + if (nr_irqs > 1024 || nr_irqs % 32) + help(argv[0]); + break; + case 'e': + eoi_split = (bool)atoi(optarg); + default_args = false; + break; + case 'l': + level_sensitive = (bool)atoi(optarg); + default_args = false; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + /* If the user just specified nr_irqs and/or gic_version, then run all + * combinations. + */ + if (default_args) { + test_vgic(nr_irqs, false /* level */, false /* eoi_split */); + test_vgic(nr_irqs, false /* level */, true /* eoi_split */); + test_vgic(nr_irqs, true /* level */, false /* eoi_split */); + test_vgic(nr_irqs, true /* level */, true /* eoi_split */); + } else { + test_vgic(nr_irqs, level_sensitive, eoi_split); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h index 85dd1e53048e..b217ea17cac5 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic.h @@ -11,11 +11,37 @@ enum gic_type { GIC_TYPE_MAX, }; +#define MIN_SGI 0 +#define MIN_PPI 16 +#define MIN_SPI 32 +#define MAX_SPI 1019 +#define IAR_SPURIOUS 1023 + +#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI) +#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) +#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) + void gic_init(enum gic_type type, unsigned int nr_cpus, void *dist_base, void *redist_base); void gic_irq_enable(unsigned int intid); void gic_irq_disable(unsigned int intid); unsigned int gic_get_and_ack_irq(void); void gic_set_eoi(unsigned int intid); +void gic_set_dir(unsigned int intid); + +/* + * Sets the EOI mode. When split is false, EOI just drops the priority. When + * split is true, EOI drops the priority and deactivates the interrupt. + */ +void gic_set_eoi_split(bool split); +void gic_set_priority_mask(uint64_t mask); +void gic_set_priority(uint32_t intid, uint32_t prio); +void gic_irq_set_active(unsigned int intid); +void gic_irq_clear_active(unsigned int intid); +bool gic_irq_get_active(unsigned int intid); +void gic_irq_set_pending(unsigned int intid); +void gic_irq_clear_pending(unsigned int intid); +bool gic_irq_get_pending(unsigned int intid); +void gic_irq_set_config(unsigned int intid, bool is_edge); #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h index b51536d469a6..ba0886e8a2bb 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h @@ -16,8 +16,12 @@ #define GICD_IGROUPR 0x0080 #define GICD_ISENABLER 0x0100 #define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 #define GICD_ICACTIVER 0x0380 +#define GICD_ISACTIVER 0x0300 #define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 /* * The assumption is that the guest runs in a non-secure mode. @@ -49,16 +53,24 @@ #define GICR_IGROUPR0 GICD_IGROUPR #define GICR_ISENABLER0 GICD_ISENABLER #define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER #define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_ICENABLER GICD_ICENABLER +#define GICR_ICACTIVER GICD_ICACTIVER #define GICR_IPRIORITYR0 GICD_IPRIORITYR /* CPU interface registers */ #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) #define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0) + #define ICC_PMR_DEF_PRIO 0xf0 #define ICC_SRE_EL1_SRE (1U << 0) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 27d8e1bb5b36..8f9f46979a00 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -113,6 +113,9 @@ enum { #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +void aarch64_get_supported_page_sizes(uint32_t ipa, + bool *ps4k, bool *ps16k, bool *ps64k); + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h index 0ecfb253893c..4442081221a0 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h @@ -14,7 +14,21 @@ ((uint64_t)(flags) << 12) | \ index) -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, uint64_t gicd_base_gpa, uint64_t gicr_base_gpa); -#endif /* SELFTEST_KVM_VGIC_H */ +#define VGIC_MAX_RESERVED 1023 + +void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); +int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); + +void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); +int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); + +/* The vcpu arg only applies to private interrupts. */ +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu); +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu); + +#define KVM_IRQCHIP_NUM_PINS (1020 - 32) + +#endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2d62edc49d67..c9286811a4cb 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -7,412 +7,7 @@ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H -#include "test_util.h" - -#include "asm/kvm.h" -#include "linux/list.h" -#include "linux/kvm.h" -#include <sys/ioctl.h> - -#include "sparsebit.h" - -#define KVM_DEV_PATH "/dev/kvm" -#define KVM_MAX_VCPUS 512 - -#define NSEC_PER_SEC 1000000000L - -/* - * Callers of kvm_util only have an incomplete/opaque description of the - * structure kvm_util is using to maintain the state of a VM. - */ -struct kvm_vm; - -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ - -/* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - -#define DEFAULT_GUEST_PHY_PAGES 512 -#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 - -enum vm_guest_mode { - VM_MODE_P52V48_4K, - VM_MODE_P52V48_64K, - VM_MODE_P48V48_4K, - VM_MODE_P48V48_64K, - VM_MODE_P40V48_4K, - VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ - VM_MODE_P47V64_4K, - VM_MODE_P44V64_4K, - NUM_VM_MODES, -}; - -#if defined(__aarch64__) - -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__x86_64__) - -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__s390x__) - -#define VM_MODE_DEFAULT VM_MODE_P44V64_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 16) - -#endif - -#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) -#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) - -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; -extern const struct vm_guest_mode_params vm_guest_mode_params[]; - -int open_path_or_exit(const char *path, int flags); -int open_kvm_dev_path_or_exit(void); -int kvm_check_cap(long cap); -int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); -int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_enable_cap *cap); -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); - -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -void kvm_vm_free(struct kvm_vm *vmp); -void kvm_vm_restart(struct kvm_vm *vmp, int perm); -void kvm_vm_release(struct kvm_vm *vmp); -void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); -void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages); -uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); - -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); - -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); - -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -/* - * VM VCPU Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * vcpuid - VCPU ID - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the current state of the VCPU specified by @vcpuid, within the VM - * given by @vm, to the FILE stream given by @stream. - */ -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, - uint8_t indent); - -void vm_create_irqchip(struct kvm_vm *vm); - -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); - -void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, - void *arg); -int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, - void *arg); -void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); -int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); -void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); -int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); - -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); - -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gva - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Returns the VM physical address of the translated VM virtual - * address given by @gva. - */ -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); - -struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); -int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); -int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_guest_debug *debug); -void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state); -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); -void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); - -/* - * VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first @num function input registers of the VCPU with @vcpuid, - * per the C calling convention of the architecture, to the values given - * as variable args. Each of the variable args is expected to be of type - * uint64_t. The maximum @num can be is specific to the architecture. - */ -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); - -void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_fpu *fpu); -void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_fpu *fpu); -void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); -void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); -#ifdef __KVM_HAVE_VCPU_EVENTS -void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -#endif -#ifdef __x86_64__ -void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state); -int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state, bool ignore_error); -#endif -void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); - -int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); -int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); -int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); -int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); -int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write); -int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write); - -int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr); -int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr); -int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write); -int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write); - -const char *exit_reason_str(unsigned int exit_reason); - -void virt_pgd_alloc(struct kvm_vm *vm); - -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. - */ -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); - -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); - -/* - * Create a VM with reasonable defaults - * - * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The number of extra pages to add (this will - * decide how much extra space we will need to - * setup the page tables using memslot 0) - * guest_code - The vCPU's entry point - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. - */ -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code); - -/* Same as vm_create_default, but can be used for more than one vcpu */ -struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]); - -/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */ -struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t slot0_mem_pages, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]); - -/* - * Adds a vCPU with reasonable defaults (e.g. a stack) - * - * Input Args: - * vm - Virtual Machine - * vcpuid - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point - */ -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); - -bool vm_is_unrestricted_guest(struct kvm_vm *vm); - -unsigned int vm_get_page_size(struct kvm_vm *vm); -unsigned int vm_get_page_shift(struct kvm_vm *vm); -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -uint64_t vm_get_max_gfn(struct kvm_vm *vm); -int vm_get_fd(struct kvm_vm *vm); - -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); -unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); -unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); -static inline unsigned int -vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) -{ - unsigned int n; - n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); -#ifdef __s390x__ - /* s390 requires 1M aligned guest sizes */ - n = (n + 255) & ~255; -#endif - return n; -} - -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end); - -struct kvm_dirty_log * -allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); - -int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); - -#define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(_p, &(g), sizeof(g)); \ -}) - -#define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(&(g), _p, sizeof(g)); \ -}) - -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); - -/* Common ucalls */ -enum { - UCALL_NONE, - UCALL_SYNC, - UCALL_ABORT, - UCALL_DONE, - UCALL_UNHANDLED, -}; - -#define UCALL_MAX_ARGS 6 - -struct ucall { - uint64_t cmd; - uint64_t args[UCALL_MAX_ARGS]; -}; - -void ucall_init(struct kvm_vm *vm, void *arg); -void ucall_uninit(struct kvm_vm *vm); -void ucall(uint64_t cmd, int nargs, ...); -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); - -#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ - ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) -#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) -#define GUEST_DONE() ucall(UCALL_DONE, 0) -#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, 2 + _nargs, \ - "Failed guest assert: " \ - _condstr, __LINE__, _args); \ -} while (0) - -#define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT(_condition, #_condition, 0, 0) - -#define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) - -#define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) - -#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) - -#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) - -#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) - -int vm_get_stats_fd(struct kvm_vm *vm); -int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); - -uint32_t guest_get_vcpuid(void); +#include "kvm_util_base.h" +#include "ucall_common.h" #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h new file mode 100644 index 000000000000..4ed6aa049a91 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/kvm_util_base.h + * + * Copyright (C) 2018, Google LLC. + */ +#ifndef SELFTEST_KVM_UTIL_BASE_H +#define SELFTEST_KVM_UTIL_BASE_H + +#include "test_util.h" + +#include "asm/kvm.h" +#include "linux/list.h" +#include "linux/kvm.h" +#include <sys/ioctl.h> + +#include "sparsebit.h" + +#define KVM_DEV_PATH "/dev/kvm" +#define KVM_MAX_VCPUS 512 + +#define NSEC_PER_SEC 1000000000L + +/* + * Callers of kvm_util only have an incomplete/opaque description of the + * structure kvm_util is using to maintain the state of a VM. + */ +struct kvm_vm; + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define DEFAULT_GUEST_PHY_PAGES 512 +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_16K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_16K, + VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + VM_MODE_P36V48_4K, + VM_MODE_P36V48_16K, + VM_MODE_P36V48_64K, + VM_MODE_P36V47_16K, + NUM_VM_MODES, +}; + +#if defined(__aarch64__) + +extern enum vm_guest_mode vm_mode_default; + +#define VM_MODE_DEFAULT vm_mode_default +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__x86_64__) + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + +#elif defined(__riscv) + +#if __riscv_xlen == 32 +#error "RISC-V 32-bit kvm selftests not supported" +#endif + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#endif + +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + +int open_path_or_exit(const char *path, int flags); +int open_kvm_dev_path_or_exit(void); +int kvm_check_cap(long cap); +int vm_check_cap(struct kvm_vm *vm, long cap); +int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); + +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); +void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp, int perm); +void kvm_vm_release(struct kvm_vm *vmp); +void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages); +uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); + +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); + +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +/* + * VM VCPU Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps the current state of the VCPU specified by @vcpuid, within the VM + * given by @vm, to the FILE stream given by @stream. + */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, + uint8_t indent); + +void vm_create_irqchip(struct kvm_vm *vm); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); + +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); +int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); +void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); +void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + unsigned int npages); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_guest_debug *debug); +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num function input registers of the VCPU with @vcpuid, + * per the C calling convention of the architecture, to the values given + * as variable args. Each of the variable args is expected to be of type + * uint64_t. The maximum @num can be is specific to the architecture. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); + +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +#ifdef __KVM_HAVE_VCPU_EVENTS +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +#endif +#ifdef __x86_64__ +void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_nested_state *state); +int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_nested_state *state, bool ignore_error); +#endif +void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); + +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); + +int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, + uint64_t attr); +int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, + uint64_t attr); +int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, + uint64_t attr, void *val, bool write); +int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, + uint64_t attr, void *val, bool write); + +#define KVM_MAX_IRQ_ROUTES 4096 + +struct kvm_irq_routing *kvm_gsi_routing_create(void); +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin); +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); + +const char *exit_reason_str(unsigned int exit_reason); + +void virt_pgd_alloc(struct kvm_vm *vm); + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); + +/* + * Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The number of extra pages to add (this will + * decide how much extra space we will need to + * setup the page tables using memslot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code); + +/* Same as vm_create_default, but can be used for more than one vcpu */ +struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */ +struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t slot0_mem_pages, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); + +bool vm_is_unrestricted_guest(struct kvm_vm *vm); + +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +uint64_t vm_get_max_gfn(struct kvm_vm *vm); +int vm_get_fd(struct kvm_vm *vm); + +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; +} + +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end); + +struct kvm_dirty_log * +allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); + +int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); + +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + +int vm_get_stats_fd(struct kvm_vm *vm); +int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); + +uint32_t guest_get_vcpuid(void); + +#endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h new file mode 100644 index 000000000000..dc284c6bdbc3 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RISC-V processor specific defines + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include "kvm_util.h" +#include <linux/stringify.h> + +static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, + uint64_t size) +{ + return KVM_REG_RISCV | type | idx | size; +} + +#if __riscv_xlen == 64 +#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64 +#else +#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32 +#endif + +#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \ + KVM_REG_RISCV_CONFIG_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \ + KVM_REG_RISCV_CORE_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \ + KVM_REG_RISCV_CSR_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \ + KVM_REG_RISCV_TIMER_REG(name), \ + KVM_REG_SIZE_U64) + +static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, + unsigned long *addr) +{ + struct kvm_one_reg reg; + + reg.id = id; + reg.addr = (unsigned long)addr; + vcpu_get_reg(vm, vcpuid, ®); +} + +static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, + unsigned long val) +{ + struct kvm_one_reg reg; + + reg.id = id; + reg.addr = (unsigned long)&val; + vcpu_set_reg(vm, vcpuid, ®); +} + +/* L3 index Bit[47:39] */ +#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL +#define PGTBL_L3_INDEX_SHIFT 39 +#define PGTBL_L3_BLOCK_SHIFT 39 +#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL +#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1)) +/* L2 index Bit[38:30] */ +#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL +#define PGTBL_L2_INDEX_SHIFT 30 +#define PGTBL_L2_BLOCK_SHIFT 30 +#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL +#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1)) +/* L1 index Bit[29:21] */ +#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL +#define PGTBL_L1_INDEX_SHIFT 21 +#define PGTBL_L1_BLOCK_SHIFT 21 +#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL +#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1)) +/* L0 index Bit[20:12] */ +#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL +#define PGTBL_L0_INDEX_SHIFT 12 +#define PGTBL_L0_BLOCK_SHIFT 12 +#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL +#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1)) + +#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL +#define PGTBL_PTE_ADDR_SHIFT 10 +#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL +#define PGTBL_PTE_RSW_SHIFT 8 +#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL +#define PGTBL_PTE_DIRTY_SHIFT 7 +#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL +#define PGTBL_PTE_ACCESSED_SHIFT 6 +#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL +#define PGTBL_PTE_GLOBAL_SHIFT 5 +#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL +#define PGTBL_PTE_USER_SHIFT 4 +#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL +#define PGTBL_PTE_EXECUTE_SHIFT 3 +#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL +#define PGTBL_PTE_WRITE_SHIFT 2 +#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL +#define PGTBL_PTE_READ_SHIFT 1 +#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \ + PGTBL_PTE_WRITE_MASK | \ + PGTBL_PTE_READ_MASK) +#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL +#define PGTBL_PTE_VALID_SHIFT 0 + +#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE +#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT + +#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) +#define SATP_MODE_39 _AC(0x8000000000000000, UL) +#define SATP_MODE_48 _AC(0x9000000000000000, UL) +#define SATP_ASID_BITS 16 +#define SATP_ASID_SHIFT 44 +#define SATP_ASID_MASK _AC(0xFFFF, UL) + +#define SBI_EXT_EXPERIMENTAL_START 0x08000000 +#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF + +#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END + +struct sbiret { + long error; + long value; +}; + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h new file mode 100644 index 000000000000..9eecc9d40b79 --- /dev/null +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/kvm_util.h + * + * Copyright (C) 2018, Google LLC. + */ +#ifndef SELFTEST_KVM_UCALL_COMMON_H +#define SELFTEST_KVM_UCALL_COMMON_H + +/* Common ucalls */ +enum { + UCALL_NONE, + UCALL_SYNC, + UCALL_ABORT, + UCALL_DONE, + UCALL_UNHANDLED, +}; + +#define UCALL_MAX_ARGS 6 + +struct ucall { + uint64_t cmd; + uint64_t args[UCALL_MAX_ARGS]; +}; + +void ucall_init(struct kvm_vm *vm, void *arg); +void ucall_uninit(struct kvm_vm *vm); +void ucall(uint64_t cmd, int nargs, ...); +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); + +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ + ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) +#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_DONE() ucall(UCALL_DONE, 0) +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2 + _nargs, \ + "Failed guest assert: " \ + _condstr, __LINE__, _args); \ +} while (0) + +#define GUEST_ASSERT(_condition) \ + __GUEST_ASSERT(_condition, #_condition, 0, 0) + +#define GUEST_ASSERT_1(_condition, arg1) \ + __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) + +#define GUEST_ASSERT_2(_condition, arg1, arg2) \ + __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) + +#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ + __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) + +#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ + __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) + +#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) + +#endif /* SELFTEST_KVM_UCALL_COMMON_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 05e65ca1c30c..8a470da7b71a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -10,8 +10,10 @@ #include <assert.h> #include <stdint.h> +#include <syscall.h> #include <asm/msr-index.h> +#include <asm/prctl.h> #include "../kvm_util.h" @@ -92,6 +94,21 @@ struct desc_ptr { uint64_t address; } __attribute__((packed)); +struct kvm_x86_state { + struct kvm_xsave *xsave; + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + static inline uint64_t get_desc64_base(const struct desc64 *desc) { return ((uint64_t)desc->base3 << 32) | @@ -347,17 +364,37 @@ static inline unsigned long get_xmm(int n) } bool is_intel_cpu(void); +bool is_amd_cpu(void); + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} -struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); +void kvm_x86_state_cleanup(struct kvm_x86_state *state); struct kvm_msr_list *kvm_get_msr_index_list(void); uint64_t kvm_get_feature_msr(uint64_t msr_index); struct kvm_cpuid2 *kvm_get_supported_cpuid(void); struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid); void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); @@ -402,6 +439,11 @@ void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, uint64_t pte); /* + * get_cpuid() - find matching CPUID entry and return pointer to it. + */ +struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index); +/* * set_cpuid() - overwrites a matching cpuid entry with the provided value. * matches based on ent->function && ent->index. returns true * if a match was found and successfully overwritten. @@ -416,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +void vm_xsave_req_perm(int bit); enum x86_page_size { X86_PAGE_SIZE_4K = 0, @@ -443,4 +486,11 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, /* VMX_EPT_VPID_CAP bits */ #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 + +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ + XSTATE_XTILE_DATA_MASK) #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c index fff4fc27504d..55668631d546 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c @@ -93,3 +93,69 @@ void gic_set_eoi(unsigned int intid) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_write_eoir(intid); } + +void gic_set_dir(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_write_dir(intid); +} + +void gic_set_eoi_split(bool split) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_eoi_split(split); +} + +void gic_set_priority_mask(uint64_t pmr) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority_mask(pmr); +} + +void gic_set_priority(unsigned int intid, unsigned int prio) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority(intid, prio); +} + +void gic_irq_set_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_active(intid); +} + +void gic_irq_clear_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_active(intid); +} + +bool gic_irq_get_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_active(intid); +} + +void gic_irq_set_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_pending(intid); +} + +void gic_irq_clear_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_pending(intid); +} + +bool gic_irq_get_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_pending(intid); +} + +void gic_irq_set_config(unsigned int intid, bool is_edge) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_config(intid, is_edge); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h index d81d739433dc..75d07313c893 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h @@ -14,6 +14,17 @@ struct gic_common_ops { void (*gic_irq_disable)(unsigned int intid); uint64_t (*gic_read_iar)(void); void (*gic_write_eoir)(uint32_t irq); + void (*gic_write_dir)(uint32_t irq); + void (*gic_set_eoi_split)(bool split); + void (*gic_set_priority_mask)(uint64_t mask); + void (*gic_set_priority)(uint32_t intid, uint32_t prio); + void (*gic_irq_set_active)(uint32_t intid); + void (*gic_irq_clear_active)(uint32_t intid); + bool (*gic_irq_get_active)(uint32_t intid); + void (*gic_irq_set_pending)(uint32_t intid); + void (*gic_irq_clear_pending)(uint32_t intid); + bool (*gic_irq_get_pending)(uint32_t intid); + void (*gic_irq_set_config)(uint32_t intid, bool is_edge); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 2dbf3339b62e..00f613c0583c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -19,7 +19,8 @@ struct gicv3_data { unsigned int nr_spis; }; -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define DIST_BIT (1U << 31) enum gicv3_intid_range { SGI_RANGE, @@ -50,6 +51,14 @@ static void gicv3_gicr_wait_for_rwp(void *redist_base) } } +static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +{ + if (cpu_or_dist & DIST_BIT) + gicv3_gicd_wait_for_rwp(); + else + gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]); +} + static enum gicv3_intid_range get_intid_range(unsigned int intid) { switch (intid) { @@ -81,39 +90,175 @@ static void gicv3_write_eoir(uint32_t irq) isb(); } -static void -gicv3_config_irq(unsigned int intid, unsigned int offset) +static void gicv3_write_dir(uint32_t irq) +{ + write_sysreg_s(irq, SYS_ICC_DIR_EL1); + isb(); +} + +static void gicv3_set_priority_mask(uint64_t mask) +{ + write_sysreg_s(mask, SYS_ICC_PMR_EL1); +} + +static void gicv3_set_eoi_split(bool split) +{ + uint32_t val; + + /* All other fields are read-only, so no need to read CTLR first. In + * fact, the kernel does the same. + */ + val = split ? (1U << 1) : 0; + write_sysreg_s(val, SYS_ICC_CTLR_EL1); + isb(); +} + +uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +{ + void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base + : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + return readl(base + offset); +} + +void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +{ + void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base + : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + writel(reg_val, base + offset); +} + +uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +{ + return gicv3_reg_readl(cpu_or_dist, offset) & mask; +} + +void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, + uint32_t mask, uint32_t reg_val) +{ + uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + + tmp |= (reg_val & mask); + gicv3_reg_writel(cpu_or_dist, offset, tmp); +} + +/* + * We use a single offset for the distributor and redistributor maps as they + * have the same value in both. The only exceptions are registers that only + * exist in one and not the other, like GICR_WAKER that doesn't exist in the + * distributor map. Such registers are conveniently marked as reserved in the + * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being + * marked as "Reserved" in the Distributor map. + */ +static void gicv3_access_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, + bool write, uint32_t *val) { uint32_t cpu = guest_get_vcpuid(); - uint32_t mask = 1 << (intid % 32); enum gicv3_intid_range intid_range = get_intid_range(intid); - void *reg; - - /* We care about 'cpu' only for SGIs or PPIs */ - if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) { - GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - - reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) + - offset; - writel(mask, reg); - gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]); - } else if (intid_range == SPI_RANGE) { - reg = gicv3_data.dist_base + offset + (intid / 32) * 4; - writel(mask, reg); - gicv3_gicd_wait_for_rwp(); - } else { - GUEST_ASSERT(0); - } + uint32_t fields_per_reg, index, mask, shift; + uint32_t cpu_or_dist; + + GUEST_ASSERT(bits_per_field <= reg_bits); + GUEST_ASSERT(*val < (1U << bits_per_field)); + /* Some registers like IROUTER are 64 bit long. Those are currently not + * supported by readl nor writel, so just asserting here until then. + */ + GUEST_ASSERT(reg_bits == 32); + + fields_per_reg = reg_bits / bits_per_field; + index = intid % fields_per_reg; + shift = index * bits_per_field; + mask = ((1U << bits_per_field) - 1) << shift; + + /* Set offset to the actual register holding intid's config. */ + offset += (intid / fields_per_reg) * (reg_bits / 8); + + cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu; + + if (write) + gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift); + *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; +} + +static void gicv3_write_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +{ + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, true, &val); +} + +static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field) +{ + uint32_t val; + + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, false, &val); + return val; +} + +static void gicv3_set_priority(uint32_t intid, uint32_t prio) +{ + gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); +} + +/* Sets the intid to be level-sensitive or edge-triggered. */ +static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +{ + uint32_t val; + + /* N/A for private interrupts. */ + GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); + val = is_edge ? 2 : 0; + gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); +} + +static void gicv3_irq_enable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_disable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_set_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); +} + +static void gicv3_irq_clear_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); +} + +static bool gicv3_irq_get_active(uint32_t intid) +{ + return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); +} + +static void gicv3_irq_set_pending(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); } -static void gicv3_irq_enable(unsigned int intid) +static void gicv3_irq_clear_pending(uint32_t intid) { - gicv3_config_irq(intid, GICD_ISENABLER); + gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); } -static void gicv3_irq_disable(unsigned int intid) +static bool gicv3_irq_get_pending(uint32_t intid) { - gicv3_config_irq(intid, GICD_ICENABLER); + return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } static void gicv3_enable_redist(void *redist_base) @@ -237,4 +382,15 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_disable = gicv3_irq_disable, .gic_read_iar = gicv3_read_iar, .gic_write_eoir = gicv3_write_eoir, + .gic_write_dir = gicv3_write_dir, + .gic_set_priority_mask = gicv3_set_priority_mask, + .gic_set_eoi_split = gicv3_set_eoi_split, + .gic_set_priority = gicv3_set_priority, + .gic_irq_set_active = gicv3_irq_set_active, + .gic_irq_clear_active = gicv3_irq_clear_active, + .gic_irq_get_active = gicv3_irq_get_active, + .gic_irq_set_pending = gicv3_irq_set_pending, + .gic_irq_clear_pending = gicv3_irq_clear_pending, + .gic_irq_get_pending = gicv3_irq_get_pending, + .gic_irq_set_config = gicv3_irq_set_config, }; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index b4eeeafd2a70..9343d82519b4 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -8,6 +8,7 @@ #include <linux/compiler.h> #include <assert.h> +#include "guest_modes.h" #include "kvm_util.h" #include "../kvm_util_internal.h" #include "processor.h" @@ -237,6 +238,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + /* Configure base granule size */ switch (vm->mode) { case VM_MODE_P52V48_4K: TEST_FAIL("AArch64 does not support 4K sized pages " @@ -245,25 +247,47 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: + case VM_MODE_P48V48_64K: + case VM_MODE_P40V48_64K: + case VM_MODE_P36V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + case VM_MODE_P48V48_16K: + case VM_MODE_P40V48_16K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ break; case VM_MODE_P48V48_4K: + case VM_MODE_P40V48_4K: + case VM_MODE_P36V48_4K: tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* Configure output size */ + switch (vm->mode) { + case VM_MODE_P52V48_64K: + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + case VM_MODE_P48V48_4K: + case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ break; case VM_MODE_P40V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ - break; + case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ break; + case VM_MODE_P36V48_4K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V48_64K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } @@ -432,3 +456,47 @@ uint32_t guest_get_vcpuid(void) { return read_sysreg(tpidr_el1); } + +void aarch64_get_supported_page_sizes(uint32_t ipa, + bool *ps4k, bool *ps16k, bool *ps64k) +{ + struct kvm_vcpu_init preferred_init; + int kvm_fd, vm_fd, vcpu_fd, err; + uint64_t val; + struct kvm_one_reg reg = { + .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), + .addr = (uint64_t)&val, + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa); + TEST_ASSERT(vm_fd >= 0, "Can't create VM"); + + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu"); + + err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); + TEST_ASSERT(err == 0, "Can't get target"); + err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); + TEST_ASSERT(err == 0, "Can't get init vcpu"); + + err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); + TEST_ASSERT(err == 0, "Can't get MMFR0"); + + *ps4k = ((val >> 28) & 0xf) != 0xf; + *ps64k = ((val >> 24) & 0xf) == 0; + *ps16k = ((val >> 20) & 0xf) != 0; + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ +void __attribute__((constructor)) init_guest_modes(void) +{ + guest_modes_append_default(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b9b271ff520d..f5cd0c536d85 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -5,11 +5,14 @@ #include <linux/kvm.h> #include <linux/sizes.h> +#include <asm/kvm_para.h> #include <asm/kvm.h> #include "kvm_util.h" #include "../kvm_util_internal.h" #include "vgic.h" +#include "gic.h" +#include "gic_v3.h" /* * vGIC-v3 default host setup @@ -28,7 +31,7 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, uint64_t gicd_base_gpa, uint64_t gicr_base_gpa) { int gic_fd; @@ -49,7 +52,16 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, nr_vcpus, nr_vcpus_created); /* Distributor setup */ - gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, + false, &gic_fd) != 0) + return -1; + + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, &nr_irqs, true); + + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); @@ -68,3 +80,94 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, return gic_fd; } + +/* should only work for level sensitive interrupts */ +int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + uint64_t attr = 32 * (intid / 32); + uint64_t index = intid % 32; + uint64_t val; + int ret; + + ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val, false); + if (ret != 0) + return ret; + + val |= 1U << index; + ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val, true); + return ret; +} + +void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + int ret = _kvm_irq_set_level_info(gic_fd, intid, level); + + TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, " + "rc: %i errno: %i", ret, errno); +} + +int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + + TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " + "doesn't allow injecting SGIs. There's no mask for it."); + + if (INTID_IS_PPI(intid)) + irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT; + else + irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT; + + return _kvm_irq_line(vm, irq, level); +} + +void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + int ret = _kvm_arm_irq_line(vm, intid, level); + + TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", + ret, errno); +} + +static void vgic_poke_irq(int gic_fd, uint32_t intid, + uint32_t vcpu, uint64_t reg_off) +{ + uint64_t reg = intid / 32; + uint64_t index = intid % 32; + uint64_t attr = reg_off + reg * 4; + uint64_t val; + bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); + + /* Check that the addr part of the attr is within 32 bits. */ + assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK); + + uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; + + if (intid_is_private) { + /* TODO: only vcpu 0 implemented for now. */ + assert(vcpu == 0); + attr += SZ_64K; + } + + /* All calls will succeed, even with invalid intid's, as long as the + * addr part of the attr is within 32 bits (checked above). An invalid + * intid will just make the read/writes point to above the intended + * register space (i.e., ICPENDR after ISPENDR). + */ + kvm_device_access(gic_fd, group, attr, &val, false); + val |= 1ULL << index; + kvm_device_access(gic_fd, group, attr, &val, true); +} + +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); +} + +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); +} diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index c330f414ef96..8784013b747c 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -4,22 +4,59 @@ */ #include "guest_modes.h" +#ifdef __aarch64__ +#include "processor.h" +enum vm_guest_mode vm_mode_default; +#endif + struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { +#ifndef __aarch64__ guest_mode_append(VM_MODE_DEFAULT, true, true); - -#ifdef __aarch64__ - guest_mode_append(VM_MODE_P40V48_64K, true, true); +#else { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + bool ps4k, ps16k, ps64k; + int i; + + aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); + + vm_mode_default = NUM_VM_MODES; + if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, true, true); + guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, true, true); - guest_mode_append(VM_MODE_P48V48_64K, true, true); + guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); + } + if (limit >= 40) { + guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); + if (ps4k) + vm_mode_default = VM_MODE_P40V48_4K; } + if (limit >= 36) { + guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); + guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); + } + + /* + * Pick the first supported IPA size if the default + * isn't available. + */ + for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) { + if (guest_modes[i].supported && guest_modes[i].enabled) + vm_mode_default = i; + } + + TEST_ASSERT(vm_mode_default != NUM_VM_MODES, + "No supported mode!"); } #endif #ifdef __s390x__ @@ -38,6 +75,16 @@ void guest_modes_append_default(void) guest_mode_append(VM_MODE_P47V64_4K, true, true); } #endif +#ifdef __riscv + { + unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); + + if (sz >= 52) + guest_mode_append(VM_MODE_P52V48_4K, true, true); + if (sz >= 48) + guest_mode_append(VM_MODE_P48V48_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 53d2b5d04b82..d8cf851ab119 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -85,6 +85,33 @@ int kvm_check_cap(long cap) return ret; } +/* VM Check Capability + * + * Input Args: + * vm - Virtual Machine + * cap - Capability + * + * Output Args: None + * + * Return: + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. + * + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret; + + ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + return ret; +} + /* VM Enable Capability * * Input Args: @@ -166,12 +193,18 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", + [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", + [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", + [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", + [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", + [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -185,12 +218,18 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, + [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, + [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, + [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, + [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, + [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -252,9 +291,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->pgtable_levels = 3; break; case VM_MODE_P40V48_4K: + case VM_MODE_P36V48_4K: vm->pgtable_levels = 4; break; case VM_MODE_P40V48_64K: + case VM_MODE_P36V48_64K: + vm->pgtable_levels = 3; + break; + case VM_MODE_P48V48_16K: + case VM_MODE_P40V48_16K: + case VM_MODE_P36V48_16K: + vm->pgtable_levels = 4; + break; + case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; case VM_MODE_PXXV48_4K: @@ -443,9 +492,11 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, uint64_t first_page, uint32_t num_pages) { - struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, - .first_page = first_page, - .num_pages = num_pages }; + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; int ret; ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); @@ -2087,6 +2138,78 @@ int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, } /* + * IRQ related functions. + */ + +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +{ + struct kvm_irq_level irq_level = { + .irq = irq, + .level = level, + }; + + return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); +} + +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +{ + int ret = _kvm_irq_line(vm, irq, level); + + TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno); +} + +struct kvm_irq_routing *kvm_gsi_routing_create(void) +{ + struct kvm_irq_routing *routing; + size_t size; + + size = sizeof(struct kvm_irq_routing); + /* Allocate space for the max number of entries: this wastes 196 KBs. */ + size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); + routing = calloc(1, size); + assert(routing); + + return routing; +} + +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin) +{ + int i; + + assert(routing); + assert(routing->nr < KVM_MAX_IRQ_ROUTES); + + i = routing->nr; + routing->entries[i].gsi = gsi; + routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + routing->entries[i].flags = 0; + routing->entries[i].u.irqchip.irqchip = 0; + routing->entries[i].u.irqchip.pin = pin; + routing->nr++; +} + +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) +{ + int ret; + + assert(routing); + ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing); + free(routing); + + return ret; +} + +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) +{ + int ret; + + ret = _kvm_gsi_routing_write(vm, routing); + TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i", + ret, errno); +} + +/* * VM Dump * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c new file mode 100644 index 000000000000..d377f2603d98 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V code + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ + +#include <linux/compiler.h> +#include <assert.h> + +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 + +static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size) & ~(vm->page_size - 1); +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << + PGTBL_PAGE_SIZE_SHIFT; +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return PGTBL_PAGE_SIZE / sizeof(uint64_t); +} + +static uint64_t pte_index_mask[] = { + PGTBL_L0_INDEX_MASK, + PGTBL_L1_INDEX_MASK, + PGTBL_L2_INDEX_MASK, + PGTBL_L3_INDEX_MASK, +}; + +static uint32_t pte_index_shift[] = { + PGTBL_L0_INDEX_SHIFT, + PGTBL_L1_INDEX_SHIFT, + PGTBL_L2_INDEX_SHIFT, + PGTBL_L3_INDEX_SHIFT, +}; + +static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + TEST_ASSERT(level > -1, + "Negative page table level (%d) not possible", level); + TEST_ASSERT(level < vm->pgtable_levels, + "Invalid page table level (%d)", level); + + return (gva & pte_index_mask[level]) >> pte_index_shift[level]; +} + +void virt_pgd_alloc(struct kvm_vm *vm) +{ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_pages_alloc(vm, + page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint64_t *ptep, next_ppn; + int level = vm->pgtable_levels - 1; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; + if (!*ptep) { + next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_VALID_MASK; + } + level--; + + while (level > -1) { + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + + pte_index(vm, vaddr, level) * 8; + if (!*ptep && level > 0) { + next_ppn = vm_alloc_page_table(vm) >> + PGTBL_PAGE_SIZE_SHIFT; + *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_VALID_MASK; + } + level--; + } + + paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; +} + +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + int level = vm->pgtable_levels - 1; + + if (!vm->pgd_created) + goto unmapped_gva; + + ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; + if (!ptep) + goto unmapped_gva; + level--; + + while (level > -1) { + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + + pte_index(vm, gva, level) * 8; + if (!ptep) + goto unmapped_gva; + level--; + } + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d", + gva, level); + exit(1); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t page, int level) +{ +#ifdef DEBUG + static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; + uint64_t pte, *ptep; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (!*ptep) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", + type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, + pte_addr(vm, *ptep), level - 1); + } +#endif +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level = vm->pgtable_levels - 1; + uint64_t pgd, *ptep; + + if (!vm->pgd_created) + return; + + for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { + ptep = addr_gpa2hva(vm, pgd); + if (!*ptep) + continue; + fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", + pgd, *ptep, ptep); + pte_dump(stream, vm, indent + 1, + pte_addr(vm, *ptep), level - 1); + } +} + +void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid) +{ + unsigned long satp; + + /* + * The RISC-V Sv48 MMU mode supports 56-bit physical address + * for 48-bit virtual address with 4KB last level page size. + */ + switch (vm->mode) { + case VM_MODE_P52V48_4K: + case VM_MODE_P48V48_4K: + case VM_MODE_P40V48_4K: + break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; + satp |= SATP_MODE_48; + + set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp); +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct kvm_riscv_core core; + + get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6); + + fprintf(stream, + " MODE: 0x%lx\n", core.mode); + fprintf(stream, + " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n", + core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp); + fprintf(stream, + " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n", + core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2); + fprintf(stream, + " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n", + core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1); + fprintf(stream, + " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n", + core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5); + fprintf(stream, + " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n", + core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3); + fprintf(stream, + " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n", + core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7); + fprintf(stream, + " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n", + core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11); + fprintf(stream, + " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n", + core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); +} + +static void guest_hang(void) +{ + while (1) + ; +} + +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + int r; + size_t stack_size = vm->page_size == 4096 ? + DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + unsigned long current_gp = 0; + struct kvm_mp_state mps; + + vm_vcpu_add(vm, vcpuid); + riscv_vcpu_mmu_setup(vm, vcpuid); + + /* + * With SBI HSM support in KVM RISC-V, all secondary VCPUs are + * powered-off by default so we ensure that all secondary VCPUs + * are powered-on using KVM_SET_MP_STATE ioctl(). + */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps); + TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); + + /* Setup global pointer of guest to be same as the host */ + asm volatile ( + "add %0, gp, zero" : "=r" (current_gp) : : "memory"); + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp); + + /* Setup stack pointer and program counter of guest */ + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), + stack_vaddr + stack_size); + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), + (unsigned long)guest_code); + + /* Setup default exception vector of guest */ + set_reg(vm, vcpuid, RISCV_CSR_REG(stvec), + (unsigned long)guest_hang); +} + +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + uint64_t id = RISCV_CORE_REG(regs.a0); + int i; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + " num: %u\n", num); + + va_start(ap, num); + + for (i = 0; i < num; i++) { + switch (i) { + case 0: + id = RISCV_CORE_REG(regs.a0); + break; + case 1: + id = RISCV_CORE_REG(regs.a1); + break; + case 2: + id = RISCV_CORE_REG(regs.a2); + break; + case 3: + id = RISCV_CORE_REG(regs.a3); + break; + case 4: + id = RISCV_CORE_REG(regs.a4); + break; + case 5: + id = RISCV_CORE_REG(regs.a5); + break; + case 6: + id = RISCV_CORE_REG(regs.a6); + break; + case 7: + id = RISCV_CORE_REG(regs.a7); + break; + }; + set_reg(vm, vcpuid, id, va_arg(ap, uint64_t)); + } + + va_end(ap); +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c new file mode 100644 index 000000000000..9e42d8248fa6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ + +#include <linux/kvm.h> + +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +void ucall_init(struct kvm_vm *vm, void *arg) +{ +} + +void ucall_uninit(struct kvm_vm *vm) +{ +} + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + struct sbiret ret; + + asm volatile ( + "ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc, + 0, 0, 0, 0, 0); +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (uc) + memset(uc, 0, sizeof(*uc)); + + if (run->exit_reason == KVM_EXIT_RISCV_SBI && + run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT && + run->riscv_sbi.function_id == 0) { + memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]), + sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index eef7b34756d5..9f000dfb5594 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -650,6 +650,60 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid) vcpu_sregs_set(vm, vcpuid, &sregs); } +#define CPUID_XFD_BIT (1 << 4) +static bool is_xfd_supported(void) +{ + int eax, ebx, ecx, edx; + const int leaf = 0xd, subleaf = 0x1; + + __asm__ __volatile__( + "cpuid" + : /* output */ "=a"(eax), "=b"(ebx), + "=c"(ecx), "=d"(edx) + : /* input */ "0"(leaf), "2"(subleaf)); + + return !!(eax & CPUID_XFD_BIT); +} + +void vm_xsave_req_perm(int bit) +{ + int kvm_fd; + u64 bitmask; + long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + exit(KSFT_SKIP); + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + if (!(bitmask & (1ULL << bit))) + exit(KSFT_SKIP); + + if (!is_xfd_supported()) + exit(KSFT_SKIP); + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + + /* + * The older kernel version(<5.15) can't support + * ARCH_REQ_XCOMP_GUEST_PERM and directly return. + */ + if (rc) + return; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & (1ULL << bit), + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { struct kvm_mp_state mp_state; @@ -847,6 +901,17 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) return entry; } + +int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); +} + /* * VM VCPU CPUID Set * @@ -864,12 +929,9 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); int rc; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + rc = __vcpu_set_cpuid(vm, vcpuid, cpuid); TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", rc, errno); @@ -1017,21 +1079,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) sregs_dump(stream, &sregs, indent + 4); } -struct kvm_x86_state { - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - static int kvm_get_num_msrs_fd(int kvm_fd) { struct kvm_msr_list nmsrs; @@ -1069,6 +1116,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void) return list; } +static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu, + struct kvm_x86_state *state) +{ + int size; + + size = vm_check_cap(vm, KVM_CAP_XSAVE2); + if (!size) + size = sizeof(struct kvm_xsave); + + state->xsave = malloc(size); + if (size == sizeof(struct kvm_xsave)) + return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave); + else + return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave); +} + struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1096,25 +1159,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); - r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", - r); + r = vcpu_save_xsave_state(vm, vcpu, state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); @@ -1123,17 +1186,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) } r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); if (nested_size) { state->nested.size = sizeof(state->nested_); r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", - r); + r); TEST_ASSERT(state->nested.size <= nested_size, - "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", - state->nested.size, nested_size); + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); } else state->nested.size = 0; @@ -1141,12 +1204,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", - r, r == nmsrs ? -1 : list->indices[r]); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", + r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); free(list); return state; @@ -1157,9 +1220,14 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", - r); + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, + "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); @@ -1167,41 +1235,43 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r); } - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); if (state->nested.size) { r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); + r); } } -bool is_intel_cpu(void) +void kvm_x86_state_cleanup(struct kvm_x86_state *state) +{ + free(state->xsave); + free(state); +} + +static bool cpu_vendor_string_is(const char *vendor) { + const uint32_t *chunk = (const uint32_t *)vendor; int eax, ebx, ecx, edx; - const uint32_t *chunk; const int leaf = 0; __asm__ __volatile__( @@ -1210,10 +1280,22 @@ bool is_intel_cpu(void) "=c"(ecx), "=d"(edx) : /* input */ "0"(leaf), "2"(0)); - chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } +bool is_intel_cpu(void) +{ + return cpu_vendor_string_is("GenuineIntel"); +} + +/* + * Exclude early K5 samples with a vendor string of "AMDisbetter!" + */ +bool is_amd_cpu(void) +{ + return cpu_vendor_string_is("AuthenticAMD"); +} + uint32_t kvm_get_cpuid_max_basic(void) { return kvm_get_supported_cpuid_entry(0)->eax; @@ -1337,6 +1419,23 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) } } +struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function == function && cur->index == index) + return cur; + } + + TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); + + return NULL; +} + bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent) { @@ -1432,22 +1531,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -static inline unsigned x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ @@ -1457,11 +1540,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ - eax = ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || - ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || - edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + if (!is_amd_cpu()) return max_gfn; /* On parts with <40 physical address bits, the area is fully hidden */ @@ -1471,6 +1550,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; eax = 1; + ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); if (x86_family(eax) < 0x17) goto done; diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c new file mode 100644 index 000000000000..52a3ef6629e8 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * amx tests + * + * Copyright (C) 2021, Intel, Inc. + * + * Tests for amx #NM exception and save/restore. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +#define VCPU_ID 0 +#define X86_FEATURE_XSAVE (1 << 26) +#define X86_FEATURE_OSXSAVE (1 << 27) + +#define PAGE_SIZE (1 << 12) +#define NUM_TILES 8 +#define TILE_SIZE 1024 +#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) + +/* Tile configuration associated: */ +#define MAX_TILES 16 +#define RESERVED_BYTES 14 + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +#define TILE_CPUID 0x1d +#define XSTATE_CPUID 0xd +#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 +#define XSTATE_USER_STATE_SUBLEAVE 0x0 + +#define XSAVE_HDR_OFFSET 512 + +struct xsave_data { + u8 area[XSAVE_SIZE]; +} __aligned(64); + +struct tile_config { + u8 palette_id; + u8 start_row; + u8 reserved[RESERVED_BYTES]; + u16 colsb[MAX_TILES]; + u8 rows[MAX_TILES]; +}; + +struct tile_data { + u8 data[NUM_TILES * TILE_SIZE]; +}; + +struct xtile_info { + u16 bytes_per_tile; + u16 bytes_per_row; + u16 max_names; + u16 max_rows; + u32 xsave_offset; + u32 xsave_size; +}; + +static struct xtile_info xtile; + +static inline u64 __xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void __xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); +} + +static inline void __ldtilecfg(void *cfg) +{ + asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00" + : : "a"(cfg)); +} + +static inline void __tileloadd(void *tile) +{ + asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10" + : : "a"(tile), "d"(0)); +} + +static inline void __tilerelease(void) +{ + asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); +} + +static inline void __xsavec(struct xsave_data *data, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsavec (%%rdi)" + : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void check_cpuid_xsave(void) +{ + uint32_t eax, ebx, ecx, edx; + + eax = 1; + ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(ecx & X86_FEATURE_XSAVE)) + GUEST_ASSERT(!"cpuid: no CPU xsave support!"); + if (!(ecx & X86_FEATURE_OSXSAVE)) + GUEST_ASSERT(!"cpuid: no OS xsave support!"); +} + +static bool check_xsave_supports_xtile(void) +{ + return __xgetbv(0) & XFEATURE_MASK_XTILE; +} + +static bool enum_xtile_config(void) +{ + u32 eax, ebx, ecx, edx; + + eax = TILE_CPUID; + ecx = TILE_PALETTE_CPUID_SUBLEAVE; + + cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx || !ecx) + return false; + + xtile.max_names = ebx >> 16; + if (xtile.max_names < NUM_TILES) + return false; + + xtile.bytes_per_tile = eax >> 16; + if (xtile.bytes_per_tile < TILE_SIZE) + return false; + + xtile.bytes_per_row = ebx; + xtile.max_rows = ecx; + + return true; +} + +static bool enum_xsave_tile(void) +{ + u32 eax, ebx, ecx, edx; + + eax = XSTATE_CPUID; + ecx = XFEATURE_XTILEDATA; + + cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx) + return false; + + xtile.xsave_offset = ebx; + xtile.xsave_size = eax; + + return true; +} + +static bool check_xsave_size(void) +{ + u32 eax, ebx, ecx, edx; + bool valid = false; + + eax = XSTATE_CPUID; + ecx = XSTATE_USER_STATE_SUBLEAVE; + + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx && ebx <= XSAVE_SIZE) + valid = true; + + return valid; +} + +static bool check_xtile_info(void) +{ + bool ret = false; + + if (!check_xsave_size()) + return ret; + + if (!enum_xsave_tile()) + return ret; + + if (!enum_xtile_config()) + return ret; + + if (sizeof(struct tile_data) >= xtile.xsave_size) + ret = true; + + return ret; +} + +static void set_tilecfg(struct tile_config *cfg) +{ + int i; + + /* Only palette id 1 */ + cfg->palette_id = 1; + for (i = 0; i < xtile.max_names; i++) { + cfg->colsb[i] = xtile.bytes_per_row; + cfg->rows[i] = xtile.max_rows; + } +} + +static void set_xstatebv(void *data, uint64_t bv) +{ + *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv; +} + +static u64 get_xstatebv(void *data) +{ + return *(u64 *)(data + XSAVE_HDR_OFFSET); +} + +static void init_regs(void) +{ + uint64_t cr4, xcr0; + + /* turn on CR4.OSXSAVE */ + cr4 = get_cr4(); + cr4 |= X86_CR4_OSXSAVE; + set_cr4(cr4); + + xcr0 = __xgetbv(0); + xcr0 |= XFEATURE_MASK_XTILE; + __xsetbv(0x0, xcr0); +} + +static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, + struct tile_data *tiledata, + struct xsave_data *xsave_data) +{ + init_regs(); + check_cpuid_xsave(); + GUEST_ASSERT(check_xsave_supports_xtile()); + GUEST_ASSERT(check_xtile_info()); + + /* check xtile configs */ + GUEST_ASSERT(xtile.xsave_offset == 2816); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(xtile.max_names == 8); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + GUEST_ASSERT(xtile.bytes_per_row == 64); + GUEST_ASSERT(xtile.max_rows == 16); + GUEST_SYNC(1); + + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(2); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + GUEST_SYNC(3); + /* Check save/restore when trap to userspace */ + __tileloadd(tiledata); + GUEST_SYNC(4); + __tilerelease(); + GUEST_SYNC(5); + /* bit 18 not in the XCOMP_BV after xsavec() */ + set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA); + __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA); + GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0); + + /* xfd=0x40000, disable amx tiledata */ + wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA); + GUEST_SYNC(6); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA); + set_tilecfg(amx_cfg); + __ldtilecfg(amx_cfg); + /* Trigger #NM exception */ + __tileloadd(tiledata); + GUEST_SYNC(10); + + GUEST_DONE(); +} + +void guest_nm_handler(struct ex_regs *regs) +{ + /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */ + GUEST_SYNC(7); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA); + GUEST_SYNC(8); + GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA); + /* Clear xfd_err */ + wrmsr(MSR_IA32_XFD_ERR, 0); + /* xfd=0, enable amx */ + wrmsr(MSR_IA32_XFD, 0); + GUEST_SYNC(9); +} + +int main(int argc, char *argv[]) +{ + struct kvm_cpuid_entry2 *entry; + struct kvm_regs regs1, regs2; + bool amx_supported = false; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int xsave_restore_size = 0; + vm_vaddr_t amx_cfg, tiledata, xsavedata; + struct ucall uc; + u32 amx_offset; + int stage, ret; + + vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + entry = kvm_get_supported_cpuid_entry(1); + if (!(entry->ecx & X86_FEATURE_XSAVE)) { + print_skip("XSAVE feature not supported"); + exit(KSFT_SKIP); + } + + if (kvm_get_cpuid_max_basic() >= 0xd) { + entry = kvm_get_supported_cpuid_index(0xd, 0); + amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE); + if (!amx_supported) { + print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax); + exit(KSFT_SKIP); + } + /* Get xsave/restore max size */ + xsave_restore_size = entry->ecx; + } + + run = vcpu_state(vm, VCPU_ID); + vcpu_regs_get(vm, VCPU_ID, ®s1); + + /* Register #NM handler */ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); + + /* amx cfg for guest_code */ + amx_cfg = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); + + /* amx tiledata for guest_code */ + tiledata = vm_vaddr_alloc_pages(vm, 2); + memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); + + /* xsave data for guest_code */ + xsavedata = vm_vaddr_alloc_pages(vm, 3); + memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize()); + vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata); + + for (stage = 1; ; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + switch (uc.args[1]) { + case 1: + case 2: + case 3: + case 5: + case 6: + case 7: + case 8: + fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]); + break; + case 4: + case 10: + fprintf(stderr, + "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]); + + /* Compacted mode, get amx offset by xsave area + * size subtract 8K amx size. + */ + amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; + state = vcpu_save_state(vm, VCPU_ID); + void *amx_start = (void *)state->xsave + amx_offset; + void *tiles_data = (void *)addr_gva2hva(vm, tiledata); + /* Only check TMM0 register, 1 tile */ + ret = memcmp(amx_start, tiles_data, TILE_SIZE); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + kvm_x86_state_cleanup(state); + break; + case 9: + fprintf(stderr, + "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]); + break; + } + break; + case UCALL_DONE: + fprintf(stderr, "UCALL_DONE\n"); + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + state = vcpu_save_state(vm, VCPU_ID); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + kvm_x86_state_cleanup(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index a711f83749ea..16d2465c5634 100644 --- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -154,6 +154,34 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct return guest_cpuids; } +static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 *ent; + int rc; + u32 eax, ebx, x; + + /* Setting unmodified CPUID is allowed */ + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + + /* Changing CPU features is forbidden */ + ent = get_cpuid(cpuid, 0x7, 0); + ebx = ent->ebx; + ent->ebx--; + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(rc, "Changing CPU features should fail"); + ent->ebx = ebx; + + /* Changing MAXPHYADDR is forbidden */ + ent = get_cpuid(cpuid, 0x80000008, 0); + eax = ent->eax; + x = eax & 0xff; + ent->eax = (eax & ~0xffu) | (x - 1); + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(rc, "Changing MAXPHYADDR should fail"); + ent->eax = eax; +} + int main(void) { struct kvm_cpuid2 *supp_cpuid, *cpuid2; @@ -175,5 +203,7 @@ int main(void) for (stage = 0; stage < 3; stage++) run_vcpu(vm, VCPU_ID, stage); + set_cpuid_after_run(vm, cpuid2); + kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 2b46dcca86a8..4c7841dfd481 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm) vcpu_set_hv_cpuid(vm, VCPU_ID); vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c new file mode 100644 index 000000000000..c715adcbd487 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_SET_PMU_EVENT_FILTER. + * + * Copyright (C) 2022, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies the expected behavior of allow lists and deny lists for + * virtual PMU events. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * In lieu of copying perf_event.h into tools... + */ +#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +}; + +/* End of stuff taken from perf_event.h. */ + +/* Oddly, this isn't in perf_event.h. */ +#define ARCH_PERFMON_BRANCHES_RETIRED 5 + +#define VCPU_ID 0 +#define NUM_BRANCHES 42 + +/* + * This is how the event selector and unit mask are stored in an AMD + * core performance event-select register. Intel's format is similar, + * but the event selector is only 8 bits. + */ +#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \ + (umask & 0xff) << 8) + +/* + * "Branch instructions retired", from the Intel SDM, volume 3, + * "Pre-defined Architectural Performance Events." + */ + +#define INTEL_BR_RETIRED EVENT(0xc4, 0) + +/* + * "Retired branch instructions", from Processor Programming Reference + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, + * Preliminary Processor Programming Reference (PPR) for AMD Family + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision + * B1 Processors Volume 1 of 2. + */ + +#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) + +/* + * This event list comprises Intel's eight architectural events plus + * AMD's "retired branch instructions" for Zen[123] (and possibly + * other AMD CPUs). + */ +static const uint64_t event_list[] = { + EVENT(0x3c, 0), + EVENT(0xc0, 0), + EVENT(0x3c, 1), + EVENT(0x2e, 0x4f), + EVENT(0x2e, 0x41), + EVENT(0xc4, 0), + EVENT(0xc5, 0), + EVENT(0xa4, 1), + AMD_ZEN_BR_RETIRED, +}; + +/* + * If we encounter a #GP during the guest PMU sanity check, then the guest + * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). + */ +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(0); +} + +/* + * Check that we can write a new value to the given MSR and read it back. + * The caller should provide a non-empty set of bits that are safe to flip. + * + * Return on success. GUEST_SYNC(0) on error. + */ +static void check_msr(uint32_t msr, uint64_t bits_to_flip) +{ + uint64_t v = rdmsr(msr) ^ bits_to_flip; + + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(0); + + v ^= bits_to_flip; + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(0); +} + +static void intel_guest_code(void) +{ + check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); + check_msr(MSR_P6_EVNTSEL0, 0xffff); + check_msr(MSR_IA32_PMC0, 0xffff); + GUEST_SYNC(1); + + for (;;) { + uint64_t br0, br1; + + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); + br0 = rdmsr(MSR_IA32_PMC0); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + br1 = rdmsr(MSR_IA32_PMC0); + GUEST_SYNC(br1 - br0); + } +} + +/* + * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], + * this code uses the always-available, legacy K7 PMU MSRs, which alias to + * the first four of the six extended core PMU MSRs. + */ +static void amd_guest_code(void) +{ + check_msr(MSR_K7_EVNTSEL0, 0xffff); + check_msr(MSR_K7_PERFCTR0, 0xffff); + GUEST_SYNC(1); + + for (;;) { + uint64_t br0, br1; + + wrmsr(MSR_K7_EVNTSEL0, 0); + wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); + br0 = rdmsr(MSR_K7_PERFCTR0); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + br1 = rdmsr(MSR_K7_PERFCTR0); + GUEST_SYNC(br1 - br0); + } +} + +/* + * Run the VM to the next GUEST_SYNC(value), and return the value passed + * to the sync. Any other exit from the guest is fatal. + */ +static uint64_t run_vm_to_sync(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); + return uc.args[1]; +} + +/* + * In a nested environment or if the vPMU is disabled, the guest PMU + * might not work as architected (accessing the PMU MSRs may raise + * #GP, or writes could simply be discarded). In those situations, + * there is no point in running these tests. The guest code will perform + * a sanity check and then GUEST_SYNC(success). In the case of failure, + * the behavior of the guest on resumption is undefined. + */ +static bool sanity_check_pmu(struct kvm_vm *vm) +{ + bool success; + + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + success = run_vm_to_sync(vm); + vm_install_exception_handler(vm, GP_VECTOR, NULL); + + return success; +} + +static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) +{ + struct kvm_pmu_event_filter *f; + int size = sizeof(*f) + nevents * sizeof(f->events[0]); + + f = malloc(size); + TEST_ASSERT(f, "Out of memory"); + memset(f, 0, size); + f->nevents = nevents; + return f; +} + +static struct kvm_pmu_event_filter *event_filter(uint32_t action) +{ + struct kvm_pmu_event_filter *f; + int i; + + f = make_pmu_event_filter(ARRAY_SIZE(event_list)); + f->action = action; + for (i = 0; i < ARRAY_SIZE(event_list); i++) + f->events[i] = event_list[i]; + + return f; +} + +/* + * Remove the first occurrence of 'event' (if any) from the filter's + * event list. + */ +static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, + uint64_t event) +{ + bool found = false; + int i; + + for (i = 0; i < f->nevents; i++) { + if (found) + f->events[i - 1] = f->events[i]; + else + found = f->events[i] == event; + } + if (found) + f->nevents--; + return f; +} + +static void test_without_filter(struct kvm_vm *vm) +{ + uint64_t count = run_vm_to_sync(vm); + + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static uint64_t test_with_filter(struct kvm_vm *vm, + struct kvm_pmu_event_filter *f) +{ + vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f); + return run_vm_to_sync(vm); +} + +static void test_member_deny_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + uint64_t count = test_with_filter(vm, f); + + free(f); + if (count) + pr_info("%s: Branch instructions retired = %lu (expected 0)\n", + __func__, count); + TEST_ASSERT(!count, "Disallowed PMU Event is counting"); +} + +static void test_member_allow_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + uint64_t count = test_with_filter(vm, f); + + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static void test_not_member_deny_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + uint64_t count; + + remove_event(f, INTEL_BR_RETIRED); + remove_event(f, AMD_ZEN_BR_RETIRED); + count = test_with_filter(vm, f); + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static void test_not_member_allow_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + uint64_t count; + + remove_event(f, INTEL_BR_RETIRED); + remove_event(f, AMD_ZEN_BR_RETIRED); + count = test_with_filter(vm, f); + free(f); + if (count) + pr_info("%s: Branch instructions retired = %lu (expected 0)\n", + __func__, count); + TEST_ASSERT(!count, "Disallowed PMU Event is counting"); +} + +/* + * Check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, an EBX bit vector of length greater + * than 5, and EBX[5] clear. + */ +static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) +{ + union cpuid10_eax eax = { .full = entry->eax }; + union cpuid10_ebx ebx = { .full = entry->ebx }; + + return eax.split.version_id && eax.split.num_counters > 0 && + eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && + !ebx.split.no_branch_instruction_retired; +} + +/* + * Note that CPUID leaf 0xa is Intel-specific. This leaf should be + * clear on AMD hardware. + */ +static bool use_intel_pmu(void) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_get_supported_cpuid_index(0xa, 0); + return is_intel_cpu() && entry && check_intel_pmu_leaf(entry); +} + +static bool is_zen1(uint32_t eax) +{ + return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; +} + +static bool is_zen2(uint32_t eax) +{ + return x86_family(eax) == 0x17 && + x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; +} + +static bool is_zen3(uint32_t eax) +{ + return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; +} + +/* + * Determining AMD support for a PMU event requires consulting the AMD + * PPR for the CPU or reference material derived therefrom. The AMD + * test code herein has been verified to work on Zen1, Zen2, and Zen3. + * + * Feel free to add more AMD CPUs that are documented to support event + * select 0xc2 umask 0 as "retired branch instructions." + */ +static bool use_amd_pmu(void) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_get_supported_cpuid_index(1, 0); + return is_amd_cpu() && entry && + (is_zen1(entry->eax) || + is_zen2(entry->eax) || + is_zen3(entry->eax)); +} + +int main(int argc, char *argv[]) +{ + void (*guest_code)(void) = NULL; + struct kvm_vm *vm; + int r; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER); + if (!r) { + print_skip("KVM_CAP_PMU_EVENT_FILTER not supported"); + exit(KSFT_SKIP); + } + + if (use_intel_pmu()) + guest_code = intel_guest_code; + else if (use_amd_pmu()) + guest_code = amd_guest_code; + + if (!guest_code) { + print_skip("Don't know how to test this guest PMU"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + if (!sanity_check_pmu(vm)) { + print_skip("Guest PMU is not functional"); + exit(KSFT_SKIP); + } + + test_without_filter(vm); + test_member_deny_list(vm); + test_member_allow_list(vm); + test_not_member_deny_list(vm); + test_not_member_allow_list(vm); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index 29b18d565cf4..80056bbbb003 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -21,7 +21,7 @@ #define NR_LOCK_TESTING_THREADS 3 #define NR_LOCK_TESTING_ITERATIONS 10000 -static void sev_ioctl(int vm_fd, int cmd_id, void *data) +static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error) { struct kvm_sev_cmd cmd = { .id = cmd_id, @@ -31,9 +31,19 @@ static void sev_ioctl(int vm_fd, int cmd_id, void *data) int ret; ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); - TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS), + *fw_error = cmd.error; + return ret; +} + +static void sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + int ret; + __u32 fw_error; + + ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error); + TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS, "%d failed: return code: %d, errno: %d, fw error: %d", - cmd_id, ret, errno, cmd.error); + cmd_id, ret, errno, fw_error); } static struct kvm_vm *sev_vm_create(bool es) @@ -225,12 +235,45 @@ static void sev_mirror_create(int dst_fd, int src_fd) TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); } +static void verify_mirror_allowed_cmds(int vm_fd) +{ + struct kvm_sev_guest_status status; + + for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) { + int ret; + __u32 fw_error; + + /* + * These commands are allowed for mirror VMs, all others are + * not. + */ + switch (cmd_id) { + case KVM_SEV_LAUNCH_UPDATE_VMSA: + case KVM_SEV_GUEST_STATUS: + case KVM_SEV_DBG_DECRYPT: + case KVM_SEV_DBG_ENCRYPT: + continue; + default: + break; + } + + /* + * These commands should be disallowed before the data + * parameter is examined so NULL is OK here. + */ + ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able call command: %d. ret: %d, errno: %d\n", + cmd_id, ret, errno); + } + + sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status); +} + static void test_sev_mirror(bool es) { struct kvm_vm *src_vm, *dst_vm; - struct kvm_sev_launch_start start = { - .policy = es ? SEV_POLICY_ES : 0 - }; int i; src_vm = sev_vm_create(es); @@ -241,10 +284,12 @@ static void test_sev_mirror(bool es) /* Check that we can complete creation of the mirror VM. */ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) vm_vcpu_add(dst_vm, i); - sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start); + if (es) sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + verify_mirror_allowed_cmds(dst_vm->fd); + kvm_vm_free(src_vm); kvm_vm_free(dst_vm); } diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index d0fe2fdce58c..a626d40fdb48 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -105,7 +105,6 @@ static void guest_code(void *arg) if (cpu_has_svm()) { run_guest(svm->vmcb, svm->vmcb_gpa); - svm->vmcb->save.rip += 3; run_guest(svm->vmcb, svm->vmcb_gpa); } else { vmlaunch(); @@ -212,7 +211,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); } done: diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 32854c1462ad..2e0a92da8ff5 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -218,7 +218,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index 5a6a662f2e59..a426078b16a3 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) switch (get_ucall(vm, vcpuid, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); + uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); return; case UCALL_DONE: return; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 2835a17f1b7a..edac8839e717 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -30,8 +30,8 @@ static struct kvm_vm *vm; static void l2_guest_code(void) { /* Exit to L0 */ - asm volatile("inb %%dx, %%al" - : : [port] "d" (PORT_L0_EXIT) : "rax"); + asm volatile("inb %%dx, %%al" + : : [port] "d" (PORT_L0_EXIT) : "rax"); } static void l1_guest_code(struct vmx_pages *vmx_pages) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c new file mode 100644 index 000000000000..27a850f3d7ce --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include <signal.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include "kselftest.h" + +#define VCPU_ID 0 + +static struct kvm_vm *vm; + +static void guest_ud_handler(struct ex_regs *regs) +{ + /* Loop on the ud2 until guest state is made invalid. */ +} + +static void guest_code(void) +{ + asm volatile("ud2"); +} + +static void __run_vcpu_with_invalid_state(void) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Expected emulation failure, got %d\n", + run->emulation_failure.suberror); +} + +static void run_vcpu_with_invalid_state(void) +{ + /* + * Always run twice to verify KVM handles the case where _KVM_ queues + * an exception with invalid state and then exits to userspace, i.e. + * that KVM doesn't explode if userspace ignores the initial error. + */ + __run_vcpu_with_invalid_state(); + __run_vcpu_with_invalid_state(); +} + +static void set_timer(void) +{ + struct itimerval timer; + + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = 200; + timer.it_interval = timer.it_value; + ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); +} + +static void set_or_clear_invalid_guest_state(bool set) +{ + static struct kvm_sregs sregs; + + if (!sregs.cr0) + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.tr.unusable = !!set; + vcpu_sregs_set(vm, VCPU_ID, &sregs); +} + +static void set_invalid_guest_state(void) +{ + set_or_clear_invalid_guest_state(true); +} + +static void clear_invalid_guest_state(void) +{ + set_or_clear_invalid_guest_state(false); +} + +static void sigalrm_handler(int sig) +{ + struct kvm_vcpu_events events; + + TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig); + + vcpu_events_get(vm, VCPU_ID, &events); + + /* + * If an exception is pending, attempt KVM_RUN with invalid guest, + * otherwise rearm the timer and keep doing so until the timer fires + * between KVM queueing an exception and re-entering the guest. + */ + if (events.exception.pending) { + set_invalid_guest_state(); + run_vcpu_with_invalid_state(); + } else { + set_timer(); + } +} + +int main(int argc, char *argv[]) +{ + if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) { + print_skip("Must be run with kvm_intel.unrestricted_guest=0"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *)guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* + * Stuff invalid guest state for L2 by making TR unusuable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + set_invalid_guest_state(); + run_vcpu_with_invalid_state(); + + /* + * Verify KVM also handles the case where userspace gains control while + * an exception is pending and stuffs invalid state. Run with valid + * guest state and a timer firing every 200us, and attempt to enter the + * guest with invalid state when the handler interrupts KVM with an + * exception pending. + */ + clear_invalid_guest_state(); + TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR, + "Failed to register SIGALRM handler, errno = %d (%s)", + errno, strerror(errno)); + + set_timer(); + run_vcpu_with_invalid_state(); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index a07480aed397..ff92e25b6f1e 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -244,7 +244,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); - free(state); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); vcpu_regs_get(vm, VCPU_ID, ®s2); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index a0699f00b3d6..865e17146815 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -14,6 +14,9 @@ #include <stdint.h> #include <time.h> #include <sched.h> +#include <signal.h> + +#include <sys/eventfd.h> #define VCPU_ID 5 @@ -22,10 +25,15 @@ #define SHINFO_REGION_SLOT 10 #define PAGE_SIZE 4096 +#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) +#define DUMMY_REGION_SLOT 11 + +#define SHINFO_ADDR (SHINFO_REGION_GPA) #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) +#define SHINFO_VADDR (SHINFO_REGION_GVA) #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) @@ -38,20 +46,20 @@ static struct kvm_vm *vm; #define MIN_STEAL_TIME 50000 struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 flags; - u8 pad[2]; + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; } __attribute__((__packed__)); /* 32 bytes */ struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; + u32 version; + u32 sec; + u32 nsec; } __attribute__((__packed__)); struct vcpu_runstate_info { @@ -66,22 +74,44 @@ struct arch_vcpu_info { }; struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; }; /* 64 bytes (x86) */ +struct shared_info { + struct vcpu_info vcpu_info[32]; + unsigned long evtchn_pending[64]; + unsigned long evtchn_mask[64]; + struct pvclock_wall_clock wc; + uint32_t wc_sec_hi; + /* arch_shared_info here */ +}; + #define RUNSTATE_running 0 #define RUNSTATE_runnable 1 #define RUNSTATE_blocked 2 #define RUNSTATE_offline 3 +static const char *runstate_names[] = { + "running", + "runnable", + "blocked", + "offline" +}; + +struct { + struct kvm_irq_routing info; + struct kvm_irq_routing_entry entries[2]; +} irq_routes; + static void evtchn_handler(struct ex_regs *regs) { struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; vi->evtchn_upcall_pending = 0; + vi->evtchn_pending_sel = 0; GUEST_SYNC(0x20); } @@ -127,7 +157,25 @@ static void guest_code(void) GUEST_SYNC(6); GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); - GUEST_DONE(); + /* Attempt to deliver a *masked* interrupt */ + GUEST_SYNC(7); + + /* Wait until we see the bit set */ + struct shared_info *si = (void *)SHINFO_VADDR; + while (!si->evtchn_pending[0]) + __asm__ __volatile__ ("rep nop" : : : "memory"); + + /* Now deliver an *unmasked* interrupt */ + GUEST_SYNC(8); + + while (!si->evtchn_pending[1]) + __asm__ __volatile__ ("rep nop" : : : "memory"); + + /* Change memslots and deliver an interrupt */ + GUEST_SYNC(9); + + for (;;) + __asm__ __volatile__ ("rep nop" : : : "memory"); } static int cmp_timespec(struct timespec *a, struct timespec *b) @@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b) return 0; } +static void handle_alrm(int sig) +{ + TEST_FAIL("IRQ delivery timed out"); +} + int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + bool verbose; + + verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || + !strncmp(argv[1], "--verbose", 10)); int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { @@ -155,6 +212,7 @@ int main(int argc, char *argv[]) } bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); clock_gettime(CLOCK_REALTIME, &min_ts); @@ -166,6 +224,11 @@ int main(int argc, char *argv[]) SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); + struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); + + int zero_fd = open("/dev/zero", O_RDONLY); + TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); + struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, .msr = XEN_HYPERCALL_MSR, @@ -184,6 +247,16 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); + /* + * Test what happens when the HVA of the shinfo page is remapped after + * the kernel has a reference to it. But make sure we copy the clock + * info over since that's only set at setup time, and we test it later. + */ + struct pvclock_wall_clock wc_copy = shinfo->wc; + void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); + TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); + shinfo->wc = wc_copy; + struct kvm_xen_vcpu_attr vi = { .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, .u.gpa = VCPU_INFO_ADDR, @@ -214,6 +287,49 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } + int irq_fd[2] = { -1, -1 }; + + if (do_eventfd_tests) { + irq_fd[0] = eventfd(0, 0); + irq_fd[1] = eventfd(0, 0); + + /* Unexpected, but not a KVM failure */ + if (irq_fd[0] == -1 || irq_fd[1] == -1) + do_eventfd_tests = false; + } + + if (do_eventfd_tests) { + irq_routes.info.nr = 2; + + irq_routes.entries[0].gsi = 32; + irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; + irq_routes.entries[0].u.xen_evtchn.port = 15; + irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID; + irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + irq_routes.entries[1].gsi = 33; + irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; + irq_routes.entries[1].u.xen_evtchn.port = 66; + irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID; + irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes); + + struct kvm_irqfd ifd = { }; + + ifd.fd = irq_fd[0]; + ifd.gsi = 32; + vm_ioctl(vm, KVM_IRQFD, &ifd); + + ifd.fd = irq_fd[1]; + ifd.gsi = 33; + vm_ioctl(vm, KVM_IRQFD, &ifd); + + struct sigaction sa = { }; + sa.sa_handler = handle_alrm; + sigaction(SIGALRM, &sa, NULL); + } + struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); vinfo->evtchn_upcall_pending = 0; @@ -248,6 +364,8 @@ int main(int argc, char *argv[]) switch (uc.args[1]) { case 0: + if (verbose) + printf("Delivering evtchn upcall\n"); evtchn_irq_expected = true; vinfo->evtchn_upcall_pending = 1; break; @@ -256,11 +374,16 @@ int main(int argc, char *argv[]) TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); if (!do_runstate_tests) goto done; + if (verbose) + printf("Testing runstate %s\n", runstate_names[uc.args[1]]); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.u.runstate.state = uc.args[1]; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); break; + case 4: + if (verbose) + printf("Testing RUNSTATE_ADJUST\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; memset(&rst.u, 0, sizeof(rst.u)); rst.u.runstate.state = (uint64_t)-1; @@ -274,6 +397,8 @@ int main(int argc, char *argv[]) break; case 5: + if (verbose) + printf("Testing RUNSTATE_DATA\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; memset(&rst.u, 0, sizeof(rst.u)); rst.u.runstate.state = RUNSTATE_running; @@ -282,16 +407,54 @@ int main(int argc, char *argv[]) rst.u.runstate.time_offline = 0x5a; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); break; + case 6: + if (verbose) + printf("Testing steal time\n"); /* Yield until scheduler delay exceeds target */ rundelay = get_run_delay() + MIN_STEAL_TIME; do { sched_yield(); } while (get_run_delay() < rundelay); break; + + case 7: + if (!do_eventfd_tests) + goto done; + if (verbose) + printf("Testing masked event channel\n"); + shinfo->evtchn_mask[0] = 0x8000; + eventfd_write(irq_fd[0], 1UL); + alarm(1); + break; + + case 8: + if (verbose) + printf("Testing unmasked event channel\n"); + /* Unmask that, but deliver the other one */ + shinfo->evtchn_pending[0] = 0; + shinfo->evtchn_mask[0] = 0; + eventfd_write(irq_fd[1], 1UL); + evtchn_irq_expected = true; + alarm(1); + break; + + case 9: + if (verbose) + printf("Testing event channel after memslot change\n"); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); + eventfd_write(irq_fd[0], 1UL); + evtchn_irq_expected = true; + alarm(1); + break; + case 0x20: TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); evtchn_irq_expected = false; + if (shinfo->evtchn_pending[1] && + shinfo->evtchn_pending[0]) + goto done; break; } break; @@ -318,9 +481,19 @@ int main(int argc, char *argv[]) ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); ti2 = addr_gpa2hva(vm, PVTIME_ADDR); + if (verbose) { + printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); + printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", + ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, + ti->tsc_shift, ti->flags); + printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", + ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, + ti2->tsc_shift, ti2->flags); + } + vm_ts.tv_sec = wc->sec; vm_ts.tv_nsec = wc->nsec; - TEST_ASSERT(wc->version && !(wc->version & 1), + TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); @@ -341,6 +514,15 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); + if (verbose) { + printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", + rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", + rs->state, rs->state_entry_time); + for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { + printf("State %s: %" PRIu64 " ns\n", + runstate_names[i], rs->time[i]); + } + } TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, "State entry time mismatch"); diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh index 1b4d95d575f8..14fedeef762e 100755 --- a/tools/testing/selftests/lkdtm/stack-entropy.sh +++ b/tools/testing/selftests/lkdtm/stack-entropy.sh @@ -4,13 +4,27 @@ # Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. set -e samples="${1:-1000}" +TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT +KSELFTEST_SKIP_TEST=4 + +# Verify we have LKDTM available in the kernel. +if [ ! -r $TRIGGER ] ; then + /sbin/modprobe -q lkdtm || true + if [ ! -r $TRIGGER ] ; then + echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)" + else + echo "Cannot write $TRIGGER (need to run as root?)" + fi + # Skip this test + exit $KSELFTEST_SKIP_TEST +fi # Capture dmesg continuously since it may fill up depending on sample size. log=$(mktemp -t stack-entropy-XXXXXX) dmesg --follow >"$log" & pid=$! report=-1 for i in $(seq 1 $samples); do - echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT + echo "REPORT_STACK" > $TRIGGER if [ -t 1 ]; then percent=$(( 100 * $i / $samples )) if [ "$percent" -ne "$report" ]; then diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 192a2899bae8..94df2692e6e4 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -455,6 +455,7 @@ static void mfd_fail_write(int fd) printf("mmap()+mprotect() didn't fail as expected\n"); abort(); } + munmap(p, mfd_def_size); } /* verify PUNCH_HOLE fails */ diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e54106643337..4c88238fc8f0 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -207,15 +207,21 @@ TEST(check_file_mmap) errno = 0; fd = open(".", O_TMPFILE | O_RDWR, 0600); - ASSERT_NE(-1, fd) { - TH_LOG("Can't create temporary file: %s", - strerror(errno)); + if (fd < 0) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + SKIP(goto out_free, "O_TMPFILE not supported by filesystem."); } errno = 0; retval = fallocate(fd, 0, 0, FILE_SIZE); - ASSERT_EQ(0, retval) { - TH_LOG("Error allocating space for the temporary file: %s", - strerror(errno)); + if (retval) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + SKIP(goto out_close, "fallocate not supported by filesystem."); } /* @@ -271,7 +277,9 @@ TEST(check_file_mmap) } munmap(addr, FILE_SIZE); +out_close: close(fd); +out_free: free(vec); } diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index f31205f04ee0..8c5fea68ae67 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1236,7 +1236,7 @@ static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long } /** - * Validate that an attached mount in our mount namespace can be idmapped. + * Validate that an attached mount in our mount namespace cannot be idmapped. * (The kernel enforces that the mount's mount namespace and the caller's mount * namespace match.) */ @@ -1259,7 +1259,7 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); - ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 412d85205546..3f4c8cfe7aca 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4059,6 +4059,9 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test -v Be verbose + +Tests: + $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER EOF } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 8f6997d35816..d9d1d4190126 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } - if (ioam6h->type.bit6) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; + if (ioam6h->type.bit6) *p += sizeof(__u32); - } if (ioam6h->type.bit7) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 2674ba20d524..ff821025d309 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -71,6 +71,36 @@ chk_msk_remote_key_nr() __chk_nr "grep -c remote_key" $* } +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +wait_connected() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break + sleep 0.1 + done +} trap cleanup EXIT ip netns add $ns @@ -81,15 +111,15 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ 127.0.0.1 >/dev/null & -sleep 0.1 +wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" @@ -101,13 +131,13 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ 0.0.0.0 >/dev/null & -sleep 0.1 +wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ 127.0.0.1 >/dev/null & -sleep 0.1 +wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" flush_pids @@ -119,7 +149,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 10 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -sleep 0.1 +wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index cb5809b89081..f0f4ab96b8f3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -763,8 +763,8 @@ run_tests_disconnect() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" # restore previous status - cout=$old_cout - cout_disconnect="$cout".disconnect + sin=$old_sin + sin_disconnect="$cout".disconnect cin=$old_cin cin_disconnect="$cin".disconnect connect_per_transfer=1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 27d0eb9afdca..0c8a2a20b96c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -75,6 +75,7 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i done } @@ -659,6 +660,7 @@ chk_join_nr() local ack_nr=$4 local count local dump_stats + local with_cookie printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` @@ -672,12 +674,20 @@ chk_join_nr() fi echo -n " - synack" + with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies` count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - ret=1 - dump_stats=1 + # simult connections exceeding the limit with cookie enabled could go up to + # synack validation as the conn limit can be enforced reliably only after + # the subflow creation + if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then + echo -n "[ ok ]" + else + echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" + ret=1 + dump_stats=1 + fi else echo -n "[ ok ]" fi @@ -751,11 +761,17 @@ chk_add_nr() local mis_ack_nr=${8:-0} local count local dump_stats + local timeout + + timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout` printf "%-39s %s" " " "add" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'` [ -z "$count" ] && count=0 - if [ "$count" != "$add_nr" ]; then + + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" ret=1 dump_stats=1 @@ -960,7 +976,7 @@ wait_for_tw() local ns=$1 while [ $time -lt $timeout_ms ]; do - local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l) + local cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}') [ "$cnt" = 1 ] && return 1 time=$((time + 100)) @@ -1157,7 +1173,11 @@ signal_address_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal - run_tests $ns1 $ns2 10.0.1.1 + + # the peer could possibly miss some addr notification, allow retransmission + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "signal addresses race test" 3 3 3 # the server will not signal the address terminating # the MPC subflow @@ -1476,7 +1496,7 @@ ipv6_tests() reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single subflow IPv6" 1 1 1 @@ -1511,7 +1531,7 @@ ipv6_tests() ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 543ad7513a8e..694732e4b344 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -374,6 +374,16 @@ run_cmd() { return $rc } +run_cmd_bg() { + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: %s &\n" "${cmd}" + fi + + $cmd 2>&1 & +} + # Find the auto-generated name for this namespace nsname() { eval echo \$NS_$1 @@ -670,10 +680,10 @@ setup_nettest_xfrm() { [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} - run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 nettest_pids="${nettest_pids} $!" - run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 nettest_pids="${nettest_pids} $!" } @@ -865,7 +875,6 @@ setup_ovs_bridge() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -876,7 +885,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue - ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & tcpdump_pids="${tcpdump_pids} $!" ns_cmd= done @@ -1836,6 +1845,10 @@ run_test() { unset IFS + # Since cleanup() relies on variables modified by this subshell, it + # has to run in this context. + trap cleanup EXIT + if [ "$VERBOSE" = "1" ]; then printf "\n##########################################################################\n\n" fi diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index 694d70710ff0..dfc27cdc6c05 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=300 +timeout=1500 diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 8448f74adfec..4cb887b57413 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue +connect_close diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index ffca314897c4..7e81c9a7fff9 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,9 +6,9 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh + conntrack_vrf.sh nft_synproxy.sh LDLIBS = -lmnl -TEST_GEN_FILES = nf-queue +TEST_GEN_FILES = nf-queue connect_close include ../lib.mk diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c new file mode 100644 index 000000000000..1c3b0add54c4 --- /dev/null +++ b/tools/testing/selftests/netfilter/connect_close.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> + +#include <arpa/inet.h> +#include <sys/socket.h> + +#define PORT 12345 +#define RUNTIME 10 + +static struct { + unsigned int timeout; + unsigned int port; +} opts = { + .timeout = RUNTIME, + .port = PORT, +}; + +static void handler(int sig) +{ + _exit(sig == SIGALRM ? 0 : 1); +} + +static void set_timeout(void) +{ + struct sigaction action = { + .sa_handler = handler, + }; + + sigaction(SIGALRM, &action, NULL); + + alarm(opts.timeout); +} + +static void do_connect(const struct sockaddr_in *dst) +{ + int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s >= 0) + fcntl(s, F_SETFL, O_NONBLOCK); + + connect(s, (struct sockaddr *)dst, sizeof(*dst)); + close(s); +} + +static void do_accept(const struct sockaddr_in *src) +{ + int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s < 0) + return; + + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + + bind(s, (struct sockaddr *)src, sizeof(*src)); + + listen(s, 16); + + c = accept(s, NULL, NULL); + if (c >= 0) + close(c); + + close(s); +} + +static int accept_loop(void) +{ + struct sockaddr_in src = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &src.sin_addr); + + set_timeout(); + + for (;;) + do_accept(&src); + + return 1; +} + +static int connect_loop(void) +{ + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr); + + set_timeout(); + + for (;;) + do_connect(&dst); + + return 1; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "t:p:")) != -1) { + switch (c) { + case 't': + opts.timeout = atoi(optarg); + break; + case 'p': + opts.port = atoi(optarg); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + pid_t p; + + parse_opts(argc, argv); + + p = fork(); + if (p < 0) + return 111; + + if (p > 0) + return accept_loop(); + + return connect_loop(); +} diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index ed61f6cab60f..b35010cc7f6a 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -354,6 +354,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } @@ -1531,4 +1601,4 @@ for name in ${TESTS}; do done done -[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} +[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0 diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh index 6caf6ac8c285..695a1958723f 100755 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -174,6 +174,7 @@ test_ping() { ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null sleep 3 diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 349a319a9e51..eb8543b9a5c4 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -880,9 +880,8 @@ EOF return $ksft_skip fi - # test default behaviour. Packet from ns1 to ns0 is not redirected - # due to automatic port translation. - test_port_shadow "default" "ROUTER" + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" # test packet filter based mitigation: prevent forwarding of # packets claiming to come from the service port. @@ -899,6 +898,144 @@ EOF ip netns exec "$ns0" nft delete table $family nat } +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table ip stateless { + map xlate_in { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2, + } + } + map xlate_out { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99 + } + } + + chain prerouting { + type filter hook prerouting priority -400; policy accept; + ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in + ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add ip statless rules" + return $ksft_skip + fi + + reset_counters + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" + lret=1 + fi + + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" + lret=1 + fi + done + + reset_counters + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run stateless nat frag test without socat tool" + if [ $lret -eq 0 ]; then + return $ksft_skip + fi + + ip netns exec "$ns0" nft delete table ip stateless + return $lret + fi + + local tmpfile=$(mktemp) + dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null + + local outfile=$(mktemp) + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & + sc_r=$! + + sleep 1 + # re-do with large ping -> ip fragmentation + ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null + if [ $? -ne 0 ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 + lret=1 + fi + + wait + + cmp "$tmpfile" "$outfile" + if [ $? -ne 0 ]; then + ls -l "$tmpfile" "$outfile" + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 + lret=1 + fi + + rm -f "$tmpfile" "$outfile" + + # ns1 should have seen packets from 2.2, due to stateless rewrite. + expect="packets 3 bytes 4164" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" + lret=1 + fi + + ip netns exec "$ns0" nft delete table ip stateless + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete table ip stateless" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP statless for $ns2" + + return $lret +} + # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF @@ -965,6 +1102,19 @@ table inet filter { EOF done +# special case for stateless nat check, counter needs to +# be done before (input) ip defragmentation +ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF +table inet filter { + counter ns0insl {} + + chain pre { + type filter hook prerouting priority -400; policy accept; + ip saddr 10.0.2.2 counter name "ns0insl" + } +} +EOF + sleep 3 # test basic connectivity for i in 1 2; do @@ -1019,6 +1169,7 @@ $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet test_port_shadowing +test_stateless_nat_ip if [ $ret -ne 0 ];then echo -n "FAIL: " diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 7d27f1f3bc01..e12729753351 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -113,6 +113,7 @@ table inet $name { chain output { type filter hook output priority $prio; policy accept; tcp dport 12345 queue num 3 + tcp sport 23456 queue num 3 jump nfq } chain post { @@ -296,6 +297,23 @@ test_tcp_localhost() wait 2>/dev/null } +test_tcp_localhost_connectclose() +{ + tmpfile=$(mktemp) || exit 1 + + ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout & + + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & + local nfqpid=$! + + sleep 1 + rm -f "$tmpfile" + + wait $rpid + [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close" + wait 2>/dev/null +} + test_tcp_localhost_requeue() { ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF @@ -424,6 +442,7 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh new file mode 100755 index 000000000000..b62933b680d6 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_synproxy.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +rnd=$(mktemp -u XXXXXXXX) +nsr="nsr-$rnd" # synproxy machine +ns1="ns1-$rnd" # iperf client +ns2="ns2-$rnd" # iperf server + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "iperf3 --version" "run test without iperf3" +checktool "ip netns add $nsr" "create net namespace" + +modprobe -q nf_conntrack + +ip netns add $ns1 +ip netns add $ns2 + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + ip netns del $ns1 + ip netns del $ns2 + + ip netns del $nsr +} + +trap cleanup EXIT + +ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1 +ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2 + +for dev in lo veth0 veth1; do +ip -net $nsr link set $dev up +done + +ip -net $nsr addr add 10.0.1.1/24 dev veth0 +ip -net $nsr addr add 10.0.2.1/24 dev veth1 + +ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1 +ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0 + +for n in $ns1 $ns2; do + ip -net $n link set lo up + ip -net $n link set eth0 up +done +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns2 addr add 10.0.2.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns2 route add default via 10.0.2.1 + +# test basic connectivity +if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach $ns2" 1>&2 + exit 1 +fi + +if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 +fi + +ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 & +# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 & + +sleep 1 + +ip netns exec $nsr nft -f - <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 tcp flags syn counter notrack + } + + chain forward { + type filter hook forward priority 0; policy accept; + + ct state new,established counter accept + + meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp + + ct state invalid counter drop + + # make ns2 unreachable w.o. tcp synproxy + tcp flags syn counter drop + } +} +EOF +if [ $? -ne 0 ]; then + echo "SKIP: Cannot add nft synproxy" + exit $ksft_skip +fi + +ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null + +if [ $? -ne 0 ]; then + echo "FAIL: iperf3 returned an error" 1>&2 + ret=$? + ip netns exec $nsr nft list ruleset +else + echo "PASS: synproxy connection successful" +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index 04633119b29a..5a8db0b48928 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -9,7 +9,7 @@ ns="ns-$sfx" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -zones=20000 +zones=2000 have_ct_tool=0 ret=0 @@ -75,10 +75,10 @@ EOF while [ $i -lt $max_zones ]; do local start=$(date +%s%3N) - i=$((i + 10000)) + i=$((i + 1000)) j=$((j + 1)) # nft rule in output places each packet in a different zone. - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 + dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break @@ -86,7 +86,7 @@ EOF stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" done if [ $have_ct_tool -eq 1 ]; then @@ -128,11 +128,11 @@ test_conntrack_tool() { break fi - if [ $((i%10000)) -eq 0 ];then + if [ $((i%1000)) -eq 0 ];then stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total)" + echo "PASS: added 1000 entries in $duration ms (now $i total)" start=$stop fi done diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 4b93b1417b86..843ba56d8e49 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk -$(TEST_GEN_PROGS): helpers.c +$(TEST_GEN_PROGS): helpers.c helpers.h diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index a6ea27344db2..7056340b9339 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include <stdint.h> +#include <stdbool.h> #include <errno.h> #include <linux/types.h> #include "../kselftest.h" @@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how); (similar to chroot(2)). */ #endif /* RESOLVE_IN_ROOT */ -#define E_func(func, ...) \ - do { \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed\n", \ - __FILE__, __LINE__, #func);\ +#define E_func(func, ...) \ + do { \ + errno = 0; \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ + __FILE__, __LINE__, #func, errno); \ } while (0) #define E_asprintf(...) E_func(asprintf, __VA_ARGS__) diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 1bddbe934204..7fb902099de4 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -259,6 +259,16 @@ void test_openat2_flags(void) unlink(path); fd = sys_openat2(AT_FDCWD, path, &test->how); + if (fd < 0 && fd == -EOPNOTSUPP) { + /* + * Skip the testcase if it failed because not supported + * by FS. (e.g. a valid O_TMPFILE combination on NFS) + */ + ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", + test->name, fd, strerror(-fd)); + goto next; + } + if (test->err >= 0) failed = (fd < 0); else @@ -303,7 +313,7 @@ skip: else resultfn("openat2 with %s fails with %d (%s)\n", test->name, test->err, strerror(-test->err)); - +next: free(fdpath); fflush(stdout); } diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 8e83cf91513a..6d849dc2bee0 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -44,9 +44,10 @@ static struct { } ctx; /* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ -#define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) +#define TEST_SIG_DATA(addr, id) (~(unsigned long)(addr) + id) -static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) +static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr, + unsigned long id) { struct perf_event_attr attr = { .type = PERF_TYPE_BREAKPOINT, @@ -60,7 +61,7 @@ static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ .remove_on_exec = 1, /* Required by sigtrap. */ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ - .sig_data = TEST_SIG_DATA(addr), + .sig_data = TEST_SIG_DATA(addr, id), }; return attr; } @@ -110,7 +111,7 @@ FIXTURE(sigtrap_threads) FIXTURE_SETUP(sigtrap_threads) { - struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on); + struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on, 0); struct sigaction action = {}; int i; @@ -165,7 +166,7 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -175,7 +176,7 @@ TEST_F(sigtrap_threads, enable_event) /* Test that modification propagates to all inherited events. */ TEST_F(sigtrap_threads, modify_and_enable_event) { - struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on); + struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on, 42); EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0); run_test_threads(_metadata, self); @@ -184,7 +185,7 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 42)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -204,7 +205,7 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0)); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 01f8d3c0cf2c..6922d6417e1c 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -68,7 +68,7 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 -int wait_for_pid(pid_t pid) +static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -78,13 +78,20 @@ again: if (errno == EINTR) goto again; + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); return -1; } - if (!WIFEXITED(status)) + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); return -1; + } - return WEXITSTATUS(status); + ret = WEXITSTATUS(status); + ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); + return ret; } static inline int sys_pidfd_open(pid_t pid, unsigned int flags) diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 22558524f71c..3fd8e903118f 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -12,6 +12,7 @@ #include <string.h> #include <syscall.h> #include <sys/wait.h> +#include <sys/mman.h> #include "pidfd.h" #include "../kselftest.h" @@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name) return err->code; } +#define CHILD_STACK_SIZE 8192 + struct child { + char *stack; pid_t pid; int fd; }; @@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args, struct error *err) { static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD; - size_t stack_size = 1024; - char *stack[1024] = { 0 }; struct child ret; if (!(flags & CLONE_NEWUSER) && geteuid() != 0) flags |= CLONE_NEWUSER; + ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (ret.stack == MAP_FAILED) { + error_set(err, -1, "mmap of stack failed (errno %d)", errno); + return ret; + } + #ifdef __ia64__ - ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd); + ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd); #else - ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd); + ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd); #endif if (ret.pid < 0) { @@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err) else if (r > 0) error_set(err, r, "child %d reported: %d", child->pid, r); + if (munmap(child->stack, CHILD_STACK_SIZE)) { + error_set(err, -1, "munmap of child stack failed (errno %d)", errno); + r = -1; + } + return r; } diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 529eb700ac26..9a2d64901d59 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid) { int pid, pidfd = 0; int status, ret; - pthread_t t1; time_t prog_start = time(NULL); const char *test_name = "pidfd_poll check for premature notification on child thread exec"; @@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args) */ *child_exit_secs = time(NULL); syscall(SYS_exit, 0); + /* Never reached, but appeases compiler thinking we should return. */ + exit(0); } static void test_pidfd_poll_leader_exit(int use_waitpid) { int pid, pidfd = 0; - int status, ret; - time_t prog_start = time(NULL); + int status, ret = 0; const char *test_name = "pidfd_poll check for premature notification on non-empty" "group leader exit"; diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index be2943f072f6..17999e082aa7 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, TEST(wait_simple) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), @@ -47,7 +47,6 @@ TEST(wait_simple) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, @@ -88,7 +87,7 @@ TEST(wait_simple) TEST(wait_states) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index b0b20e0b4e30..f43aa4b77fba 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush for m in $mitigations do - do_one "$m" & + if [[ -f /sys/kernel/debug/powerpc/$m ]] + then + do_one "$m" & + fi done echo "Spawned threads enabling/disabling mitigations ..." diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index adc2b7294e5f..83647b8277e7 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -193,7 +193,7 @@ int spectre_v2_test(void) * We are not vulnerable and reporting otherwise, so * missing such a mismatch is safe. */ - if (state == VULNERABLE) + if (miss_percent > 95) return 4; return 1; diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index ce3375cd8e73..9d0915777fed 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -4,3 +4,5 @@ signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +sigreturn_kernel +sigreturn_unaligned diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index d6ae54663aed..f679d260afc8 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +TEST_GEN_PROGS += sigreturn_kernel +TEST_GEN_PROGS += sigreturn_unaligned CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c new file mode 100644 index 000000000000..0a1b6e591eee --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can't sigreturn to kernel addresses, or to kernel mode. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#define MSR_PR (1ul << 14) + +static volatile unsigned long long sigreturn_addr; +static volatile unsigned long long sigreturn_msr_mask; + +static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr) +{ + ucontext_t *uc = (ucontext_t *)uc_ptr; + + if (sigreturn_addr) + UCONTEXT_NIA(uc) = sigreturn_addr; + + if (sigreturn_msr_mask) + UCONTEXT_MSR(uc) &= sigreturn_msr_mask; +} + +static pid_t fork_child(void) +{ + pid_t pid; + + pid = fork(); + if (pid == 0) { + raise(SIGUSR1); + exit(0); + } + + return pid; +} + +static int expect_segv(pid_t pid) +{ + int child_ret; + + waitpid(pid, &child_ret, 0); + FAIL_IF(WIFEXITED(child_ret)); + FAIL_IF(!WIFSIGNALED(child_ret)); + FAIL_IF(WTERMSIG(child_ret) != 11); + + return 0; +} + +int test_sigreturn_kernel(void) +{ + struct sigaction act; + int child_ret, i; + pid_t pid; + + act.sa_sigaction = sigusr1_handler; + act.sa_flags = SA_SIGINFO; + sigemptyset(&act.sa_mask); + + FAIL_IF(sigaction(SIGUSR1, &act, NULL)); + + for (i = 0; i < 2; i++) { + // Return to kernel + sigreturn_addr = 0xcull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to kernel virtual + sigreturn_addr = 0xc008ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return out of range + sigreturn_addr = 0xc010ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, just below PAGE_OFFSET + sigreturn_addr = (0xcull << 60) - (64 * 1024); + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, above TASK_SIZE_4PB + sigreturn_addr = 0x1ull << 52; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xd space + sigreturn_addr = 0xdull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xe space + sigreturn_addr = 0xeull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xf space + sigreturn_addr = 0xfull << 60; + pid = fork_child(); + expect_segv(pid); + + // Attempt to set PR=0 for 2nd loop (should be blocked by kernel) + sigreturn_msr_mask = ~MSR_PR; + } + + printf("All children killed as expected\n"); + + // Don't change address, just MSR, should return to user as normal + sigreturn_addr = 0; + sigreturn_msr_mask = ~MSR_PR; + pid = fork_child(); + waitpid(pid, &child_ret, 0); + FAIL_IF(!WIFEXITED(child_ret)); + FAIL_IF(WIFSIGNALED(child_ret)); + FAIL_IF(WEXITSTATUS(child_ret) != 0); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_kernel, "sigreturn_kernel"); +} diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c new file mode 100644 index 000000000000..6e58ee4f0fdf --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test sigreturn to an unaligned address, ie. low 2 bits set. + * Nothing bad should happen. + * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + + +static void sigusr1_handler(int signo, siginfo_t *info, void *ptr) +{ + ucontext_t *uc = ptr; + + UCONTEXT_NIA(uc) |= 3; +} + +static int test_sigreturn_unaligned(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = sigusr1_handler; + action.sa_flags = SA_SIGINFO; + + FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1); + + raise(SIGUSR1); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned"); +} diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index e6a132df6172..69f8a5958cef 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <[email protected]> -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 5a0023d183da..0941f1ddab65 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -47,8 +47,8 @@ else exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh dryrun= diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh index 370406bbfeed..f17000a2ccf1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -49,8 +49,8 @@ fi mkdir $resdir/$ds echo Results directory: $resdir/$ds -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh echo Using all `identify_qemu_vcpus` CPUs. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index e4a00779b8c6..ee886b40a5d2 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ trap 'rm -rf $T' 0 mkdir $T -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh default_starttime="`get_starttime`" starttime="${2-default_starttime}" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index daf64b507038..5f682fc892dd 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -30,9 +30,15 @@ editor=${EDITOR-vi} files= for i in ${rundir}/*/Make.out do - if egrep -q "error:|warning:" < $i + scenariodir="`dirname $i`" + scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`" + if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i then - egrep "error:|warning:" < $i > $i.diags + egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags + files="$files $i.diags $i" + elif ! test -f ${scenariobasedir}/vmlinux + then + echo No ${scenariobasedir}/vmlinux file > $i.diags files="$files $i.diags $i" fi done diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index fbdf162b6acd..43e1387234d1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^\[[ 0-9.]*] //' | awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | tr -d '\012\015'`" -fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`" +fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`" if test -z "$ngps" then echo "$configfile ------- " $stopstate diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index c7d42ef80c53..8c4c1e4792d0 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -19,8 +19,8 @@ then exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh starttime="`get_starttime`" @@ -108,8 +108,8 @@ else cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log" exit 2 fi - cp -a "$rundir" "$KVM/res/" - oldrun="$KVM/res/$ds" + cp -a "$rundir" "$RCUTORTURE/res/" + oldrun="$RCUTORTURE/res/$ds" fi echo | tee -a "$oldrun/remote-log" echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log" @@ -144,7 +144,7 @@ do if test "$ret" -ne 0 then echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log" - exit 4 | tee -a "$oldrun/remote-log" + exit 4 fi done @@ -155,11 +155,23 @@ do echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" ret=$? - if test "$ret" -ne 0 - then - echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" - exit 10 | tee -a "$oldrun/remote-log" - fi + tries=0 + while test "$ret" -ne 0 + do + echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log" + sleep 60 + cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" + ret=$? + if test "$ret" -ne 0 + then + if test "$tries" > 5 + then + echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" + exit 10 + fi + fi + tries=$((tries+1)) + done done # Function to check for presence of a file on the specified system. @@ -177,16 +189,16 @@ checkremotefile () { ret=$? if test "$ret" -eq 255 then - echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` + echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" elif test "$ret" -eq 0 then return 0 elif test "$ret" -eq 1 then - echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" + echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log" return 1 else - echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` + echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" return $ret fi sleep $sleeptime @@ -245,7 +257,7 @@ do sleep 30 fi done -echo All batches started. `date` +echo All batches started. `date` | tee -a "$oldrun/remote-log" # Wait for all remaining scenarios to complete and collect results. for i in $systems @@ -254,7 +266,7 @@ do do sleep 30 done - echo " ---" Collecting results from $i `date` + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6cf9ec6a3d1c..55b2c1533282 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG dur=$((30*60)) dryrun="" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_BUILDONLY= -TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD +TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KCONFIG_GDB_ARG="" TORTURE_BOOT_GDB_ARG="" @@ -74,7 +74,9 @@ usage () { echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" + echo " --kasan" echo " --kconfig Kconfig-options" + echo " --kcsan" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" @@ -83,6 +85,7 @@ usage () { echo " --qemu-cmd qemu-system-..." echo " --remote" echo " --results absolute-pathname" + echo " --shutdown-grace seconds" echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 @@ -175,14 +178,14 @@ do jitter="$2" shift ;; + --kasan) + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + ;; --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; - --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG - ;; --kcsan) TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; @@ -259,7 +262,7 @@ else exit 1 fi -CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG +CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`" if test -z "$configs" @@ -269,7 +272,7 @@ fi if test -z "$resdir" then - resdir=$KVM/res + resdir=$RCUTORTURE/res fi # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. @@ -277,7 +280,7 @@ configs_derep= for CF in $configs do case $CF in - [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**) config_reps=`echo $CF | sed -e 's/\*.*$//'` CF1=`echo $CF | sed -e 's/^[^*]*\*//'` ;; @@ -383,7 +386,7 @@ END { # Generate a script to execute the tests in appropriate batches. cat << ___EOF___ > $T/script CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG -KVM="$KVM"; export KVM +RCUTORTURE="$RCUTORTURE"; export RCUTORTURE PATH="$PATH"; export PATH TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE @@ -566,7 +569,7 @@ ___EOF___ awk < $T/cfgcpu.pack \ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \ -v CONFIGDIR="$CONFIGFRAG/" \ - -v KVM="$KVM" \ + -v RCUTORTURE="$RCUTORTURE" \ -v ncpus=$cpus \ -v jitter="$jitter" \ -v rd=$resdir/$ds/ \ diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 9313e5065ae9..2dbfca3589b1 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -39,7 +39,8 @@ fi grep warning: < $F > $T/warnings grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings -cat $T/hwarnings $T/cwarnings > $T/rcuwarnings +grep "^ld: .*undefined reference to" $T/warnings | head -1 > $T/ldwarnings +cat $T/hwarnings $T/cwarnings $T/ldwarnings > $T/rcuwarnings if test -s $T/rcuwarnings then print_warning $title build errors: diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 9f624bd53c27..822eb037a057 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -138,6 +138,16 @@ then then summary="$summary Bugs: $n_bugs" fi + n_kcsan=`egrep -c 'BUG: KCSAN: ' $file` + if test "$n_kcsan" -ne 0 + then + if test "$n_bugs" = "$n_kcsan" + then + summary="$summary (all bugs kcsan)" + else + summary="$summary KCSAN: $n_kcsan" + fi + fi n_calltrace=`grep -c 'Call Trace:' $file` if test "$n_calltrace" -ne 0 then diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2a..bfe09e2829c8 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -13,8 +13,8 @@ scriptname=$0 args="$*" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" @@ -37,7 +37,7 @@ configs_scftorture= kcsan_kmake_args= # Default compression, duration, and apportionment. -compress_kasan_vmlinux="`identify_qemu_vcpus`" +compress_concurrency="`identify_qemu_vcpus`" duration_base=10 duration_rcutorture_frac=7 duration_locktorture_frac=1 @@ -67,12 +67,12 @@ function doyesno () { usage () { echo "Usage: $scriptname optional arguments:" - echo " --compress-kasan-vmlinux concurrency" + echo " --compress-concurrency concurrency" echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" - echo " --doall" - echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-all" + echo " --do-allmodconfig / --do-no-allmodconfig" echo " --do-clocksourcewd / --do-no-clocksourcewd" echo " --do-kasan / --do-no-kasan" echo " --do-kcsan / --do-no-kcsan" @@ -91,9 +91,9 @@ usage () { while test $# -gt 0 do case "$1" in - --compress-kasan-vmlinux) - checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' - compress_kasan_vmlinux=$2 + --compress-concurrency) + checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' + compress_concurrency=$2 shift ;; --config-rcutorture|--configs-rcutorture) @@ -414,8 +414,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - cat "$T/failures" | tee -a $T/log + awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" + grep "^ Summary: " "$T/failuresum" | + grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" + if test -s "$T/nonkcsan" + then + nonkcsanbug="yes" + fi ret=2 fi if test "$do_kcsan" = "yes" @@ -424,12 +430,16 @@ then fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log +if test -z "$nonkcsanbug" && test -s "$T/failuresum" +then + echo " All bugs were KCSAN failures." +fi tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0 +if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. - echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo + echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -447,7 +457,7 @@ then do xz "$j" >> "$tdir/log-xz" 2>&1 & ncompresses=$((ncompresses+1)) - if test $ncompresses -ge $compress_kasan_vmlinux + if test $ncompresses -ge $compress_concurrency then echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log wait diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 index 3ca112444ce7..7093422050f6 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=3 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot index 238bfe3bd0cc..ce0694fd9b92 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -1 +1,2 @@ rcutorture.torture_type=srcu +rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index ce48c7b82673..2db39f298d18 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1,2 +1,4 @@ rcutorture.torture_type=srcud rcupdate.rcu_self_test=1 +rcutorture.fwd_progress=3 +srcutree.big_cpu_lim=5 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index d6557c38dfe4..c70cf0405f24 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index 6bc24e99862f..bc9eeabaa1b1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot index 22cdeced98ea..30ca5b493c4b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot @@ -1,2 +1,3 @@ rcutorture.torture_type=tasks rcutree.use_softirq=0 +rcupdate.rcu_task_enqueue_lim=4 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 index 6db705e55487..0953c52fcfd7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index d8674264318d..30439f6fc20e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=y CONFIG_NO_HZ_IDLE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 34c8ff5a12f2..e4d74e5fc1d0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=5 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot index 9675ad632dcc..ba6d636a4856 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks-tracing +rcupdate.rcu_task_enqueue_lim=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot index 9675ad632dcc..c70b5db6c2ae 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks-tracing +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index b5b53973c01e..8ae41d5f81a3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 65daee4fbf5a..2871ee599891 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot new file mode 100644 index 000000000000..dd914fa8f690 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot @@ -0,0 +1 @@ +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index f6d6a40c0576..22ad0261728d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 4f95f8544f3f..9f48c73709ec 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index bf4980d606b5..db27651de04b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index d7afb271a586..2789b47e4ecd 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index c810c5276a89..8b561355b9ef 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 7311f84a5876..4a00539bfdd7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot new file mode 100644 index 000000000000..dd914fa8f690 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot @@ -0,0 +1 @@ +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index fb05ef5279b4..0fa2dc086e10 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY @@ -2,11 +2,11 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index e6baa2fbaeb3..227aba7783af 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -5,7 +5,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index 4cc1cc581321..f110d9ffbe4c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 index f5952061fde7..9f83e5372796 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index ad505a887bec..7f06838a91e6 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT index 4f08e641bb6b..52e3ef674056 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 1b96d68473b8..42acb1a64ce1 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs. CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with rcu_nocbs=0, and one with all rcu_nocbs CPUs. CONFIG_RCU_TRACE -- Do half. diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 2af9d39a9716..215e1067f037 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -6,7 +6,7 @@ endif CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \ $(CLANG_FLAGS) -LDLIBS += -lpthread +LDLIBS += -lpthread -ldl # Own dependencies because we only want to build against 1st prerequisite, but # still track changes to header files and depend on shared object. diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index b953a52ff706..517756afc2a4 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -167,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret, cpu; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h new file mode 100644 index 000000000000..876eb6a7f75b --- /dev/null +++ b/tools/testing/selftests/rseq/compiler.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq/compiler.h + * + * Work-around asm goto compiler bugs. + * + * (C) Copyright 2021 - Mathieu Desnoyers <[email protected]> + */ + +#ifndef RSEQ_COMPILER_H +#define RSEQ_COMPILER_H + +/* + * gcc prior to 4.8.2 miscompiles asm goto. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * gcc prior to 8.1.0 miscompiles asm goto at O1. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908 + * + * clang prior to version 13.0.1 miscompiles asm goto at O2. + * https://github.com/llvm/llvm-project/issues/52735 + * + * Work around these issues by adding a volatile inline asm with + * memory clobber in the fallthrough after the asm goto and at each + * label target. Emit this for all compilers in case other similar + * issues are found in the future. + */ +#define rseq_after_asm_goto() asm volatile ("" : : : "memory") + +#endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 699ad5f93c34..da23c22d5882 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort; " cbnz " INJECT_ASM_REG ", 222b\n" \ "333:\n" -#elif __PPC__ +#elif defined(__PPC__) #define RSEQ_INJECT_INPUT \ , [loop_cnt_1]"m"(loop_cnt[1]) \ @@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { - int cpu = rseq_cpu_start(); - - cpu = rseq_this_cpu_lock(&data->lock); + int cpu = rseq_this_cpu_lock(&data->lock); data->c[cpu].count++; rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK @@ -551,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; - off_t offset; + long offset; int ret; cpu = rseq_cpu_start(); diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h new file mode 100644 index 000000000000..a8c44d9af71f --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _RSEQ_ABI_H +#define _RSEQ_ABI_H + +/* + * rseq-abi.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2022 Mathieu Desnoyers <[email protected]> + */ + +#include <linux/types.h> +#include <asm/byteorder.h> + +enum rseq_abi_cpu_id_state { + RSEQ_ABI_CPU_ID_UNINITIALIZED = -1, + RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum rseq_abi_flags { + RSEQ_ABI_FLAG_UNREGISTER = (1 << 0), +}; + +enum rseq_abi_cs_flags_bit { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum rseq_abi_cs_flags { + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +/* + * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. + */ +struct rseq_abi_cs { + /* Version of this structure. */ + __u32 version; + /* enum rseq_abi_cs_flags */ + __u32 flags; + __u64 start_ip; + /* Offset from start_ip. */ + __u64 post_commit_offset; + __u64 abort_ip; +} __attribute__((aligned(4 * sizeof(__u64)))); + +/* + * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. + * + * A single struct rseq_abi per thread is allowed. + */ +struct rseq_abi { + /* + * Restartable sequences cpu_id_start field. Updated by the + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. + */ + __u32 cpu_id_start; + /* + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. + */ + __u32 cpu_id; + /* + * Restartable sequences rseq_cs field. + * + * Contains NULL when no critical section is active for the current + * thread, or holds a pointer to the currently active struct rseq_cs. + * + * Updated by user-space, which sets the address of the currently + * active rseq_cs at the beginning of assembly instruction sequence + * block, and set to NULL by the kernel when it restarts an assembly + * instruction sequence block, as well as when the kernel detects that + * it is preempting or delivering a signal outside of the range + * targeted by the rseq_cs. Also needs to be set to NULL by user-space + * before reclaiming memory that contains the targeted struct rseq_cs. + * + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. + */ + union { + __u64 ptr64; + + /* + * The "arch" field provides architecture accessor for + * the ptr field based on architecture pointer size and + * endianness. + */ + struct { +#ifdef __LP64__ + __u64 ptr; +#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr; +#else + __u32 ptr; + __u32 padding; /* Initialized to zero. */ +#endif + } arch; + } rseq_cs; + + /* + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. + * + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT + * Inhibit instruction sequence block restart on preemption + * for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL + * Inhibit instruction sequence block restart on signal + * delivery for this thread. + * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE + * Inhibit instruction sequence block restart on migration for + * this thread. + */ + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +#endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 5943c816c07c..893a11eca9d5 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -147,14 +147,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -185,8 +182,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -198,30 +195,31 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -255,8 +253,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -270,19 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -292,7 +292,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -316,8 +315,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -328,14 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,7 +347,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -381,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -398,19 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -422,7 +423,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -474,19 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -498,7 +500,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -537,8 +538,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -554,21 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -582,7 +586,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -657,8 +660,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -678,21 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -706,7 +709,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -782,8 +784,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -803,21 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 200dae9e4208..cbe190a4d005 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -242,24 +242,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -287,8 +291,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expectnot] "r" (expectnot), [load] "Qo" (*load), @@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -337,8 +346,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -388,8 +400,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -447,8 +463,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -508,8 +528,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "Qo" (*v), [expect] "r" (expect), [v2] "Qo" (*v2), @@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -569,8 +594,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -629,8 +658,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "Qo" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "Qo" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "Qo" (*v), [newv] "r" (newv), @@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h new file mode 100644 index 000000000000..38c584661571 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-generic-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers <[email protected]> + */ + +#ifndef _RSEQ_GENERIC_THREAD_POINTER +#define _RSEQ_GENERIC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use gcc builtin thread pointer. */ +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index e989e7c14b09..878739fae2fd 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -154,14 +154,11 @@ do { \ teardown \ "b %l[" __rseq_str(cmpfail_label) "]\n\t" -#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") - static inline __attribute__((always_inline)) int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -190,8 +187,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -222,11 +216,10 @@ error2: static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -258,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ #ifdef RSEQ_COMPARE_TWICE @@ -319,8 +308,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "Ir" (count) RSEQ_INJECT_INPUT @@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE @@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -382,8 +368,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -456,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, { RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -532,8 +510,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "5:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: @@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -649,8 +623,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } @@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) - rseq_workaround_gcc_asm_size_guess(); __asm__ __volatile__ goto ( RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail]) @@ -771,8 +739,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "8:\n\t" : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); - rseq_workaround_gcc_asm_size_guess(); return 0; abort: - rseq_workaround_gcc_asm_size_guess(); RSEQ_INJECT_FAILED return -1; cmpfail: - rseq_workaround_gcc_asm_size_guess(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("cpu_id comparison failed"); error2: - rseq_workaround_gcc_asm_size_guess(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h new file mode 100644 index 000000000000..263eee84fb76 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-ppc-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers <[email protected]> + */ + +#ifndef _RSEQ_PPC_THREAD_POINTER +#define _RSEQ_PPC_THREAD_POINTER + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void *rseq_thread_pointer(void) +{ +#ifdef __powerpc64__ + register void *__result asm ("r13"); +#else + register void *__result asm ("r2"); +#endif + asm ("" : "=r" (__result)); + return __result; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index 76be90196fe4..bab8e0b9fb11 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -47,10 +47,13 @@ do { \ #ifdef __PPC64__ -#define STORE_WORD "std " -#define LOAD_WORD "ld " -#define LOADX_WORD "ldx " -#define CMP_WORD "cmpd " +#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpd " +#define RSEQ_CMP_LONG_INT "cmpdi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -89,10 +92,13 @@ do { \ #else /* #ifdef __PPC64__ */ -#define STORE_WORD "stw " -#define LOAD_WORD "lwz " -#define LOADX_WORD "lwzx " -#define CMP_WORD "cmpw " +#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */ +#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */ +#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */ +#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */ +#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */ +#define RSEQ_CMP_LONG "cmpw " +#define RSEQ_CMP_LONG_INT "cmpwi " #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ @@ -125,7 +131,7 @@ do { \ RSEQ_INJECT_ASM(1) \ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \ - "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ + RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \ __rseq_str(label) ":\n\t" #endif /* #ifdef __PPC64__ */ @@ -136,7 +142,7 @@ do { \ #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ RSEQ_INJECT_ASM(2) \ - "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ + RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" @@ -153,25 +159,25 @@ do { \ * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7) */ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \ "bne- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ - CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \ "beq- cr7, " __rseq_str(label) "\n\t" #define RSEQ_ASM_OP_STORE(value, var) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" /* Load @var to r17 */ #define RSEQ_ASM_OP_R_LOAD(var) \ - LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Store r17 to @var */ #define RSEQ_ASM_OP_R_STORE(var) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" /* Add @count to r17 */ #define RSEQ_ASM_OP_R_ADD(count) \ @@ -179,11 +185,11 @@ do { \ /* Load (r17 + voffp) to r17 */ #define RSEQ_ASM_OP_R_LOADX(voffp) \ - LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" + RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t" /* TODO: implement a faster memcpy. */ #define RSEQ_ASM_OP_R_MEMCPY() \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "beq 333f\n\t" \ "addi %%r20, %%r20, -1\n\t" \ "addi %%r21, %%r21, -1\n\t" \ @@ -191,16 +197,16 @@ do { \ "lbzu %%r18, 1(%%r20)\n\t" \ "stbu %%r18, 1(%%r21)\n\t" \ "addi %%r19, %%r19, -1\n\t" \ - "cmpdi %%r19, 0\n\t" \ + RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \ "bne 222b\n\t" \ "333:\n\t" \ #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ - STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ - STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ + RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \ __rseq_str(post_commit_label) ":\n\t" static inline __attribute__((always_inline)) @@ -235,8 +241,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -248,23 +254,28 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -301,8 +312,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -316,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -359,8 +375,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -372,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -419,8 +438,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -436,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -489,8 +513,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -506,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -560,8 +589,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -577,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -635,8 +670,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -653,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -711,8 +751,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -729,23 +769,23 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } -#undef STORE_WORD -#undef LOAD_WORD -#undef LOADX_WORD -#undef CMP_WORD - #endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 8ef94ad1cbb4..4e6dc5f0cb42 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -198,7 +203,7 @@ error2: */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -233,8 +238,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -288,8 +298,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [count] "r" (count) @@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -347,8 +360,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -426,8 +444,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -534,8 +558,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h index 72750b5905a9..7b53dac1fcdd 100644 --- a/tools/testing/selftests/rseq/rseq-skip.h +++ b/tools/testing/selftests/rseq/rseq-skip.h @@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { return -1; } diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h new file mode 100644 index 000000000000..977c25d758b2 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers <[email protected]> + */ + +#ifndef _RSEQ_THREAD_POINTER +#define _RSEQ_THREAD_POINTER + +#if defined(__x86_64__) || defined(__i386__) +#include "rseq-x86-thread-pointer.h" +#elif defined(__PPC__) +#include "rseq-ppc-thread-pointer.h" +#else +#include "rseq-generic-thread-pointer.h" +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h new file mode 100644 index 000000000000..d3133587d996 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */ +/* + * rseq-x86-thread-pointer.h + * + * (C) Copyright 2021 - Mathieu Desnoyers <[email protected]> + */ + +#ifndef _RSEQ_X86_THREAD_POINTER +#define _RSEQ_X86_THREAD_POINTER + +#include <features.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if __GNUC_PREREQ (11, 1) +static inline void *rseq_thread_pointer(void) +{ + return __builtin_thread_pointer(); +} +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !GCC 11 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index 640411518e46..bd01dc41ca13 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -28,6 +28,8 @@ #ifdef __x86_64__ +#define RSEQ_ASM_TP_SEGMENT %%fs + #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc") #define rseq_smp_rmb() rseq_barrier() @@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -152,16 +154,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -172,7 +179,7 @@ error2: */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -184,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movq %[v], %%rbx\n\t" "cmpq %%rbx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -207,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -220,16 +227,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -245,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addq %[count], %[v]\n\t" @@ -258,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "er" (count) @@ -269,12 +281,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -286,7 +301,7 @@ error1: * *pval += inc; */ static inline __attribute__((always_inline)) -int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu) { RSEQ_INJECT_C(9) @@ -296,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* get p+v */ "movq %[ptr], %%rbx\n\t" @@ -314,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [ptr] "m" (*ptr), [off] "er" (off), @@ -351,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -372,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -387,16 +402,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -426,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -436,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpq %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpq %[v2], %[expect2]\n\t" @@ -449,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -464,18 +484,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -500,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movq %[dst], %[rseq_scratch1]\n\t" "movq %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpq %[v], %[expect]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "cmpq %[v], %[expect]\n\t" "jnz 7f\n\t" #endif @@ -555,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "r" (expect), @@ -574,16 +600,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -600,7 +631,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif /* !RSEQ_SKIP_FASTPATH */ -#elif __i386__ +#elif defined(__i386__) + +#define RSEQ_ASM_TP_SEGMENT %%gs #define rseq_smp_mb() \ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc") @@ -701,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -719,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -730,16 +763,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -750,7 +788,7 @@ error2: */ static inline __attribute__((always_inline)) int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, - off_t voffp, intptr_t *load, int cpu) + long voffp, intptr_t *load, int cpu) { RSEQ_INJECT_C(9) @@ -762,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[v], %%ebx\n\t" "cmpl %%ebx, %[expectnot]\n\t" "je %l[error2]\n\t" @@ -785,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expectnot] "r" (expectnot), @@ -798,16 +836,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -823,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) #endif /* final store */ "addl %[count], %[v]\n\t" @@ -836,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [count] "ir" (count) @@ -847,12 +890,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) , error1 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); #endif } @@ -872,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" #endif @@ -894,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "m" (newv2), @@ -909,16 +955,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -938,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "movl %[expect], %%eax\n\t" "cmpl %[v], %%eax\n\t" "jnz %l[error2]\n\t" @@ -962,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* try store input */ [v2] "m" (*v2), [newv2] "r" (newv2), @@ -977,16 +1028,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif @@ -1008,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3]) #endif /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "cmpl %[v], %[expect]\n\t" "jnz %l[cmpfail]\n\t" @@ -1018,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, "jnz %l[cmpfail]\n\t" RSEQ_INJECT_ASM(5) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1]) "cmpl %[v], %[expect]\n\t" "jnz %l[error2]\n\t" "cmpl %[expect2], %[v2]\n\t" @@ -1032,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, "", abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* cmp2 input */ [v2] "m" (*v2), [expect2] "r" (expect2), @@ -1047,18 +1103,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, , error1, error2, error3 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("1st expected value comparison failed"); error3: + rseq_after_asm_goto(); rseq_bug("2nd expected value comparison failed"); #endif } @@ -1084,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1142,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1161,16 +1223,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } @@ -1196,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, "movl %[dst], %[rseq_scratch1]\n\t" "movl %[len], %[rseq_scratch2]\n\t" /* Start rseq by storing table entry pointer into rseq_cs. */ - RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f) RSEQ_INJECT_ASM(3) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 5f\n\t" RSEQ_INJECT_ASM(4) #ifdef RSEQ_COMPARE_TWICE - RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f) "movl %[expect], %%eax\n\t" "cmpl %%eax, %[v]\n\t" "jnz 7f\n\t" @@ -1255,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [rseq_abi] "r" (&__rseq_abi), + [rseq_offset] "r" (rseq_offset), /* final store input */ [v] "m" (*v), [expect] "m" (expect), @@ -1274,16 +1341,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, , error1, error2 #endif ); + rseq_after_asm_goto(); return 0; abort: + rseq_after_asm_goto(); RSEQ_INJECT_FAILED return -1; cmpfail: + rseq_after_asm_goto(); return 1; #ifdef RSEQ_COMPARE_TWICE error1: + rseq_after_asm_goto(); rseq_bug("cpu_id comparison failed"); error2: + rseq_after_asm_goto(); rseq_bug("expected value comparison failed"); #endif } diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index fb440dfca158..986b9458efb2 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -26,130 +26,124 @@ #include <assert.h> #include <signal.h> #include <limits.h> +#include <dlfcn.h> +#include <stddef.h> #include "../kselftest.h" #include "rseq.h" -__thread volatile struct rseq __rseq_abi = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, -}; - -/* - * Shared with other libraries. This library may take rseq ownership if it is - * still 0 when executing the library constructor. Set to 1 by library - * constructor when handling rseq. Set to 0 in destructor if handling rseq. - */ -int __rseq_handled; - -/* Whether this library have ownership of rseq registration. */ -static int rseq_ownership; +static const ptrdiff_t *libc_rseq_offset_p; +static const unsigned int *libc_rseq_size_p; +static const unsigned int *libc_rseq_flags_p; -static __thread volatile uint32_t __rseq_refcount; +/* Offset from the thread pointer to the rseq area. */ +ptrdiff_t rseq_offset; -static void signal_off_save(sigset_t *oldset) -{ - sigset_t set; - int ret; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +unsigned int rseq_size = -1U; - sigfillset(&set); - ret = pthread_sigmask(SIG_BLOCK, &set, oldset); - if (ret) - abort(); -} +/* Flags used during rseq registration. */ +unsigned int rseq_flags; -static void signal_restore(sigset_t oldset) -{ - int ret; +static int rseq_ownership; - ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); - if (ret) - abort(); -} +static +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +}; -static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } -int rseq_register_current_thread(void) +int rseq_available(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) + abort(); + switch (errno) { + case ENOSYS: return 0; - signal_off_save(&oldset); - if (__rseq_refcount == UINT_MAX) { - ret = -1; - goto end; - } - if (__rseq_refcount++) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); - if (!rc) { - assert(rseq_current_cpu_raw() >= 0); - goto end; + case EINVAL: + return 1; + default: + abort(); } - if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; - ret = -1; - __rseq_refcount--; -end: - signal_restore(oldset); - return ret; } -int rseq_unregister_current_thread(void) +int rseq_register_current_thread(void) { - int rc, ret = 0; - sigset_t oldset; + int rc; - if (!rseq_ownership) + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ return 0; - signal_off_save(&oldset); - if (!__rseq_refcount) { - ret = -1; - goto end; } - if (--__rseq_refcount) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), - RSEQ_FLAG_UNREGISTER, RSEQ_SIG); - if (!rc) - goto end; - __rseq_refcount = 1; - ret = -1; -end: - signal_restore(oldset); - return ret; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); + if (rc) + return -1; + assert(rseq_current_cpu_raw() >= 0); + return 0; } -int32_t rseq_fallback_current_cpu(void) +int rseq_unregister_current_thread(void) { - int32_t cpu; + int rc; - cpu = sched_getcpu(); - if (cpu < 0) { - perror("sched_getcpu()"); - abort(); + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ + return 0; } - return cpu; + rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; } -void __attribute__((constructor)) rseq_init(void) +static __attribute__((constructor)) +void rseq_init(void) { - /* Check whether rseq is handled by another library. */ - if (__rseq_handled) + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + return; + } + if (!rseq_available()) return; - __rseq_handled = 1; rseq_ownership = 1; + rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_size = sizeof(struct rseq_abi); + rseq_flags = 0; } -void __attribute__((destructor)) rseq_fini(void) +static __attribute__((destructor)) +void rseq_exit(void) { if (!rseq_ownership) return; - __rseq_handled = 0; + rseq_offset = 0; + rseq_size = -1U; rseq_ownership = 0; } + +int32_t rseq_fallback_current_cpu(void) +{ + int32_t cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 3f63eb362b92..9d850b290c2e 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -16,7 +16,9 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> -#include <linux/rseq.h> +#include <stddef.h> +#include "rseq-abi.h" +#include "compiler.h" /* * Empty code injection macros, override when testing. @@ -43,8 +45,20 @@ #define RSEQ_INJECT_FAILED #endif -extern __thread volatile struct rseq __rseq_abi; -extern int __rseq_handled; +#include "rseq-thread-pointer.h" + +/* Offset from the thread pointer to the rseq area. */ +extern ptrdiff_t rseq_offset; +/* Size of the registered rseq area. 0 if the registration was + unsuccessful. */ +extern unsigned int rseq_size; +/* Flags used during rseq registration. */ +extern unsigned int rseq_flags; + +static inline struct rseq_abi *rseq_get_abi(void) +{ + return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset); +} #define rseq_likely(x) __builtin_expect(!!(x), 1) #define rseq_unlikely(x) __builtin_expect(!!(x), 0) @@ -108,7 +122,7 @@ int32_t rseq_fallback_current_cpu(void); */ static inline int32_t rseq_current_cpu_raw(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id); } /* @@ -124,7 +138,7 @@ static inline int32_t rseq_current_cpu_raw(void) */ static inline uint32_t rseq_cpu_start(void) { - return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); + return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start); } static inline uint32_t rseq_current_cpu(void) @@ -139,11 +153,7 @@ static inline uint32_t rseq_current_cpu(void) static inline void rseq_clear_rseq_cs(void) { -#ifdef __LP64__ - __rseq_abi.rseq_cs.ptr = 0; -#else - __rseq_abi.rseq_cs.ptr.ptr32 = 0; -#endif + RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); } /* diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6..a953c96aa16e 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180 diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 0ebfe8b0e147..585f7a0c10cb 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ LDFLAGS += -lpthread TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 2956584e1e37..75af864e07b6 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all clean -CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \ ../x86/trivial_64bit_program.c) ifndef OBJCOPY diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d4322c946e2..006b464c8fc9 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,7 +21,7 @@ void encl_delete(struct encl *encl) { - struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + struct encl_segment *heap_seg; if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -32,10 +32,11 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); - munmap(heap_seg->src, heap_seg->size); - - if (encl->segment_tbl) + if (encl->segment_tbl) { + heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + munmap(heap_seg->src, heap_seg->size); free(encl->segment_tbl); + } memset(encl, 0, sizeof(*encl)); } diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 370c4995f7c4..dd74fa42302e 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -146,7 +146,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); - TH_LOG("Failed to load the test enclave.\n"); + TH_LOG("Failed to load the test enclave."); + return false; } if (!encl_measure(encl)) @@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, return true; err: - encl_delete(encl); - for (i = 0; i < encl->nr_segments; i++) { seg = &encl->segment_tbl[i]; @@ -205,7 +204,9 @@ err: fclose(maps_file); } - TH_LOG("Failed to initialize the test enclave.\n"); + TH_LOG("Failed to initialize the test enclave."); + + encl_delete(encl); return false; } diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index f34486cd7342..057a4f49c79d 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT) TPM2_CAP_HANDLES = 0x00000001 TPM2_CAP_COMMANDS = 0x00000002 +TPM2_CAP_PCRS = 0x00000005 TPM2_CAP_TPM_PROPERTIES = 0x00000006 TPM2_PT_FIXED = 0x100 @@ -712,3 +713,33 @@ class Client: pt += 1 return handles + + def get_cap_pcrs(self): + pcr_banks = {} + + fmt = '>HII III' + + cmd = struct.pack(fmt, + TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + TPM2_CC_GET_CAPABILITY, + TPM2_CAP_PCRS, 0, 1) + rsp = self.send_cmd(cmd)[10:] + _, _, cnt = struct.unpack('>BII', rsp[:9]) + rsp = rsp[9:] + + # items are TPMS_PCR_SELECTION's + for i in range(0, cnt): + hash, sizeOfSelect = struct.unpack('>HB', rsp[:3]) + rsp = rsp[3:] + + pcrSelect = 0 + if sizeOfSelect > 0: + pcrSelect, = struct.unpack('%ds' % sizeOfSelect, + rsp[:sizeOfSelect]) + rsp = rsp[sizeOfSelect:] + pcrSelect = int.from_bytes(pcrSelect, byteorder='big') + + pcr_banks[hash] = pcrSelect + + return pcr_banks diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 9d764306887b..ffe98b5c8d22 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase): result = self.client.unseal(self.root_key, blob, auth, None) self.assertEqual(data, result) + def determine_bank_alg(self, mask): + pcr_banks = self.client.get_cap_pcrs() + for bank_alg, pcrSelection in pcr_banks.items(): + if pcrSelection & mask == mask: + return bank_alg + return None + def test_seal_with_policy(self): + bank_alg = self.determine_bank_alg(1 << 16) + self.assertIsNotNone(bank_alg) + handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) data = ('X' * 64).encode() @@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase): pcrs = [16] try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) policy_dig = self.client.get_policy_digest(handle) @@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase): handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase): self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL) def test_unseal_with_wrong_policy(self): + bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1) + self.assertIsNotNone(bank_alg) + handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) data = ('X' * 64).encode() @@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase): pcrs = [16] try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) policy_dig = self.client.get_policy_digest(handle) @@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase): # Extend first a PCR that is not part of the policy and try to unseal. # This should succeed. - ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1) - self.client.extend_pcr(1, ('X' * ds).encode()) + ds = tpm2.get_digest_size(bank_alg) + self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase): # Then, extend a PCR that is part of the policy and try to unseal. # This should fail. - self.client.extend_pcr(16, ('X' * ds).encode()) + self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) rc = 0 try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -302,3 +315,19 @@ class AsyncTest(unittest.TestCase): log.debug("Calling get_cap in a NON_BLOCKING mode") async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION) async_client.close() + + def test_flush_invalid_context(self): + log = logging.getLogger(__name__) + log.debug(sys._getframe().f_code.co_name) + + async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK) + log.debug("Calling flush_context passing in an invalid handle ") + handle = 0x80123456 + rc = 0 + try: + async_client.flush_context(handle) + except OSError as e: + rc = e.errno + + self.assertEqual(rc, 22) + async_client.close() diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 3d603f1394af..883ca85424bc 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -33,110 +33,114 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -static int vdso_test_gettimeofday(void) +#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ +#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x +#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x + +static void vdso_test_gettimeofday(void) { /* Find gettimeofday. */ vdso_gettimeofday_t vdso_gettimeofday = (vdso_gettimeofday_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0])); + return; } struct timeval tv; long ret = vdso_gettimeofday(&tv, 0); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0])); } - - return KSFT_PASS; } -static int vdso_test_clock_gettime(clockid_t clk_id) +static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. */ vdso_clock_gettime_t vdso_clock_gettime = (vdso_clock_gettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) { - printf("Could not find %s\n", name[1]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1])); + return; } struct timespec ts; long ret = vdso_clock_gettime(clk_id, &ts); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[1]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1])); } - - return KSFT_PASS; } -static int vdso_test_time(void) +static void vdso_test_time(void) { /* Find time. */ vdso_time_t vdso_time = (vdso_time_t)vdso_sym(version, name[2]); if (!vdso_time) { - printf("Could not find %s\n", name[2]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2])); + return; } long ret = vdso_time(NULL); if (ret > 0) { - printf("The time in hours since January 1, 1970 is %lld\n", + ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", (long long)(ret / 3600)); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[2]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2])); } - - return KSFT_PASS; } -static int vdso_test_clock_getres(clockid_t clk_id) +static void vdso_test_clock_getres(clockid_t clk_id) { + int clock_getres_fail = 0; + /* Find clock_getres. */ vdso_clock_getres_t vdso_clock_getres = (vdso_clock_getres_t)vdso_sym(version, name[3]); if (!vdso_clock_getres) { - printf("Could not find %s\n", name[3]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3])); + return; } struct timespec ts, sys_ts; long ret = vdso_clock_getres(clk_id, &ts); if (ret == 0) { - printf("The resolution is %lld %lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The vdso resolution is %lld %lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); } else { - printf("%s failed\n", name[3]); - return KSFT_FAIL; + clock_getres_fail++; } ret = syscall(SYS_clock_getres, clk_id, &sys_ts); - if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) { - printf("%s failed\n", name[3]); - return KSFT_FAIL; - } + ksft_print_msg("The syscall resolution is %lld %lld\n", + (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec); - return KSFT_PASS; + if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) + clock_getres_fail++; + + if (clock_getres_fail > 0) { + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3])); + } else { + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + } } const char *vdso_clock_name[12] = { @@ -158,36 +162,23 @@ const char *vdso_clock_name[12] = { * This function calls vdso_test_clock_gettime and vdso_test_clock_getres * with different values for clock_id. */ -static inline int vdso_test_clock(clockid_t clock_id) +static inline void vdso_test_clock(clockid_t clock_id) { - int ret0, ret1; - - ret0 = vdso_test_clock_gettime(clock_id); - /* A skipped test is considered passed */ - if (ret0 == KSFT_SKIP) - ret0 = KSFT_PASS; - - ret1 = vdso_test_clock_getres(clock_id); - /* A skipped test is considered passed */ - if (ret1 == KSFT_SKIP) - ret1 = KSFT_PASS; + ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]); - ret0 += ret1; + vdso_test_clock_gettime(clock_id); - printf("clock_id: %s", vdso_clock_name[clock_id]); - - if (ret0 > 0) - printf(" [FAIL]\n"); - else - printf(" [PASS]\n"); - - return ret0; + vdso_test_clock_getres(clock_id); } +#define VDSO_TEST_PLAN 16 + int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - int ret; + + ksft_print_header(); + ksft_set_plan(VDSO_TEST_PLAN); if (!sysinfo_ehdr) { printf("AT_SYSINFO_EHDR is not present!\n"); @@ -201,44 +192,42 @@ int main(int argc, char **argv) vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - ret = vdso_test_gettimeofday(); + vdso_test_gettimeofday(); #if _POSIX_TIMERS > 0 #ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); + vdso_test_clock(CLOCK_REALTIME); #endif #ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); + vdso_test_clock(CLOCK_BOOTTIME); #endif #ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); + vdso_test_clock(CLOCK_TAI); #endif #ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); + vdso_test_clock(CLOCK_REALTIME_COARSE); #endif #ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); + vdso_test_clock(CLOCK_MONOTONIC); #endif #ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); + vdso_test_clock(CLOCK_MONOTONIC_RAW); #endif #ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); + vdso_test_clock(CLOCK_MONOTONIC_COARSE); #endif #endif - ret += vdso_test_time(); - - if (ret > 0) - return KSFT_FAIL; + vdso_test_time(); - return KSFT_PASS; + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; } diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 2e7e86e85282..3b5faec3c04f 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -2,6 +2,7 @@ hugepage-mmap hugepage-mremap hugepage-shm +hugepage-vmemmap khugepaged map_hugetlb map_populate diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 1607322a112c..81040f01711f 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) @@ -31,6 +33,7 @@ TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += hugepage-vmemmap TEST_GEN_FILES += khugepaged TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace @@ -49,9 +52,9 @@ TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) -CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) TARGETS := protection_keys BINARIES_32 := $(TARGETS:%=%_32) @@ -140,10 +143,6 @@ endif $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap -$(OUTPUT)/gup_test: ../../../../mm/gup_test.h - -$(OUTPUT)/hmm-tests: local_config.h - # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index fe8fcfb334e0..a5cb4b09a46c 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then reservation_usage_file=rsvd.current fi -cgroup_path=/dev/cgroup/memory -if [[ ! -e $cgroup_path ]]; then - mkdir -p $cgroup_path - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path - else + do_umount=1 + fi + echo "+hugetlb" >$cgroup_path/cgroup.subtree_control +else + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path + do_umount=1 fi fi - -if [[ $cgroup2 ]]; then - echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control -fi +export cgroup_path function cleanup() { if [[ $cgroup2 ]]; then @@ -108,7 +112,7 @@ function setup_cgroup() { function wait_for_hugetlb_memory_to_get_depleted() { local cgroup="$1" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get depleted. while [ $(cat $path) != 0 ]; do echo Waiting for hugetlb memory to get depleted. @@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory reservation to reach size $size. @@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory to reach size $size. @@ -574,5 +578,7 @@ for populate in "" "-o"; do done # populate done # method -umount $cgroup_path -rmdir $cgroup_path +if [[ $do_umount ]]; then + umount $cgroup_path + rmdir $cgroup_path +fi diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 864f126ffd78..203323967b50 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown) /* * Test memory snapshot without faulting in pages accessed by the device. */ +TEST_F(hmm, mixedmap) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned char *m; + int ret; + + npages = 1; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + self->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ); + + hmm_buffer_free(buffer); +} + +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ TEST_F(hmm2, snapshot) { struct hmm_buffer *buffer; diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 257df94697a5..1d689084a54b 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -3,8 +3,13 @@ * hugepage-mremap: * * Example of remapping huge page memory in a user application using the - * mremap system call. Code assumes a hugetlbfs filesystem is mounted - * at './huge'. The code will use 10MB worth of huge pages. + * mremap system call. The path to a file in a hugetlbfs filesystem must + * be passed as the last argument to this test. The amount of memory used + * by this test in MBs can optionally be passed as an argument. If no memory + * amount is passed, the default amount is 10MB. + * + * To make sure the test triggers pmd sharing and goes through the 'unshare' + * path in the mremap code use 1GB (1024) or more. */ #define _GNU_SOURCE @@ -18,7 +23,8 @@ #include <linux/userfaultfd.h> #include <sys/ioctl.h> -#define LENGTH (1UL * 1024 * 1024 * 1024) +#define DEFAULT_LENGTH_MB 10UL +#define MB_TO_BYTES(x) (x * 1024 * 1024) #define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC) #define FLAGS (MAP_SHARED | MAP_ANONYMOUS) @@ -28,20 +34,20 @@ static void check_bytes(char *addr) printf("First hex is %x\n", *((unsigned int *)addr)); } -static void write_bytes(char *addr) +static void write_bytes(char *addr, size_t len) { unsigned long i; - for (i = 0; i < LENGTH; i++) + for (i = 0; i < len; i++) *(addr + i) = (char)i; } -static int read_bytes(char *addr) +static int read_bytes(char *addr, size_t len) { unsigned long i; check_bytes(addr); - for (i = 0; i < LENGTH; i++) + for (i = 0; i < len; i++) if (*(addr + i) != (char)i) { printf("Mismatch at %lu\n", i); return 1; @@ -99,11 +105,28 @@ static void register_region_with_uffd(char *addr, size_t len) } } -int main(void) +int main(int argc, char *argv[]) { + size_t length; + + if (argc != 2 && argc != 3) { + printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]); + exit(1); + } + + /* Read memory length as the first arg if valid, otherwise fallback to + * the default length. + */ + if (argc == 3) + length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL; + + length = length > 0 ? length : DEFAULT_LENGTH_MB; + length = MB_TO_BYTES(length); + int ret = 0; - int fd = open("/huge/test", O_CREAT | O_RDWR, 0755); + /* last arg is the hugetlb file name */ + int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755); if (fd < 0) { perror("Open failed"); @@ -112,7 +135,7 @@ int main(void) /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */ unsigned long suggested_addr = 0x7eaa40000000; - void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + void *haddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); printf("Map haddr: Returned address is %p\n", haddr); if (haddr == MAP_FAILED) { @@ -122,7 +145,7 @@ int main(void) /* mmap again to a dummy address to hopefully trigger pmd sharing. */ suggested_addr = 0x7daa40000000; - void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + void *daddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); printf("Map daddr: Returned address is %p\n", daddr); if (daddr == MAP_FAILED) { @@ -132,16 +155,16 @@ int main(void) suggested_addr = 0x7faa40000000; void *vaddr = - mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0); + mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0); printf("Map vaddr: Returned address is %p\n", vaddr); if (vaddr == MAP_FAILED) { perror("mmap2"); exit(1); } - register_region_with_uffd(haddr, LENGTH); + register_region_with_uffd(haddr, length); - void *addr = mremap(haddr, LENGTH, LENGTH, + void *addr = mremap(haddr, length, length, MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); if (addr == MAP_FAILED) { perror("mremap"); @@ -150,10 +173,13 @@ int main(void) printf("Mremap: Returned address is %p\n", addr); check_bytes(addr); - write_bytes(addr); - ret = read_bytes(addr); + write_bytes(addr, length); + ret = read_bytes(addr, length); + + munmap(addr, length); - munmap(addr, LENGTH); + close(fd); + unlink(argv[argc-1]); return ret; } diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c new file mode 100644 index 000000000000..557bdbd4f87e --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-vmemmap.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test case of using hugepage memory in a user application using the + * mmap system call with MAP_HUGETLB flag. Before running this program + * make sure the administrator has allocated enough default sized huge + * pages to cover the 2 MB allocation. + */ +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> + +#define MAP_LENGTH (2UL * 1024 * 1024) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +#define PAGE_SIZE 4096 + +#define PAGE_COMPOUND_HEAD (1UL << 15) +#define PAGE_COMPOUND_TAIL (1UL << 16) +#define PAGE_HUGE (1UL << 17) + +#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE) +#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE) + +#define PM_PFRAME_BITS 55 +#define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1) + +/* + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. + */ +#ifdef __ia64__ +#define MAP_ADDR (void *)(0x8000000000000000UL) +#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define MAP_ADDR NULL +#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void write_bytes(char *addr, size_t length) +{ + unsigned long i; + + for (i = 0; i < length; i++) + *(addr + i) = (char)i; +} + +static unsigned long virt_to_pfn(void *addr) +{ + int fd; + unsigned long pagemap; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) + return -1UL; + + lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET); + read(fd, &pagemap, sizeof(pagemap)); + close(fd); + + return pagemap & ~PM_PFRAME_MASK; +} + +static int check_page_flags(unsigned long pfn) +{ + int fd, i; + unsigned long pageflags; + + fd = open("/proc/kpageflags", O_RDONLY); + if (fd < 0) + return -1; + + lseek(fd, pfn * sizeof(pageflags), SEEK_SET); + + read(fd, &pageflags, sizeof(pageflags)); + if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) { + close(fd); + printf("Head page flags (%lx) is invalid\n", pageflags); + return -1; + } + + /* + * pages other than the first page must be tail and shouldn't be head; + * this also verifies kernel has correctly set the fake page_head to tail + * while hugetlb_free_vmemmap is enabled. + */ + for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) { + read(fd, &pageflags, sizeof(pageflags)); + if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS || + (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) { + close(fd); + printf("Tail page flags (%lx) is invalid\n", pageflags); + return -1; + } + } + + close(fd); + + return 0; +} + +int main(int argc, char **argv) +{ + void *addr; + unsigned long pfn; + + addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + /* Trigger allocation of HugeTLB page. */ + write_bytes(addr, MAP_LENGTH); + + pfn = virt_to_pfn(addr); + if (pfn == -1UL) { + munmap(addr, MAP_LENGTH); + perror("virt_to_pfn"); + exit(1); + } + + printf("Returned address is %p whose pfn is %lx\n", addr, pfn); + + if (check_page_flags(pfn) < 0) { + munmap(addr, MAP_LENGTH); + perror("check_page_flags"); + exit(1); + } + + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, MAP_LENGTH)) { + perror("munmap"); + exit(1); + } + + return 0; +} diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh index 4a9a3afe9fd4..bf2d2a684edf 100644 --- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh @@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then usage_file=current fi -CGROUP_ROOT='/dev/cgroup/memory' -MNT='/mnt/huge/' -if [[ ! -e $CGROUP_ROOT ]]; then - mkdir -p $CGROUP_ROOT - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT - sleep 1 - echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control - else + do_umount=1 + fi + echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control +else + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT + do_umount=1 fi fi +MNT='/mnt/huge/' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c index d91bde511268..eed44322d1a6 100644 --- a/tools/testing/selftests/vm/map_fixed_noreplace.c +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -17,9 +17,6 @@ #define MAP_FIXED_NOREPLACE 0x100000 #endif -#define BASE_ADDRESS (256ul * 1024 * 1024) - - static void dump_maps(void) { char cmd[32]; @@ -28,18 +25,46 @@ static void dump_maps(void) system(cmd); } +static unsigned long find_base_addr(unsigned long size) +{ + void *addr; + unsigned long flags; + + flags = MAP_PRIVATE | MAP_ANONYMOUS; + addr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + if (addr == MAP_FAILED) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + + if (munmap(addr, size) != 0) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + return (unsigned long)addr; +} + int main(void) { + unsigned long base_addr; unsigned long flags, addr, size, page_size; char *p; page_size = sysconf(_SC_PAGE_SIZE); + //let's find a base addr that is free before we start the tests + size = 5 * page_size; + base_addr = find_base_addr(size); + if (!base_addr) { + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; // Check we can map all the areas we need below errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); @@ -60,7 +85,7 @@ int main(void) printf("unmap() successful\n"); errno = 0; - addr = BASE_ADDRESS + page_size; + addr = base_addr + page_size; size = 3 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -80,7 +105,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -101,7 +126,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS + (2 * page_size); + addr = base_addr + (2 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -121,7 +146,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (3 * page_size); + addr = base_addr + (3 * page_size); size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -141,7 +166,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -161,7 +186,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -181,7 +206,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (4 * page_size); + addr = base_addr + (4 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -192,7 +217,7 @@ int main(void) return 1; } - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; if (munmap((void *)addr, size) != 0) { dump_maps(); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index a24d30af3094..e10d50e0b8e8 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -111,7 +111,19 @@ fi echo "-----------------------" echo "running hugepage-mremap" echo "-----------------------" -./hugepage-mremap +./hugepage-mremap $mnt/huge_mremap +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi +rm -f $mnt/huge_mremap + +echo "------------------------" +echo "running hugepage-vmemmap" +echo "------------------------" +./hugepage-vmemmap if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c index 5e4c036f6ad3..a03cb3fce1f6 100644 --- a/tools/testing/selftests/vm/transhuge-stress.c +++ b/tools/testing/selftests/vm/transhuge-stress.c @@ -26,15 +26,17 @@ #define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1)) int pagemap_fd; +int backing_fd = -1; +int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE; +#define PROT_RW (PROT_READ | PROT_WRITE) int64_t allocate_transhuge(void *ptr) { uint64_t ent[2]; /* drop pmd */ - if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANONYMOUS | - MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) + if (mmap(ptr, HPAGE_SIZE, PROT_RW, MAP_FIXED | mmap_flags, + backing_fd, 0) != ptr) errx(2, "mmap transhuge"); if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) @@ -60,6 +62,8 @@ int main(int argc, char **argv) size_t ram, len; void *ptr, *p; struct timespec a, b; + int i = 0; + char *name = NULL; double s; uint8_t *map; size_t map_len; @@ -69,13 +73,23 @@ int main(int argc, char **argv) ram = SIZE_MAX / 4; else ram *= sysconf(_SC_PAGESIZE); + len = ram; + + while (++i < argc) { + if (!strcmp(argv[i], "-h")) + errx(1, "usage: %s [size in MiB]", argv[0]); + else if (!strcmp(argv[i], "-f")) + name = argv[++i]; + else + len = atoll(argv[i]) << 20; + } - if (argc == 1) - len = ram; - else if (!strcmp(argv[1], "-h")) - errx(1, "usage: %s [size in MiB]", argv[0]); - else - len = atoll(argv[1]) << 20; + if (name) { + backing_fd = open(name, O_RDWR); + if (backing_fd == -1) + errx(2, "open %s", name); + mmap_flags = MAP_SHARED; + } warnx("allocate %zd transhuge pages, using %zd MiB virtual memory" " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20, @@ -86,8 +100,7 @@ int main(int argc, char **argv) err(2, "open pagemap"); len -= len % HPAGE_SIZE; - ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0); if (ptr == MAP_FAILED) err(2, "initial mmap"); ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE; diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9354a5e0321c..29cc6f247a9e 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -46,6 +46,7 @@ #include <signal.h> #include <poll.h> #include <string.h> +#include <linux/mman.h> #include <sys/mman.h> #include <sys/syscall.h> #include <sys/ioctl.h> @@ -87,7 +88,7 @@ static bool test_uffdio_minor = false; static bool map_shared; static int shm_fd; -static int huge_fd = -1; /* only used for hugetlb_shared test */ +static int huge_fd; static char *huge_fd_off0; static unsigned long long *count_verify; static int uffd = -1; @@ -223,9 +224,6 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) static void hugetlb_release_pages(char *rel_area) { - if (huge_fd == -1) - return; - if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, rel_area == huge_fd_off0 ? 0 : nr_pages * page_size, nr_pages * page_size)) @@ -238,17 +236,17 @@ static void hugetlb_allocate_area(void **alloc_area) char **alloc_area_alias; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - map_shared ? MAP_SHARED : - MAP_PRIVATE | MAP_HUGETLB | + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + MAP_HUGETLB | (*alloc_area == area_src ? 0 : MAP_NORESERVE), - huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + huge_fd, *alloc_area == area_src ? 0 : + nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, + MAP_SHARED | MAP_HUGETLB, huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) @@ -542,7 +540,7 @@ static void continue_range(int ufd, __u64 start, __u64 len) static void *locking_thread(void *arg) { unsigned long cpu = (unsigned long) arg; - unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ + unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { @@ -648,7 +646,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg) if (ret != sizeof(*msg)) { if (ret < 0) { - if (errno == EAGAIN) + if (errno == EAGAIN || errno == EINTR) return 1; err("blocking read error"); } else { @@ -724,8 +722,11 @@ static void *uffd_poll_thread(void *arg) for (;;) { ret = poll(pollfd, 2, -1); - if (ret <= 0) + if (ret <= 0) { + if (errno == EINTR || errno == EAGAIN) + continue; err("poll error: %d", ret); + } if (pollfd[1].revents & POLLIN) { if (read(pollfd[1].fd, &tmp_chr, 1) != 1) err("read pipefd error"); diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index d3d0d108924d..70a02301f4c2 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -14,7 +14,7 @@ want_sleep=$8 reserve=$9 echo "Putting task in cgroup '$cgroup'" -echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs +echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs echo "Method is $method" diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 8a1f62ab3c8e..53df7d3893d3 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,9 +6,9 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf54..8c669c0d662e 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec454..b0b91d9b0dc2 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh run_zram () { @@ -18,14 +15,4 @@ echo "" check_prereqs -# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb87..8f4affe34f3e 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,9 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done - local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi - local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" } check_prereqs @@ -81,7 +67,6 @@ zram_mount zram_fill_fs zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c0..2418b0c4ed13 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd1..21ec1966de76 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,12 +5,17 @@ # Author: Alexey Kodanev <[email protected]> # Modified: Naresh Kamboju <[email protected]> -MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.} trap INT @@ -25,68 +30,104 @@ check_prereqs() fi } +kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done - for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done -} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi -zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi } zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi - if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" } zram_max_streams() { echo "set max_comp_streams to zram device(s)" - local i=0 + kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -98,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream" i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done echo "zram max streams: OK" @@ -108,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm" - local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done echo "zram set compression algorithm: OK" @@ -125,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set disksizes: OK" @@ -142,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set memory limit: OK" @@ -158,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -182,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -196,7 +238,7 @@ zram_swapoff() zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -215,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \ diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile index 87e0ec48e2e7..95e485f12d97 100644 --- a/tools/tracing/Makefile +++ b/tools/tracing/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 include ../scripts/Makefile.include -all: latency +all: latency rtla -clean: latency_clean +clean: latency_clean rtla_clean -install: latency_install +install: latency_install rtla_install latency: $(call descend,latency) @@ -16,4 +16,14 @@ latency_install: latency_clean: $(call descend,latency,clean) -.PHONY: all install clean latency latency_install latency_clean +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 000000000000..5a1eda617992 --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,104 @@ +NAME := rtla +# Follow the kernel version +VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion) + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) +LDFLAGS := -ggdb +LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rtla/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ +endif + +.PHONY: all +all: rtla + +rtla: $(OBJ) + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) + +static: $(OBJ) + $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl + +.PHONY: install +install: doc_install + $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) + $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + echo $(VERSION) > $(NAME)-$(VERSION)/VERSION + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 000000000000..6c88446f7e74 --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,36 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla is a meta-tool that includes a set of commands that +aims to analyze the real-time properties of Linux. But, instead of +testing Linux as a black box, rtla leverages kernel tracing +capabilities to provide precise information about the properties +and root causes of unexpected results. + +Installing RTLA + +RTLA depends on some libraries and tools. More precisely, it depends on the +following libraries: + + - libtracefs + - libtraceevent + - procps + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 000000000000..e60f1862bad0 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise_get_cpus - return the original "osnoise/cpus" content + * + * It also saves the value to be restored. + */ +char *osnoise_get_cpus(struct osnoise_context *context) +{ + if (context->curr_cpus) + return context->curr_cpus; + + if (context->orig_cpus) + return context->orig_cpus; + + context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL); + + /* + * The error value (NULL) is the same for tracefs_instance_file_read() + * and this functions, so: + */ + return context->orig_cpus; +} + +/* + * osnoise_set_cpus - configure osnoise to run on *cpus + * + * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat + * will run. This function opens this file, saves the current value, + * and set the cpus passed as argument. + */ +int osnoise_set_cpus(struct osnoise_context *context, char *cpus) +{ + char *orig_cpus = osnoise_get_cpus(context); + char buffer[1024]; + int retval; + + if (!orig_cpus) + return -1; + + context->curr_cpus = strdup(cpus); + if (!context->curr_cpus) + return -1; + + snprintf(buffer, 1024, "%s\n", cpus); + + debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer); + if (retval < 0) { + free(context->curr_cpus); + context->curr_cpus = NULL; + return -1; + } + + return 0; +} + +/* + * osnoise_restore_cpus - restore the original "osnoise/cpus" + * + * osnoise_set_cpus() saves the original data for the "osnoise/cpus" + * file. This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. + */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. + * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? */ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) +{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * enable_osnoise - enable osnoise tracer in the trace_instance + */ +int enable_osnoise(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "osnoise"); +} + +/* + * enable_timerlat - enable timerlat tracer in the trace_instance + */ +int enable_timerlat(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "timerlat"); +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. + */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + if (!top) + return; + + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable %s tracer for tracing\n", tracer); + goto out_err; + } + + return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +static void osnoise_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + osnoise_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + osnoise_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + osnoise_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + osnoise_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 000000000000..9e4b2e2a4559 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "trace.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. + */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; +}; + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. + */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long print_stack); + +/* + * osnoise_tool - osnoise based tool definition. + */ +struct osnoise_tool { + struct trace_instance trace; + struct osnoise_context *context; + void *data; + void *params; + time_t start_time; +}; + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(char *tracer); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 000000000000..52c053cc1789 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,800 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" + +struct osnoise_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int set_sched; + int output_divisor; + struct sched_attr sched_param; + + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if (!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + duration = duration / params->output_divisor; + + if (data->bucket_size) + bucket = duration / data->bucket_size; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. + */ + bucket_size = params->output_divisor * params->bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_hist_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_sample / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct osnoise_hist_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'E'}, + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:E:Dhp:P:r:s:S:t::0123", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_hist_usage("Invalid -D duration\n"); + break; + case 'E': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + osnoise_hist_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_hist_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_hist_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->no_header = 1; + break; + case '1': /* no summary */ + params->no_summary = 1; + break; + case '2': /* no index */ + params->no_index = 1; + break; + case '3': /* with zeros */ + params->with_zeros = 1; + break; + default: + osnoise_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_index && !params->with_zeros) + osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + + return params; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct osnoise_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_hist_set_signals - handles the signal to stop the tool + */ +static void +osnoise_hist_set_signals(struct osnoise_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int osnoise_hist_main(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = osnoise_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = osnoise_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_destroy; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_destroy; + } + + retval = osnoise_init_trace_hist(tool); + if (retval) + goto out_destroy; + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + osnoise_read_trace_hist(tool); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + osnoise_free_histogram(tool->data); +out_destroy: + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 000000000000..7af769b9c0de --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise top parameters + */ +struct osnoise_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void +osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Operating System Noise"); + trace_seq_printf(s, " "); + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI IRQ Softirq Thread"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void +osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +void osnoise_top_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters + */ +struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::", + long_options, &option_index); + + /* Detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + osnoise_top_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_top_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_top_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + default: + osnoise_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("osnoise needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; + +out_err: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_top_set_signals - handles the signal to stop the tool + */ +static void osnoise_top_set_signals(struct osnoise_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int osnoise_top_main(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = osnoise_top_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_top(params); + if (!tool) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = osnoise_top_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_top_set_signals(params); + + do { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + osnoise_print_stats(params, tool); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + } while (!stop_tracing); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("osnoise hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + osnoise_free_top(tool->data); + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c new file mode 100644 index 000000000000..09bd21b8af81 --- /dev/null +++ b/tools/tracing/rtla/src/rtla.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "osnoise.h" +#include "timerlat.h" + +/* + * rtla_usage - print rtla usage + */ +static void rtla_usage(void) +{ + int i; + + static const char *msg[] = { + "", + "rtla version " VERSION, + "", + " usage: rtla COMMAND ...", + "", + " commands:", + " osnoise - gives information about the operating system noise (osnoise)", + " timerlat - measures the timer irq and thread latency", + "", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * run_command - try to run a rtla tool command + * + * It returns 0 if it fails. The tool's main will generally not + * return as they should call exit(). + */ +int run_command(int argc, char **argv, int start_position) +{ + if (strcmp(argv[start_position], "osnoise") == 0) { + osnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "timerlat") == 0) { + timerlat_main(argc-start_position, &argv[start_position]); + goto ran; + } + + return 0; +ran: + return 1; +} + +int main(int argc, char *argv[]) +{ + int retval; + + /* is it an alias? */ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(); + exit(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(); + exit(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 000000000000..97abbf494fee --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "timerlat.h" + +static void timerlat_usage(void) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + timerlat_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + timerlat_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + timerlat_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + timerlat_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 000000000000..88561bfd14f3 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +int timerlat_hist_main(int argc, char *argv[]); +int timerlat_top_main(int argc, char *argv[]); +int timerlat_main(int argc, char *argv[]); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 000000000000..237e1735afa7 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,821 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int set_sched; + struct sched_attr sched_param; + + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct timerlat_hist_cpu { + int *irq; + int *thread; + + int irq_count; + int thread_count; + + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_hist_data { + struct timerlat_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * timerlat_free_histogram - free runtime data + */ +static void +timerlat_free_histogram(struct timerlat_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].irq) + free(data->hist[cpu].irq); + + if (data->hist[cpu].thread) + free(data->hist[cpu].thread); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_hist_data +*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct timerlat_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + /* one histogram for IRQ and one for thread, per cpu */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); + if (!data->hist[cpu].irq) + goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); + if (!data->hist[cpu].thread) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].min_irq = ~0; + data->hist[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_histogram(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + latency = latency / params->output_divisor; + + if (data->bucket_size) + bucket = latency / data->bucket_size; + + if (!thread) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long thread, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_thread); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_irq / data->hist[cpu].irq_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for all CPUs + */ +static void +timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + total += data->hist[cpu].irq[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[bucket]); + } + + if (!params->no_thread) { + total += data->hist[cpu].thread[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[bucket]); + } + + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[data->entries]); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + timerlat_print_summary(params, trace, data); +} + +/* + * timerlat_hist_usage - prints timerlat top usage message + */ +static void timerlat_hist_usage(char *usage) +{ + int i; + + char *msg[] = { + "", + " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_hist_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'E'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:E:Dhi:np:P:s:t::T:012345", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_hist_usage("Invalid -D duration\n"); + break; + case 'E': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + timerlat_hist_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_hist_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + case '0': /* no irq */ + params->no_irq = 1; + break; + case '1': /* no thread */ + params->no_thread = 1; + break; + case '2': /* no header */ + params->no_header = 1; + break; + case '3': /* no summary */ + params->no_summary = 1; + break; + case '4': /* no index */ + params->no_index = 1; + break; + case '5': /* with zeros */ + params->with_zeros = 1; + break; + default: + timerlat_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_irq && params->no_thread) + timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + + if (params->no_index && !params->with_zeros) + timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + + return params; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + if (params->print_stack) { + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct timerlat_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_hist_set_signals - handles the signal to stop the tool + */ +static void +timerlat_hist_set_signals(struct timerlat_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int timerlat_hist_main(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = timerlat_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = timerlat_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_hist; + } + + trace = &tool->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + timerlat_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + timerlat_free_histogram(tool->data); + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c new file mode 100644 index 000000000000..d4187f6534ed --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct timerlat_top_cpu { + int irq_count; + int thread_count; + + unsigned long long cur_irq; + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long cur_thread; + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_top_data { + struct timerlat_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * timerlat_free_top - free runtime data + */ +static void +timerlat_free_top(struct timerlat_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) +{ + struct timerlat_top_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->cpu_data[cpu].min_irq = ~0; + data->cpu_data[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_top(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_top_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + if (!thread) { + cpu_data->irq_count++; + cpu_data->cur_irq = latency; + update_min(&cpu_data->min_irq, &latency); + update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct osnoise_tool *top) +{ + struct timerlat_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, + params->output_divisor == 1 ? "ns" : "us", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + + struct timerlat_top_params *params = top->params; + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. + */ + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - |"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_thread / cpu_data->thread_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + timerlat_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + timerlat_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(char *usage) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_top_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct timerlat_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + timerlat_top_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_top_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + default: + timerlat_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(top->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(top->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + + if (params->print_stack) { + retval = osnoise_set_print_stack(top->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct timerlat_top_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + top->params = params; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_top_set_signals - handles the signal to stop the tool + */ +static void +timerlat_top_set_signals(struct timerlat_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int timerlat_top_main(int argc, char *argv[]) +{ + struct timerlat_top_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *top = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = timerlat_top_parse_args(argc, argv); + if (!params) + exit(1); + + top = timerlat_init_top(params); + if (!top) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = timerlat_top_apply_config(top, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &top->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + timerlat_print_stats(params, top); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + }; + + timerlat_print_stats(params, top); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + timerlat_free_top(top->data); + osnoise_destroy_tool(record); + osnoise_destroy_tool(top); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 000000000000..83de259abcc1 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("Enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("Tracer %s not found!\n", tracer_name); + + err_msg("Failed to enable the %s tracer\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("Oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + goto out_close; + + retval = write(out_fd, buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + } + + if (trace->seq) + free(trace->seq); + + if (trace->tep) + tep_free(trace->tep); +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 000000000000..0ea1df0ad9a7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +int enable_osnoise(struct trace_instance *trace); +int enable_timerlat(struct trace_instance *trace); + +struct tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 000000000000..ffaf8ec84001 --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]> + */ + +#include <proc/readproc.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> + +#include "utils.h" + +#define MAX_MSG_LENGTH 1024 +int config_debug; + +/* + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * get_llong_from_str - get a long long int from a string + */ +long long get_llong_from_str(char *start) +{ + long long value; + char *end; + + errno = 0; + value = strtoll(start, &end, 10); + if (errno || start == end) + return -1; + + return value; +} + +/* + * get_duration - fill output with a human readable duration since start_time + */ +void get_duration(time_t start_time, char *output, int output_size) +{ + time_t now = time(NULL); + struct tm *tm_info; + time_t duration; + + duration = difftime(now, start_time); + tm_info = gmtime(&duration); + + snprintf(output, output_size, "%3d %02d:%02d:%02d", + tm_info->tm_yday, + tm_info->tm_hour, + tm_info->tm_min, + tm_info->tm_sec); +} + +/* + * parse_cpu_list - parse a cpu_list filling a char vector with cpus set + * + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char + * in the monitored_cpus. + * + * XXX: convert to a bitmask. + */ +int parse_cpu_list(char *cpu_list, char **monitored_cpus) +{ + char *mon_cpus; + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + mon_cpus = malloc(nr_cpus * sizeof(char)); + memset(mon_cpus, 0, (nr_cpus * sizeof(char))); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_list: adding cpu %d\n", cpu); + mon_cpus[cpu] = 1; + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_list: adding cpu %d\n", i); + mon_cpus[i] = 1; + } + } + + if (*p == ',') + p++; + } + + *monitored_cpus = mon_cpus; + + return 0; + +err: + debug_msg("Error parsing the cpu list %s", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. + */ +#ifdef __x86_64__ +# define __NR_sched_setattr 314 +# define __NR_sched_getattr 315 +#elif __i386__ +# define __NR_sched_setattr 351 +# define __NR_sched_getattr 352 +#elif __arm__ +# define __NR_sched_setattr 380 +# define __NR_sched_getattr 381 +#elif __aarch64__ +# define __NR_sched_setattr 274 +# define __NR_sched_getattr 275 +#elif __powerpc__ +# define __NR_sched_setattr 355 +# define __NR_sched_getattr 356 +#elif __s390x__ +# define __NR_sched_setattr 345 +# define __NR_sched_getattr 346 +#endif + +#define SCHED_DEADLINE 6 + +static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + unsigned int size, unsigned int flags) +{ + return syscall(__NR_sched_getattr, pid, attr, size, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("boost_with_deadline failed to boost pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm + * + * This function uses procps to list the currently running threads and then + * set the sched_attr *attr to the threads that start with char *comm. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. + */ +int set_comm_sched_attr(const char *comm, struct sched_attr *attr) +{ + int flags = PROC_FILLCOM | PROC_FILLSTAT; + PROCTAB *ptp; + proc_t task; + int retval; + + ptp = openproc(flags); + if (!ptp) { + err_msg("error openproc()\n"); + return -ENOENT; + } + + memset(&task, 0, sizeof(task)); + + while (readproc(ptp, &task)) { + retval = strncmp(comm, task.cmd, strlen(comm)); + if (retval) + continue; + retval = __set_sched_attr(task.tid, attr); + if (retval) + goto out_err; + } + + closeproc(ptp); + return 0; + +out_err: + closeproc(ptp); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_OTHER)) + return -1; + if (prio > sched_get_priority_max(SCHED_OTHER)) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_priority = prio; + break; + default: + return -1; + } + return 0; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 000000000000..9aa962319ca2 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <time.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; + +int parse_prio(char *arg, struct sched_attr *sched_param); +int set_comm_sched_attr(const char *comm, struct sched_attr *attr); diff --git a/tools/virtio/linux/mm_types.h b/tools/virtio/linux/mm_types.h new file mode 100644 index 000000000000..356ba4d496f6 --- /dev/null +++ b/tools/virtio/linux/mm_types.h @@ -0,0 +1,3 @@ +struct folio { + struct page page; +}; diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index cb3f29c09aff..23f142af544a 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features) memset(dev, 0, sizeof *dev); dev->vdev.features = features; INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); dev->buf_size = 1024; dev->buf = malloc(dev->buf_size); assert(dev->buf); |