Diffstat (limited to 'tools/perf/arch/x86/tests/insn-x86-dat-src.c')
-rw-r--r--  tools/perf/arch/x86/tests/insn-x86-dat-src.c  597
1 file changed, 597 insertions, 0 deletions
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index a391464c8dee..f55505c75d51 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -2628,6 +2628,512 @@ int main(void)
asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1");
asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
+ /* Key Locker */
+
+ asm volatile("loadiwkey %xmm1, %xmm2");
+ asm volatile("encodekey128 %eax, %edx");
+ asm volatile("encodekey256 %eax, %edx");
+ asm volatile("aesenc128kl 0x77(%rdx), %xmm3");
+ asm volatile("aesenc256kl 0x77(%rdx), %xmm3");
+ asm volatile("aesdec128kl 0x77(%rdx), %xmm3");
+ asm volatile("aesdec256kl 0x77(%rdx), %xmm3");
+ asm volatile("aesencwide128kl 0x77(%rdx)");
+ asm volatile("aesencwide256kl 0x77(%rdx)");
+ asm volatile("aesdecwide128kl 0x77(%rdx)");
+ asm volatile("aesdecwide256kl 0x77(%rdx)");
+
+ /* Remote Atomic Operations */
+
+ asm volatile("aadd %ecx,(%rax)");
+ asm volatile("aadd %edx,(%r8)");
+ asm volatile("aadd %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aadd %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("aadd %rcx,(%rax)");
+ asm volatile("aadd %rdx,(%r8)");
+ asm volatile("aadd %rdx,(0x12345678)");
+ asm volatile("aadd %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aadd %rdx,0x12345678(%r8,%rcx,8)");
+
+ asm volatile("aand %ecx,(%rax)");
+ asm volatile("aand %edx,(%r8)");
+ asm volatile("aand %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aand %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("aand %rcx,(%rax)");
+ asm volatile("aand %rdx,(%r8)");
+ asm volatile("aand %rdx,(0x12345678)");
+ asm volatile("aand %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aand %rdx,0x12345678(%r8,%rcx,8)");
+
+ asm volatile("aor %ecx,(%rax)");
+ asm volatile("aor %edx,(%r8)");
+ asm volatile("aor %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aor %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("aor %rcx,(%rax)");
+ asm volatile("aor %rdx,(%r8)");
+ asm volatile("aor %rdx,(0x12345678)");
+ asm volatile("aor %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("aor %rdx,0x12345678(%r8,%rcx,8)");
+
+ asm volatile("axor %ecx,(%rax)");
+ asm volatile("axor %edx,(%r8)");
+ asm volatile("axor %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("axor %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("axor %rcx,(%rax)");
+ asm volatile("axor %rdx,(%r8)");
+ asm volatile("axor %rdx,(0x12345678)");
+ asm volatile("axor %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("axor %rdx,0x12345678(%r8,%rcx,8)");
+
+ /* VEX CMPxxXADD */
+
+ asm volatile("cmpbexadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpbxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmplexadd %ebx,%ecx,(%r9)");
+ asm volatile("cmplxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnbexadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnbxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnlexadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnlxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnoxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnpxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnsxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpnzxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpoxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmppxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpsxadd %ebx,%ecx,(%r9)");
+ asm volatile("cmpzxadd %ebx,%ecx,(%r9)");
+
+ /* Pre-fetch */
+
+ asm volatile("prefetch (%rax)");
+ asm volatile("prefetcht0 (%rax)");
+ asm volatile("prefetcht1 (%rax)");
+ asm volatile("prefetcht2 (%rax)");
+ asm volatile("prefetchnta (%rax)");
+ asm volatile("prefetchit0 0x12345678(%rip)");
+ asm volatile("prefetchit1 0x12345678(%rip)");
+
+ /* MSR List */
+
+ asm volatile("rdmsrlist");
+ asm volatile("wrmsrlist");
+
+ /* User Read/Write MSR */
+
+ asm volatile("urdmsr %rdx,%rax");
+ asm volatile("urdmsr %rdx,%r22");
+ asm volatile("urdmsr $0x7f,%r12");
+ asm volatile("uwrmsr %rax,%rdx");
+ asm volatile("uwrmsr %r22,%rdx");
+ asm volatile("uwrmsr %r12,$0x7f");
+
+ /* AVX NE Convert */
+
+ asm volatile("vbcstnebf162ps (%rcx),%xmm6");
+ asm volatile("vbcstnesh2ps (%rcx),%xmm6");
+ asm volatile("vcvtneebf162ps (%rcx),%xmm6");
+ asm volatile("vcvtneeph2ps (%rcx),%xmm6");
+ asm volatile("vcvtneobf162ps (%rcx),%xmm6");
+ asm volatile("vcvtneoph2ps (%rcx),%xmm6");
+ asm volatile("vcvtneps2bf16 %xmm1,%xmm6");
+
+ /* FRED */
+
+ asm volatile("erets"); /* Expecting: erets indirect 0 */
+ asm volatile("eretu"); /* Expecting: eretu indirect 0 */
+
+ /* AMX Complex */
+
+ asm volatile("tcmmimfp16ps %tmm1,%tmm2,%tmm3");
+ asm volatile("tcmmrlfp16ps %tmm1,%tmm2,%tmm3");
+
+ /* AMX FP16 */
+
+ asm volatile("tdpfp16ps %tmm1,%tmm2,%tmm3");
+
+ /* REX2 */
+
+ asm volatile("test $0x5, %r18b");
+ asm volatile("test $0x5, %r18d");
+ asm volatile("test $0x5, %r18");
+ asm volatile("test $0x5, %r18w");
+ asm volatile("imull %eax, %r14d");
+ asm volatile("imull %eax, %r17d");
+ asm volatile("punpckldq (%r18), %mm2");
+ asm volatile("leal (%rax), %r16d");
+ asm volatile("leal (%rax), %r31d");
+ asm volatile("leal (,%r16), %eax");
+ asm volatile("leal (,%r31), %eax");
+ asm volatile("leal (%r16), %eax");
+ asm volatile("leal (%r31), %eax");
+ asm volatile("leaq (%rax), %r15");
+ asm volatile("leaq (%rax), %r16");
+ asm volatile("leaq (%r15), %rax");
+ asm volatile("leaq (%r16), %rax");
+ asm volatile("leaq (,%r15), %rax");
+ asm volatile("leaq (,%r16), %rax");
+ asm volatile("add (%r16), %r8");
+ asm volatile("add (%r16), %r15");
+ asm volatile("mov (,%r9), %r16");
+ asm volatile("mov (,%r14), %r16");
+ asm volatile("sub (%r10), %r31");
+ asm volatile("sub (%r13), %r31");
+ asm volatile("leal 1(%r16, %r21), %eax");
+ asm volatile("leal 1(%r16, %r26), %r31d");
+ asm volatile("leal 129(%r21, %r9), %eax");
+ asm volatile("leal 129(%r26, %r9), %r31d");
+ /*
+ * jmpabs has to be emitted with .byte because gas does not support the
+ * mnemonic. gas also attributes the .byte sequence to the wrong source
+ * line, so the following is arranged as a workaround.
+ */
+ asm volatile(""); /* Expecting: jmp indirect 0 */
+ asm volatile(".byte 0xd5, 0x00, 0xa1, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12");
+ asm volatile("pushp %rbx");
+ asm volatile("pushp %r16");
+ asm volatile("pushp %r31");
+ asm volatile("popp %r31");
+ asm volatile("popp %r16");
+ asm volatile("popp %rbx");
+
+ /* APX */
+
+ asm volatile("bextr %r25d,%edx,%r10d");
+ asm volatile("bextr %r25d,0x123(%r31,%rax,4),%edx");
+ asm volatile("bextr %r31,%r15,%r11");
+ asm volatile("bextr %r31,0x123(%r31,%rax,4),%r15");
+ asm volatile("blsi %r25d,%edx");
+ asm volatile("blsi %r31,%r15");
+ asm volatile("blsi 0x123(%r31,%rax,4),%r25d");
+ asm volatile("blsi 0x123(%r31,%rax,4),%r31");
+ asm volatile("blsmsk %r25d,%edx");
+ asm volatile("blsmsk %r31,%r15");
+ asm volatile("blsmsk 0x123(%r31,%rax,4),%r25d");
+ asm volatile("blsmsk 0x123(%r31,%rax,4),%r31");
+ asm volatile("blsr %r25d,%edx");
+ asm volatile("blsr %r31,%r15");
+ asm volatile("blsr 0x123(%r31,%rax,4),%r25d");
+ asm volatile("blsr 0x123(%r31,%rax,4),%r31");
+ asm volatile("bzhi %r25d,%edx,%r10d");
+ asm volatile("bzhi %r25d,0x123(%r31,%rax,4),%edx");
+ asm volatile("bzhi %r31,%r15,%r11");
+ asm volatile("bzhi %r31,0x123(%r31,%rax,4),%r15");
+ asm volatile("cmpbexadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpbexadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpbxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpbxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmplxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmplxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnbexadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnbexadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnbxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnbxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnlexadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnlexadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnlxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnlxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnoxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnoxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnpxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnpxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnsxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnsxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpnzxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpnzxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpoxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpoxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmppxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmppxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpsxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpsxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("cmpzxadd %r25d,%edx,0x123(%r31,%rax,4)");
+ asm volatile("cmpzxadd %r31,%r15,0x123(%r31,%rax,4)");
+ asm volatile("crc32q %r31, %r22");
+ asm volatile("crc32q (%r31), %r22");
+ asm volatile("crc32b %r19b, %r17");
+ asm volatile("crc32b %r19b, %r21d");
+ asm volatile("crc32b (%r19),%ebx");
+ asm volatile("crc32l %r31d, %r23d");
+ asm volatile("crc32l (%r31), %r23d");
+ asm volatile("crc32w %r31w, %r21d");
+ asm volatile("crc32w (%r31),%r21d");
+ asm volatile("crc32 %rax, %r18");
+ asm volatile("enqcmd 0x123(%r31d,%eax,4),%r25d");
+ asm volatile("enqcmd 0x123(%r31,%rax,4),%r31");
+ asm volatile("enqcmds 0x123(%r31d,%eax,4),%r25d");
+ asm volatile("enqcmds 0x123(%r31,%rax,4),%r31");
+ asm volatile("invept 0x123(%r31,%rax,4),%r31");
+ asm volatile("invpcid 0x123(%r31,%rax,4),%r31");
+ asm volatile("invvpid 0x123(%r31,%rax,4),%r31");
+ asm volatile("kmovb %k5,%r25d");
+ asm volatile("kmovb %k5,0x123(%r31,%rax,4)");
+ asm volatile("kmovb %r25d,%k5");
+ asm volatile("kmovb 0x123(%r31,%rax,4),%k5");
+ asm volatile("kmovd %k5,%r25d");
+ asm volatile("kmovd %k5,0x123(%r31,%rax,4)");
+ asm volatile("kmovd %r25d,%k5");
+ asm volatile("kmovd 0x123(%r31,%rax,4),%k5");
+ asm volatile("kmovq %k5,%r31");
+ asm volatile("kmovq %k5,0x123(%r31,%rax,4)");
+ asm volatile("kmovq %r31,%k5");
+ asm volatile("kmovq 0x123(%r31,%rax,4),%k5");
+ asm volatile("kmovw %k5,%r25d");
+ asm volatile("kmovw %k5,0x123(%r31,%rax,4)");
+ asm volatile("kmovw %r25d,%k5");
+ asm volatile("kmovw 0x123(%r31,%rax,4),%k5");
+ asm volatile("ldtilecfg 0x123(%r31,%rax,4)");
+ asm volatile("movbe %r18w,%ax");
+ asm volatile("movbe %r15w,%ax");
+ asm volatile("movbe %r18w,0x123(%r16,%rax,4)");
+ asm volatile("movbe %r18w,0x123(%r31,%rax,4)");
+ asm volatile("movbe %r25d,%edx");
+ asm volatile("movbe %r15d,%edx");
+ asm volatile("movbe %r25d,0x123(%r16,%rax,4)");
+ asm volatile("movbe %r31,%r15");
+ asm volatile("movbe %r8,%r15");
+ asm volatile("movbe %r31,0x123(%r16,%rax,4)");
+ asm volatile("movbe %r31,0x123(%r31,%rax,4)");
+ asm volatile("movbe 0x123(%r16,%rax,4),%r31");
+ asm volatile("movbe 0x123(%r31,%rax,4),%r18w");
+ asm volatile("movbe 0x123(%r31,%rax,4),%r25d");
+ asm volatile("movdir64b 0x123(%r31d,%eax,4),%r25d");
+ asm volatile("movdir64b 0x123(%r31,%rax,4),%r31");
+ asm volatile("movdiri %r25d,0x123(%r31,%rax,4)");
+ asm volatile("movdiri %r31,0x123(%r31,%rax,4)");
+ asm volatile("pdep %r25d,%edx,%r10d");
+ asm volatile("pdep %r31,%r15,%r11");
+ asm volatile("pdep 0x123(%r31,%rax,4),%r25d,%edx");
+ asm volatile("pdep 0x123(%r31,%rax,4),%r31,%r15");
+ asm volatile("pext %r25d,%edx,%r10d");
+ asm volatile("pext %r31,%r15,%r11");
+ asm volatile("pext 0x123(%r31,%rax,4),%r25d,%edx");
+ asm volatile("pext 0x123(%r31,%rax,4),%r31,%r15");
+ asm volatile("shlx %r25d,%edx,%r10d");
+ asm volatile("shlx %r25d,0x123(%r31,%rax,4),%edx");
+ asm volatile("shlx %r31,%r15,%r11");
+ asm volatile("shlx %r31,0x123(%r31,%rax,4),%r15");
+ asm volatile("shrx %r25d,%edx,%r10d");
+ asm volatile("shrx %r25d,0x123(%r31,%rax,4),%edx");
+ asm volatile("shrx %r31,%r15,%r11");
+ asm volatile("shrx %r31,0x123(%r31,%rax,4),%r15");
+ asm volatile("sttilecfg 0x123(%r31,%rax,4)");
+ asm volatile("tileloadd 0x123(%r31,%rax,4),%tmm6");
+ asm volatile("tileloaddt1 0x123(%r31,%rax,4),%tmm6");
+ asm volatile("tilestored %tmm6,0x123(%r31,%rax,4)");
+ asm volatile("vbroadcastf128 (%r16),%ymm3");
+ asm volatile("vbroadcasti128 (%r16),%ymm3");
+ asm volatile("vextractf128 $1,%ymm3,(%r16)");
+ asm volatile("vextracti128 $1,%ymm3,(%r16)");
+ asm volatile("vinsertf128 $1,(%r16),%ymm3,%ymm8");
+ asm volatile("vinserti128 $1,(%r16),%ymm3,%ymm8");
+ asm volatile("vroundpd $1,(%r24),%xmm6");
+ asm volatile("vroundps $2,(%r24),%xmm6");
+ asm volatile("vroundsd $3,(%r24),%xmm6,%xmm3");
+ asm volatile("vroundss $4,(%r24),%xmm6,%xmm3");
+ asm volatile("wrssd %r25d,0x123(%r31,%rax,4)");
+ asm volatile("wrssq %r31,0x123(%r31,%rax,4)");
+ asm volatile("wrussd %r25d,0x123(%r31,%rax,4)");
+ asm volatile("wrussq %r31,0x123(%r31,%rax,4)");
+
+ /* APX new data destination */
+
+ asm volatile("adc $0x1234,%ax,%r30w");
+ asm volatile("adc %r15b,%r17b,%r18b");
+ asm volatile("adc %r15d,(%r8),%r18d");
+ asm volatile("adc (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("adc (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("adcl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("adcx %r15d,%r8d,%r18d");
+ asm volatile("adcx (%r15,%r31,1),%r8");
+ asm volatile("adcx (%r15,%r31,1),%r8d,%r18d");
+ asm volatile("add $0x1234,%ax,%r30w");
+ asm volatile("add $0x12344433,%r15,%r16");
+ asm volatile("add $0x34,%r13b,%r17b");
+ asm volatile("add $0xfffffffff4332211,%rax,%r8");
+ asm volatile("add %r31,%r8,%r16");
+ asm volatile("add %r31,(%r8),%r16");
+ asm volatile("add %r31,(%r8,%r16,8),%r16");
+ asm volatile("add %r31b,%r8b,%r16b");
+ asm volatile("add %r31d,%r8d,%r16d");
+ asm volatile("add %r31w,%r8w,%r16w");
+ asm volatile("add (%r31),%r8,%r16");
+ asm volatile("add 0x9090(%r31,%r16,1),%r8,%r16");
+ asm volatile("addb %r31b,%r8b,%r16b");
+ asm volatile("addl %r31d,%r8d,%r16d");
+ asm volatile("addl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("addq %r31,%r8,%r16");
+ asm volatile("addq $0x12344433,(%r15,%rcx,4),%r16");
+ asm volatile("addw %r31w,%r8w,%r16w");
+ asm volatile("adox %r15d,%r8d,%r18d");
+ asm volatile("{load} add %r31,%r8,%r16");
+ asm volatile("{store} add %r31,%r8,%r16");
+ asm volatile("adox (%r15,%r31,1),%r8");
+ asm volatile("adox (%r15,%r31,1),%r8d,%r18d");
+ asm volatile("and $0x1234,%ax,%r30w");
+ asm volatile("and %r15b,%r17b,%r18b");
+ asm volatile("and %r15d,(%r8),%r18d");
+ asm volatile("and (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("and (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("andl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("cmova 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovae 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovb 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovbe 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmove 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovg 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovge 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovl 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovle 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovne 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovno 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovnp 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovns 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovo 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovp 0x90909090(%eax),%edx,%r8d");
+ asm volatile("cmovs 0x90909090(%eax),%edx,%r8d");
+ asm volatile("dec %rax,%r17");
+ asm volatile("decb (%r31,%r12,1),%r8b");
+ asm volatile("imul 0x909(%rax,%r31,8),%rdx,%r25");
+ asm volatile("imul 0x90909(%eax),%edx,%r8d");
+ asm volatile("inc %r31,%r16");
+ asm volatile("inc %r31,%r8");
+ asm volatile("inc %rax,%rbx");
+ asm volatile("neg %rax,%r17");
+ asm volatile("negb (%r31,%r12,1),%r8b");
+ asm volatile("not %rax,%r17");
+ asm volatile("notb (%r31,%r12,1),%r8b");
+ asm volatile("or $0x1234,%ax,%r30w");
+ asm volatile("or %r15b,%r17b,%r18b");
+ asm volatile("or %r15d,(%r8),%r18d");
+ asm volatile("or (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("or (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("orl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("rcl $0x2,%r12b,%r31b");
+ asm volatile("rcl %cl,%r16b,%r8b");
+ asm volatile("rclb $0x1,(%rax),%r31b");
+ asm volatile("rcll $0x2,(%rax),%r31d");
+ asm volatile("rclw $0x1,(%rax),%r31w");
+ asm volatile("rclw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("rcr $0x2,%r12b,%r31b");
+ asm volatile("rcr %cl,%r16b,%r8b");
+ asm volatile("rcrb $0x1,(%rax),%r31b");
+ asm volatile("rcrl $0x2,(%rax),%r31d");
+ asm volatile("rcrw $0x1,(%rax),%r31w");
+ asm volatile("rcrw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("rol $0x2,%r12b,%r31b");
+ asm volatile("rol %cl,%r16b,%r8b");
+ asm volatile("rolb $0x1,(%rax),%r31b");
+ asm volatile("roll $0x2,(%rax),%r31d");
+ asm volatile("rolw $0x1,(%rax),%r31w");
+ asm volatile("rolw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("ror $0x2,%r12b,%r31b");
+ asm volatile("ror %cl,%r16b,%r8b");
+ asm volatile("rorb $0x1,(%rax),%r31b");
+ asm volatile("rorl $0x2,(%rax),%r31d");
+ asm volatile("rorw $0x1,(%rax),%r31w");
+ asm volatile("rorw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("sar $0x2,%r12b,%r31b");
+ asm volatile("sar %cl,%r16b,%r8b");
+ asm volatile("sarb $0x1,(%rax),%r31b");
+ asm volatile("sarl $0x2,(%rax),%r31d");
+ asm volatile("sarw $0x1,(%rax),%r31w");
+ asm volatile("sarw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("sbb $0x1234,%ax,%r30w");
+ asm volatile("sbb %r15b,%r17b,%r18b");
+ asm volatile("sbb %r15d,(%r8),%r18d");
+ asm volatile("sbb (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("sbb (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("sbbl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("shl $0x2,%r12b,%r31b");
+ asm volatile("shl $0x2,%r12b,%r31b");
+ asm volatile("shl %cl,%r16b,%r8b");
+ asm volatile("shl %cl,%r16b,%r8b");
+ asm volatile("shlb $0x1,(%rax),%r31b");
+ asm volatile("shlb $0x1,(%rax),%r31b");
+ asm volatile("shld $0x1,%r12,(%rax),%r31");
+ asm volatile("shld $0x2,%r15d,(%rax),%r31d");
+ asm volatile("shld $0x2,%r8w,%r12w,%r31w");
+ asm volatile("shld %cl,%r12,%r16,%r8");
+ asm volatile("shld %cl,%r13w,(%r19,%rax,4),%r31w");
+ asm volatile("shld %cl,%r9w,(%rax),%r31w");
+ asm volatile("shll $0x2,(%rax),%r31d");
+ asm volatile("shll $0x2,(%rax),%r31d");
+ asm volatile("shlw $0x1,(%rax),%r31w");
+ asm volatile("shlw $0x1,(%rax),%r31w");
+ asm volatile("shlw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("shlw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("shr $0x2,%r12b,%r31b");
+ asm volatile("shr %cl,%r16b,%r8b");
+ asm volatile("shrb $0x1,(%rax),%r31b");
+ asm volatile("shrd $0x1,%r12,(%rax),%r31");
+ asm volatile("shrd $0x2,%r15d,(%rax),%r31d");
+ asm volatile("shrd $0x2,%r8w,%r12w,%r31w");
+ asm volatile("shrd %cl,%r12,%r16,%r8");
+ asm volatile("shrd %cl,%r13w,(%r19,%rax,4),%r31w");
+ asm volatile("shrd %cl,%r9w,(%rax),%r31w");
+ asm volatile("shrl $0x2,(%rax),%r31d");
+ asm volatile("shrw $0x1,(%rax),%r31w");
+ asm volatile("shrw %cl,(%r19,%rax,4),%r31w");
+ asm volatile("sub $0x1234,%ax,%r30w");
+ asm volatile("sub %r15b,%r17b,%r18b");
+ asm volatile("sub %r15d,(%r8),%r18d");
+ asm volatile("sub (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("sub (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("subl $0x11,(%r19,%rax,4),%r20d");
+ asm volatile("xor $0x1234,%ax,%r30w");
+ asm volatile("xor %r15b,%r17b,%r18b");
+ asm volatile("xor %r15d,(%r8),%r18d");
+ asm volatile("xor (%r15,%rax,1),%r16b,%r8b");
+ asm volatile("xor (%r15,%rax,1),%r16w,%r8w");
+ asm volatile("xorl $0x11,(%r19,%rax,4),%r20d");
+
+ /* APX suppress status flags */
+
+ asm volatile("{nf} add %bl,%dl,%r8b");
+ asm volatile("{nf} add %dx,%ax,%r9w");
+ asm volatile("{nf} add 0x123(%r8,%rax,4),%bl,%dl");
+ asm volatile("{nf} add 0x123(%r8,%rax,4),%dx,%ax");
+ asm volatile("{nf} or %bl,%dl,%r8b");
+ asm volatile("{nf} or %dx,%ax,%r9w");
+ asm volatile("{nf} or 0x123(%r8,%rax,4),%bl,%dl");
+ asm volatile("{nf} or 0x123(%r8,%rax,4),%dx,%ax");
+ asm volatile("{nf} and %bl,%dl,%r8b");
+ asm volatile("{nf} and %dx,%ax,%r9w");
+ asm volatile("{nf} and 0x123(%r8,%rax,4),%bl,%dl");
+ asm volatile("{nf} and 0x123(%r8,%rax,4),%dx,%ax");
+ asm volatile("{nf} shld $0x7b,%dx,%ax,%r9w");
+ asm volatile("{nf} sub %bl,%dl,%r8b");
+ asm volatile("{nf} sub %dx,%ax,%r9w");
+ asm volatile("{nf} sub 0x123(%r8,%rax,4),%bl,%dl");
+ asm volatile("{nf} sub 0x123(%r8,%rax,4),%dx,%ax");
+ asm volatile("{nf} shrd $0x7b,%dx,%ax,%r9w");
+ asm volatile("{nf} xor %bl,%dl,%r8b");
+ asm volatile("{nf} xor %r31,%r31");
+ asm volatile("{nf} xor 0x123(%r8,%rax,4),%bl,%dl");
+ asm volatile("{nf} xor 0x123(%r8,%rax,4),%dx,%ax");
+ asm volatile("{nf} imul $0xff90,%r9,%r15");
+ asm volatile("{nf} imul $0x7b,%r9,%r15");
+ asm volatile("{nf} xor $0x7b,%bl,%dl");
+ asm volatile("{nf} xor $0x7b,%dx,%ax");
+ asm volatile("{nf} popcnt %r9,%r31");
+ asm volatile("{nf} shld %cl,%dx,%ax,%r9w");
+ asm volatile("{nf} shrd %cl,%dx,%ax,%r9w");
+ asm volatile("{nf} imul %r9,%r31,%r11");
+ asm volatile("{nf} sar $0x7b,%bl,%dl");
+ asm volatile("{nf} sar $0x7b,%dx,%ax");
+ asm volatile("{nf} sar $1,%bl,%dl");
+ asm volatile("{nf} sar $1,%dx,%ax");
+ asm volatile("{nf} sar %cl,%bl,%dl");
+ asm volatile("{nf} sar %cl,%dx,%ax");
+ asm volatile("{nf} andn %r9,%r31,%r11");
+ asm volatile("{nf} blsi %r9,%r31");
+ asm volatile("{nf} tzcnt %r9,%r31");
+ asm volatile("{nf} lzcnt %r9,%r31");
+ asm volatile("{nf} idiv %bl");
+ asm volatile("{nf} idiv %dx");
+ asm volatile("{nf} dec %bl,%dl");
+ asm volatile("{nf} dec %dx,%ax");
+
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@@ -4848,6 +5354,97 @@ int main(void)
#endif /* #ifndef __x86_64__ */
+ /* Key Locker */
+
+ asm volatile(" loadiwkey %xmm1, %xmm2");
+ asm volatile(" encodekey128 %eax, %edx");
+ asm volatile(" encodekey256 %eax, %edx");
+ asm volatile(" aesenc128kl 0x77(%edx), %xmm3");
+ asm volatile(" aesenc256kl 0x77(%edx), %xmm3");
+ asm volatile(" aesdec128kl 0x77(%edx), %xmm3");
+ asm volatile(" aesdec256kl 0x77(%edx), %xmm3");
+ asm volatile(" aesencwide128kl 0x77(%edx)");
+ asm volatile(" aesencwide256kl 0x77(%edx)");
+ asm volatile(" aesdecwide128kl 0x77(%edx)");
+ asm volatile(" aesdecwide256kl 0x77(%edx)");
+
+ /* Remote Atomic Operations */
+
+ asm volatile("aadd %ecx,(%eax)");
+ asm volatile("aadd %edx,(0x12345678)");
+ asm volatile("aadd %edx,0x12345678(%eax,%ecx,8)");
+
+ asm volatile("aand %ecx,(%eax)");
+ asm volatile("aand %edx,(0x12345678)");
+ asm volatile("aand %edx,0x12345678(%eax,%ecx,8)");
+
+ asm volatile("aor %ecx,(%eax)");
+ asm volatile("aor %edx,(0x12345678)");
+ asm volatile("aor %edx,0x12345678(%eax,%ecx,8)");
+
+ asm volatile("axor %ecx,(%eax)");
+ asm volatile("axor %edx,(0x12345678)");
+ asm volatile("axor %edx,0x12345678(%eax,%ecx,8)");
+
+ /* AVX NE Convert */
+
+ asm volatile("vbcstnebf162ps (%ecx),%xmm6");
+ asm volatile("vbcstnesh2ps (%ecx),%xmm6");
+ asm volatile("vcvtneebf162ps (%ecx),%xmm6");
+ asm volatile("vcvtneeph2ps (%ecx),%xmm6");
+ asm volatile("vcvtneobf162ps (%ecx),%xmm6");
+ asm volatile("vcvtneoph2ps (%ecx),%xmm6");
+ asm volatile("vcvtneps2bf16 %xmm1,%xmm6");
+
+ /* AVX VNNI INT16 */
+
+ asm volatile("vpdpbssd %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpbssds %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpbsud %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpbsuds %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpbuud %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpbuuds %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwsud %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwsuds %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwusd %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwusds %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwuud %xmm1,%xmm2,%xmm3");
+ asm volatile("vpdpwuuds %xmm1,%xmm2,%xmm3");
+
+ /* AVX IFMA */
+
+ asm volatile("vpmadd52huq %xmm1,%xmm2,%xmm3");
+ asm volatile("vpmadd52luq %xmm1,%xmm2,%xmm3");
+
+ /* AVX SHA512 */
+
+ asm volatile("vsha512msg1 %xmm1,%ymm2");
+ asm volatile("vsha512msg2 %ymm1,%ymm2");
+ asm volatile("vsha512rnds2 %xmm1,%ymm2,%ymm3");
+
+ /* AVX SM3 */
+
+ asm volatile("vsm3msg1 %xmm1,%xmm2,%xmm3");
+ asm volatile("vsm3msg2 %xmm1,%xmm2,%xmm3");
+ asm volatile("vsm3rnds2 $0xa1,%xmm1,%xmm2,%xmm3");
+
+ /* AVX SM4 */
+
+ asm volatile("vsm4key4 %xmm1,%xmm2,%xmm3");
+ asm volatile("vsm4rnds4 %xmm1,%xmm2,%xmm3");
+
+ /* Pre-fetch */
+
+ asm volatile("prefetch (%eax)");
+ asm volatile("prefetcht0 (%eax)");
+ asm volatile("prefetcht1 (%eax)");
+ asm volatile("prefetcht2 (%eax)");
+ asm volatile("prefetchnta (%eax)");
+
+ /* Non-serializing write MSR */
+
+ asm volatile("wrmsrns");
+
/* Prediction history reset */
asm volatile("hreset $0");