Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/Makefile            |   3
-rw-r--r--  arch/x86/lib/copy_user_64.S      |   8
-rw-r--r--  arch/x86/lib/csum-wrappers_64.c  |   1
-rw-r--r--  arch/x86/lib/getuser.S           |  20
-rw-r--r--  arch/x86/lib/hweight.S           |  77
-rw-r--r--  arch/x86/lib/insn.c              |  18
-rw-r--r--  arch/x86/lib/kaslr.c             |  90
-rw-r--r--  arch/x86/lib/putuser.S           |  10
-rw-r--r--  arch/x86/lib/usercopy_64.c       |   2
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt  | 265
10 files changed, 377 insertions, 117 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a576752a7e..34a74131a12c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -24,8 +24,9 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 
-obj-y += msr.o msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 2b0ef26da0bd..bf603ebbfd8e 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,11 +17,11 @@
 /* Standard copy_to_user with segment limit checking */
 ENTRY(_copy_to_user)
-	GET_THREAD_INFO(%rax)
+	mov PER_CPU_VAR(current_task), %rax
 	movq %rdi,%rcx
 	addq %rdx,%rcx
 	jc bad_to_user
-	cmpq TI_addr_limit(%rax),%rcx
+	cmpq TASK_addr_limit(%rax),%rcx
 	ja bad_to_user
 	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
 		      "jmp copy_user_generic_string",		\
@@ -32,11 +32,11 @@ ENDPROC(_copy_to_user)
 /* Standard copy_from_user with segment limit checking */
 ENTRY(_copy_from_user)
-	GET_THREAD_INFO(%rax)
+	mov PER_CPU_VAR(current_task), %rax
 	movq %rsi,%rcx
 	addq %rdx,%rcx
 	jc bad_from_user
-	cmpq TI_addr_limit(%rax),%rcx
+	cmpq TASK_addr_limit(%rax),%rcx
 	ja bad_from_user
 	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
 		      "jmp copy_user_generic_string",		\
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 28a6654f0d08..b6fcb9a9ddbc 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -6,6 +6,7 @@
  */
 #include <asm/checksum.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/smap.h>
 
 /**
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 46668cda4ffd..0ef5128c2de8 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,8 +35,8 @@
 	.text
 ENTRY(__get_user_1)
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 1:	movzbl (%_ASM_AX),%edx
@@ -48,8 +48,8 @@ ENDPROC(__get_user_1)
 ENTRY(__get_user_2)
 	add $1,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 2:	movzwl -1(%_ASM_AX),%edx
@@ -61,8 +61,8 @@ ENDPROC(__get_user_2)
 ENTRY(__get_user_4)
 	add $3,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 3:	movl -3(%_ASM_AX),%edx
@@ -75,8 +75,8 @@ ENTRY(__get_user_8)
 #ifdef CONFIG_X86_64
 	add $7,%_ASM_AX
 	jc bad_get_user
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	ASM_STAC
 4:	movq -7(%_ASM_AX),%rdx
@@ -86,8 +86,8 @@ ENTRY(__get_user_8)
 #else
 	add $7,%_ASM_AX
 	jc bad_get_user_8
-	GET_THREAD_INFO(%_ASM_DX)
-	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user_8
 	ASM_STAC
 4:	movl -7(%_ASM_AX),%edx
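Note: the __get_user_N stubs above all follow the same pattern: compute the last byte the access would touch, reject on unsigned wrap-around (the "jc bad_get_user" path), then bound it against the per-task limit that now lives in task_struct (TASK_addr_limit) rather than thread_info (TI_addr_limit). A minimal C sketch of that check; user_range_ok() is a name invented here for illustration, not kernel code:

	static inline int user_range_ok(unsigned long addr, unsigned long size,
					unsigned long addr_limit)
	{
		unsigned long last = addr + size - 1;

		if (last < addr)		/* wrapped: "jc bad_get_user" */
			return 0;
		return last < addr_limit;	/* "cmp TASK_addr_limit(...); jae bad_get_user" */
	}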
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
new file mode 100644
index 000000000000..02de3d74d2c5
--- /dev/null
+++ b/arch/x86/lib/hweight.S
@@ -0,0 +1,77 @@
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+
+/*
+ * unsigned int __sw_hweight32(unsigned int w)
+ * %rdi: w
+ */
+ENTRY(__sw_hweight32)
+
+#ifdef CONFIG_X86_64
+	movl %edi, %eax				# w
+#endif
+	__ASM_SIZE(push,) %__ASM_REG(dx)
+	movl %eax, %edx				# w -> t
+	shrl %edx				# t >>= 1
+	andl $0x55555555, %edx			# t &= 0x55555555
+	subl %edx, %eax				# w -= t
+
+	movl %eax, %edx				# w -> t
+	shrl $2, %eax				# w_tmp >>= 2
+	andl $0x33333333, %edx			# t	&= 0x33333333
+	andl $0x33333333, %eax			# w_tmp &= 0x33333333
+	addl %edx, %eax				# w = w_tmp + t
+
+	movl %eax, %edx				# w -> t
+	shrl $4, %edx				# t >>= 4
+	addl %edx, %eax				# w_tmp += t
+	andl  $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
+	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
+	shrl $24, %eax				# w = w_tmp >> 24
+	__ASM_SIZE(pop,) %__ASM_REG(dx)
+	ret
+ENDPROC(__sw_hweight32)
+
+ENTRY(__sw_hweight64)
+#ifdef CONFIG_X86_64
+	pushq   %rdx
+
+	movq    %rdi, %rdx                      # w -> t
+	movabsq $0x5555555555555555, %rax
+	shrq    %rdx                            # t >>= 1
+	andq    %rdx, %rax                      # t &= 0x5555555555555555
+	movabsq $0x3333333333333333, %rdx
+	subq    %rax, %rdi                      # w -= t
+
+	movq    %rdi, %rax                      # w -> t
+	shrq    $2, %rdi                        # w_tmp >>= 2
+	andq    %rdx, %rax                      # t     &= 0x3333333333333333
+	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
+	addq    %rdx, %rax                      # w = w_tmp + t
+
+	movq    %rax, %rdx                      # w -> t
+	shrq    $4, %rdx                        # t >>= 4
+	addq    %rdx, %rax                      # w_tmp += t
+	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
+	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
+	movabsq $0x0101010101010101, %rdx
+	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
+	shrq    $56, %rax                       # w = w_tmp >> 56
+
+	popq    %rdx
+	ret
+#else /* CONFIG_X86_32 */
+	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
+	pushl   %ecx
+
+	call    __sw_hweight32
+	movl    %eax, %ecx                      # stash away result
+	movl    %edx, %eax                      # second part of input
+	call    __sw_hweight32
+	addl    %ecx, %eax                      # result
+
+	popl    %ecx
+	ret
+#endif
+ENDPROC(__sw_hweight64)
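Note: the new hweight.S carries out the classic SWAR ("SIMD within a register") population count: sum adjacent bit pairs, then nibbles, then bytes, and finally gather the byte sums with a multiply. For comparison with the assembly, here is the same 32-bit computation in C (it mirrors the generic __sw_hweight32() in lib/hweight.c):

	unsigned int sw_hweight32(unsigned int w)
	{
		w -= (w >> 1) & 0x55555555;			/* 2-bit sums  */
		w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);/* 4-bit sums  */
		w  = (w + (w >> 4)) & 0x0f0f0f0f;		/* 8-bit sums  */
		return (w * 0x01010101) >> 24;			/* add the four bytes */
	}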
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1a416935bac9..1088eb8f3a5f 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -155,14 +155,24 @@ found:
 			/*
 			 * In 32-bits mode, if the [7:6] bits (mod bits of
 			 * ModRM) on the second byte are not 11b, it is
-			 * LDS or LES.
+			 * LDS or LES or BOUND.
 			 */
 			if (X86_MODRM_MOD(b2) != 3)
 				goto vex_end;
 		}
 		insn->vex_prefix.bytes[0] = b;
 		insn->vex_prefix.bytes[1] = b2;
-		if (inat_is_vex3_prefix(attr)) {
+		if (inat_is_evex_prefix(attr)) {
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+			insn->vex_prefix.bytes[3] = b2;
+			insn->vex_prefix.nbytes = 4;
+			insn->next_byte += 4;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else if (inat_is_vex3_prefix(attr)) {
 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 			insn->vex_prefix.bytes[2] = b2;
 			insn->vex_prefix.nbytes = 3;
@@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn)
 		m = insn_vex_m_bits(insn);
 		p = insn_vex_p_bits(insn);
 		insn->attr = inat_get_avx_attribute(op, m, p);
-		if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+		    (!inat_accept_vex(insn->attr) &&
+		     !inat_is_group(insn->attr)))
 			insn->attr = 0;	/* This instruction is bad */
 		goto end;	/* VEX has only 1 byte for opcode */
 	}
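Note: the change above teaches the prefix decoder about the 4-byte EVEX prefix: byte 0x62 followed by the P0, P1 and P2 payload bytes. In 64-bit mode 0x62 is always a prefix; in 32-bit mode it only counts as one when the ModRM.mod bits of the following byte are 11b, otherwise the sequence decodes as BOUND (hence the "LDS or LES or BOUND" comment update). A rough sketch of that disambiguation, with looks_like_evex() a helper written here purely for illustration:

	/* Illustration only: does this byte sequence start with an EVEX prefix? */
	static int looks_like_evex(const unsigned char *p, int x86_64)
	{
		if (p[0] != 0x62)
			return 0;
		/* In 32-bit mode, 62h with ModRM.mod != 11b decodes as BOUND */
		if (!x86_64 && ((p[1] >> 6) & 3) != 3)
			return 0;
		return 1;	/* prefix is 4 bytes: 62h + P0 + P1 + P2 */
	}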
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
new file mode 100644
index 000000000000..f7dfeda83e5c
--- /dev/null
+++ b/arch/x86/lib/kaslr.c
@@ -0,0 +1,90 @@
+/*
+ * Entropy functions used on early boot for KASLR base and memory
+ * randomization. The base randomization is done in the compressed
+ * kernel and memory randomization is done early when the regular
+ * kernel starts. This file is included in the compressed kernel and
+ * normally linked in the regular.
+ */
+#include <asm/kaslr.h>
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+#include <asm/e820.h>
+#include <asm/io.h>
+
+/*
+ * When built for the regular kernel, several functions need to be stubbed out
+ * or changed to their regular kernel equivalent.
+ */
+#ifndef KASLR_COMPRESSED_BOOT
+#include <asm/cpufeature.h>
+#include <asm/setup.h>
+
+#define debug_putstr(v) early_printk(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#define get_boot_seed() kaslr_offset()
+#endif
+
+#define I8254_PORT_CONTROL	0x43
+#define I8254_PORT_COUNTER0	0x40
+#define I8254_CMD_READBACK	0xC0
+#define I8254_SELECT_COUNTER0	0x02
+#define I8254_STATUS_NOTREADY	0x40
+static inline u16 i8254(void)
+{
+	u16 status, timer;
+
+	do {
+		outb(I8254_PORT_CONTROL,
+		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+		status = inb(I8254_PORT_COUNTER0);
+		timer  = inb(I8254_PORT_COUNTER0);
+		timer |= inb(I8254_PORT_COUNTER0) << 8;
+	} while (status & I8254_STATUS_NOTREADY);
+
+	return timer;
+}
+
+unsigned long kaslr_get_random_long(const char *purpose)
+{
+#ifdef CONFIG_X86_64
+	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
+#else
+	const unsigned long mix_const = 0x3f39e593UL;
+#endif
+	unsigned long raw, random = get_boot_seed();
+	bool use_i8254 = true;
+
+	debug_putstr(purpose);
+	debug_putstr(" KASLR using");
+
+	if (has_cpuflag(X86_FEATURE_RDRAND)) {
+		debug_putstr(" RDRAND");
+		if (rdrand_long(&raw)) {
+			random ^= raw;
+			use_i8254 = false;
+		}
+	}
+
+	if (has_cpuflag(X86_FEATURE_TSC)) {
+		debug_putstr(" RDTSC");
+		raw = rdtsc();
+
+		random ^= raw;
+		use_i8254 = false;
+	}
+
+	if (use_i8254) {
+		debug_putstr(" i8254");
+		random ^= i8254();
+	}
+
+	/* Circular multiply for better bit diffusion */
+	asm("mul %3"
+	    : "=a" (random), "=d" (raw)
+	    : "a" (random), "rm" (mix_const));
+	random += raw;
+
+	debug_putstr("...\n");
+
+	return random;
+}
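Note: kaslr_get_random_long() XORs together whatever entropy sources are available (the boot seed, RDRAND, RDTSC, or the i8254 counter as a last resort) and then diffuses the bits with the "circular multiply": the widening mul leaves the low half of the product in %rax and the high half in %rdx, and adding the two folds every input bit back into the result. The same computation in plain C for the 64-bit case, assuming a compiler that provides unsigned __int128:

	/* What the inline "mul %3" plus "random += raw" above computes. */
	static unsigned long circular_multiply(unsigned long random,
					       unsigned long mix_const)
	{
		unsigned __int128 product = (unsigned __int128)random * mix_const;

		/* low 64 bits plus high 64 bits of the 128-bit product */
		return (unsigned long)product + (unsigned long)(product >> 64);
	}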
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index e0817a12d323..c891ece81e5b 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -29,14 +29,14 @@
  * as they get called from within inline assembly.
  */
-#define ENTER	GET_THREAD_INFO(%_ASM_BX)
+#define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
 #define EXIT	ASM_CLAC ;	\
 		ret
 
 .text
 ENTRY(__put_user_1)
 	ENTER
-	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
+	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
@@ -46,7 +46,7 @@ ENDPROC(__put_user_1)
 ENTRY(__put_user_2)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
@@ -58,7 +58,7 @@ ENDPROC(__put_user_2)
 ENTRY(__put_user_4)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
@@ -70,7 +70,7 @@ ENDPROC(__put_user_4)
 ENTRY(__put_user_8)
 	ENTER
-	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0a42327a59d7..9f760cdcaf40 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -6,7 +6,7 @@
  * Copyright 2002 Andi Kleen <[email protected]>
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Zero Userspace
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index d388de72eaca..767be7c76034 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -13,12 +13,17 @@
 # opcode: escape # escaped-name
 # EndTable
 #
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
 #<group maps>
 # GrpTable: GrpXXX
 # reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
 # EndTable
 #
 # AVX Superscripts
+#  (ev): this opcode requires EVEX prefix.
+#  (evo): this opcode is changed by EVEX prefix (EVEX opcode)
 #  (v): this opcode requires VEX prefix.
 #  (v1): this opcode only supports 128bit VEX.
 #
@@ -137,7 +142,7 @@ AVXcode:
 # 0x60 - 0x6f
 60: PUSHA/PUSHAD (i64)
 61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
 64: SEG=FS (Prefix)
 65: SEG=GS (Prefix)
@@ -399,17 +404,17 @@ AVXcode: 1
 3f:
 # 0x0f 0x40-0x4f
 40: CMOVO Gv,Ev
-41: CMOVNO Gv,Ev
-42: CMOVB/C/NAE Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
 43: CMOVAE/NB/NC Gv,Ev
-44: CMOVE/Z Gv,Ev
-45: CMOVNE/NZ Gv,Ev
-46: CMOVBE/NA Gv,Ev
-47: CMOVA/NBE Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
 48: CMOVS Gv,Ev
 49: CMOVNS Gv,Ev
-4a: CMOVP/PE Gv,Ev
-4b: CMOVNP/PO Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
 4c: CMOVL/NGE Gv,Ev
 4d: CMOVNL/GE Gv,Ev
 4e: CMOVLE/NG Gv,Ev
@@ -426,7 +431,7 @@ AVXcode: 1
 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
-5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
@@ -447,7 +452,7 @@ AVXcode: 1
 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
-6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
 # 0x0f 0x70-0x7f
 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
 71: Grp12 (1A)
@@ -458,14 +463,14 @@
 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
 # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
 77: emms | vzeroupper | vzeroall
-78: VMREAD Ey,Gy
-79: VMWRITE Gy,Ey
-7a:
-7b:
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
-7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
 # 0x0f 0x80-0x8f
 # Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
@@ -485,16 +490,16 @@ AVXcode: 1
 8e: JLE/JNG Jz (f64)
 8f: JNLE/JG Jz (f64)
 # 0x0f 0x90-0x9f
-90: SETO Eb
-91: SETNO Eb
-92: SETB/C/NAE Eb
-93: SETAE/NB/NC Eb
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
 94: SETE/Z Eb
 95: SETNE/NZ Eb
 96: SETBE/NA Eb
 97: SETA/NBE Eb
-98: SETS Eb
-99: SETNS Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
 9a: SETP/PE Eb
 9b: SETNP/PO Eb
 9c: SETL/NGE Eb
@@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
 d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
 d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
 da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
-db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
 dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
 dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
 de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
-df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xe0-0xef
 e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
 e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
@@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
 e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
 e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
 e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
-e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
 e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
 e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
 e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
 ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
-eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
 ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
 ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
 ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
-ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xf0-0xff
 f0: vlddqu Vx,Mx (F2)
 f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
@@ -626,81 +631,105 @@ AVXcode: 2
 0e: vtestps Vx,Wx (66),(v)
 0f: vtestpd Vx,Wx (66),(v)
 # 0x0f 0x38 0x10-0x1f
-10: pblendvb Vdq,Wdq (66)
-11:
-12:
-13: vcvtph2ps Vx,Wx,Ib (66),(v)
-14: blendvps Vdq,Wdq (66)
-15: blendvpd Vdq,Wdq (66)
-16: vpermps Vqq,Hqq,Wqq (66),(v)
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
 17: vptest Vx,Wx (66)
 18: vbroadcastss Vx,Wd (66),(v)
-19: vbroadcastsd Vqq,Wq (66),(v)
-1a: vbroadcastf128 Vqq,Mdq (66),(v)
-1b:
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
-1f:
+1f: vpabsq Vx,Wx (66),(ev)
 # 0x0f 0x38 0x20-0x2f
-20: vpmovsxbw Vx,Ux/Mq (66),(v1)
-21: vpmovsxbd Vx,Ux/Md (66),(v1)
-22: vpmovsxbq Vx,Ux/Mw (66),(v1)
-23: vpmovsxwd Vx,Ux/Mq (66),(v1)
-24: vpmovsxwq Vx,Ux/Md (66),(v1)
-25: vpmovsxdq Vx,Ux/Mq (66),(v1)
-26:
-27:
-28: vpmuldq Vx,Hx,Wx (66),(v1)
-29: vpcmpeqq Vx,Hx,Wx (66),(v1)
-2a: vmovntdqa Vx,Mx (66),(v1)
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
 2b: vpackusdw Vx,Hx,Wx (66),(v1)
-2c: vmaskmovps Vx,Hx,Mx (66),(v)
-2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
 2e: vmaskmovps Mx,Hx,Vx (66),(v)
 2f: vmaskmovpd Mx,Hx,Vx (66),(v)
 # 0x0f 0x38 0x30-0x3f
-30: vpmovzxbw Vx,Ux/Mq (66),(v1)
-31: vpmovzxbd Vx,Ux/Md (66),(v1)
-32: vpmovzxbq Vx,Ux/Mw (66),(v1)
-33: vpmovzxwd Vx,Ux/Mq (66),(v1)
-34: vpmovzxwq Vx,Ux/Md (66),(v1)
-35: vpmovzxdq Vx,Ux/Mq (66),(v1)
-36: vpermd Vqq,Hqq,Wqq (66),(v)
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
 37: vpcmpgtq Vx,Hx,Wx (66),(v1)
-38: vpminsb Vx,Hx,Wx (66),(v1)
-39: vpminsd Vx,Hx,Wx (66),(v1)
-3a: vpminuw Vx,Hx,Wx (66),(v1)
-3b: vpminud Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
 3c: vpmaxsb Vx,Hx,Wx (66),(v1)
-3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
 3e: vpmaxuw Vx,Hx,Wx (66),(v1)
-3f: vpmaxud Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0x38 0x40-0x8f
-40: vpmulld Vx,Hx,Wx (66),(v1)
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
 41: vphminposuw Vdq,Wdq (66),(v1)
-42:
-43:
-44:
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
-46: vpsravd Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x57
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
 58: vpbroadcastd Vx,Wx (66),(v)
-59: vpbroadcastq Vx,Wx (66),(v)
-5a: vbroadcasti128 Vqq,Mdq (66),(v)
-# Skip 0x5b-0x77
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
 78: vpbroadcastb Vx,Wx (66),(v)
 79: vpbroadcastw Vx,Wx (66),(v)
-# Skip 0x7a-0x7f
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
 81: INVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
 # 0x0f 0x38 0x90-0xbf (FMA)
-90: vgatherdd/q Vx,Hx,Wx (66),(v)
-91: vgatherqd/q Vx,Hx,Wx (66),(v)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
 92: vgatherdps/d Vx,Hx,Wx (66),(v)
 93: vgatherqps/d Vx,Hx,Wx (66),(v)
 94:
@@ -715,6 +744,10 @@ AVXcode: 2
 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
 a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
 a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
 a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
@@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
 ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
 ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
 af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
 b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
 b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
 b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
@@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
 be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
 bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
 # 0x0f 0x38 0xc0-0xff
-c8: sha1nexte Vdq,Wdq
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
 c9: sha1msg1 Vdq,Wdq
-ca: sha1msg2 Vdq,Wdq
-cb: sha256rnds2 Vdq,Wdq
-cc: sha256msg1 Vdq,Wdq
-cd: sha256msg2 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
 db: VAESIMC Vdq,Wdq (66),(v1)
 dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -763,15 +801,15 @@ AVXcode: 3
 00: vpermq Vqq,Wqq,Ib (66),(v)
 01: vpermpd Vqq,Wqq,Ib (66),(v)
 02: vpblendd Vx,Hx,Wx,Ib (66),(v)
-03:
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
 04: vpermilps Vx,Wx,Ib (66),(v)
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66)
-09: vroundpd Vx,Wx,Ib (66)
-0a: vroundss Vss,Wss,Ib (66),(v1)
-0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
@@ -780,26 +818,51 @@ AVXcode: 3
 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
 16: vpextrd/q Ey,Vdq,Ib (66),(v1)
 17: vextractps Ed,Vdq,Ib (66),(v1)
-18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
-19: vextractf128 Wdq,Vqq,Ib (66),(v)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
 1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
-38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
-39: vextracti128 Wdq,Vqq,Ib (66),(v)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
 40: vdpps Vx,Hx,Wx,Ib (66)
 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
-42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
 4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v)
@@ -927,8 +990,10 @@ GrpTable: Grp12
 EndTable
 
 GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
-4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
@@ -947,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
 GrpTable: Grp16
@@ -963,6 +1028,20 @@ GrpTable: Grp17
 3: BLSI By,Ey (v)
 EndTable
 
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
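Note: with the opcode-map and decoder changes in place, the in-kernel instruction decoder can walk EVEX-encoded instructions. A minimal usage sketch of the existing insn_init()/insn_get_length() API; the byte pattern is an encoding of vpxord %zmm0,%zmm0,%zmm0 worked out here for illustration and should be double-checked against a disassembler:

	#include <asm/insn.h>

	static void decode_evex_example(void)
	{
		/* Assumed encoding of vpxord %zmm0,%zmm0,%zmm0 */
		static const unsigned char buf[] = {
			0x62, 0xf1, 0x7d, 0x48, 0xef, 0xc0
		};
		struct insn insn;

		insn_init(&insn, buf, sizeof(buf), 1);	/* 1 = 64-bit mode */
		insn_get_length(&insn);			/* decodes all fields */

		/* Expect: insn.vex_prefix.nbytes == 4, insn.length == 6 */
	}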