Diffstat (limited to 'arch/x86/lib')
 arch/x86/lib/atomic64_cx8_32.S  |  50
 arch/x86/lib/checksum_32.S      |  64
 arch/x86/lib/clear_page_64.S    |  66
 arch/x86/lib/copy_page_64.S     |  37
 arch/x86/lib/copy_user_64.S     |  46
 arch/x86/lib/csum-copy_64.S     |   2
 arch/x86/lib/insn.c             |  13
 arch/x86/lib/memcpy_64.S        |  68
 arch/x86/lib/memmove_64.S       |  19
 arch/x86/lib/memset_64.S        |  61
 arch/x86/lib/msr-reg.S          |  24
 arch/x86/lib/rwsem.S            |  44
 arch/x86/lib/thunk_32.S         |  18
 arch/x86/lib/thunk_64.S         |  28
 arch/x86/lib/x86-opcode-map.txt |   9
 15 files changed, 223 insertions(+), 326 deletions(-)
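Most of this diff is two mechanical conversions: open-coded pushl_cfi/popl_cfi plus CFI_REL_OFFSET/CFI_RESTORE pairs become the combined pushl_cfi_reg/popl_cfi_reg (and pushq_cfi_reg/popq_cfi_reg) helpers, and hand-written .altinstr_replacement/.altinstructions sections become ALTERNATIVE/ALTERNATIVE_2 invocations. As a rough illustration of the selection ALTERNATIVE_2 encodes for clear_page, memcpy and memset (the original unrolled code by default, the rep-string variant on CPUs with X86_FEATURE_REP_GOOD, and the ERMS variant when X86_FEATURE_ERMS is set), here is a hedged userspace C sketch. The kernel patches a jmp, or NOPs, directly into the function at boot rather than dispatching through a pointer, and the names below are illustrative stand-ins, not the kernel symbols.

#include <stdbool.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Stand-ins for the three variants in clear_page_64.S; the bodies differ
 * in the real code (unrolled stores, rep stosq, rep stosb). */
static void clear_page_orig(void *page) { memset(page, 0, PAGE_SIZE); }
static void clear_page_rep(void *page)  { memset(page, 0, PAGE_SIZE); }
static void clear_page_erms(void *page) { memset(page, 0, PAGE_SIZE); }

static void (*clear_page)(void *page) = clear_page_orig;

/* Later features win, mirroring the patch order of ALTERNATIVE_2
 * (REP_GOOD first, ERMS second). */
static void pick_clear_page(bool has_rep_good, bool has_erms)
{
        if (has_rep_good)
                clear_page = clear_page_rep;
        if (has_erms)
                clear_page = clear_page_erms;
}

int main(void)
{
        static char page[PAGE_SIZE];

        pick_clear_page(true, true);    /* pretend both features are present */
        clear_page(page);               /* ends up in clear_page_erms */
        return 0;
}

In the kernel the same effect is reached without the indirect call: each ALTERNATIVE_2 site starts out as a jmp to the _orig variant and is rewritten in place during boot, as the comment added to memcpy_64.S describes.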
| diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index f5cc9eb1d51b..082a85167a5b 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -13,16 +13,6 @@  #include <asm/alternative-asm.h>  #include <asm/dwarf2.h> -.macro SAVE reg -	pushl_cfi %\reg -	CFI_REL_OFFSET \reg, 0 -.endm - -.macro RESTORE reg -	popl_cfi %\reg -	CFI_RESTORE \reg -.endm -  .macro read64 reg  	movl %ebx, %eax  	movl %ecx, %edx @@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)  .macro addsub_return func ins insc  ENTRY(atomic64_\func\()_return_cx8)  	CFI_STARTPROC -	SAVE ebp -	SAVE ebx -	SAVE esi -	SAVE edi +	pushl_cfi_reg ebp +	pushl_cfi_reg ebx +	pushl_cfi_reg esi +	pushl_cfi_reg edi  	movl %eax, %esi  	movl %edx, %edi @@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)  10:  	movl %ebx, %eax  	movl %ecx, %edx -	RESTORE edi -	RESTORE esi -	RESTORE ebx -	RESTORE ebp +	popl_cfi_reg edi +	popl_cfi_reg esi +	popl_cfi_reg ebx +	popl_cfi_reg ebp  	ret  	CFI_ENDPROC  ENDPROC(atomic64_\func\()_return_cx8) @@ -104,7 +94,7 @@ addsub_return sub sub sbb  .macro incdec_return func ins insc  ENTRY(atomic64_\func\()_return_cx8)  	CFI_STARTPROC -	SAVE ebx +	pushl_cfi_reg ebx  	read64 %esi  1: @@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)  10:  	movl %ebx, %eax  	movl %ecx, %edx -	RESTORE ebx +	popl_cfi_reg ebx  	ret  	CFI_ENDPROC  ENDPROC(atomic64_\func\()_return_cx8) @@ -130,7 +120,7 @@ incdec_return dec sub sbb  ENTRY(atomic64_dec_if_positive_cx8)  	CFI_STARTPROC -	SAVE ebx +	pushl_cfi_reg ebx  	read64 %esi  1: @@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)  2:  	movl %ebx, %eax  	movl %ecx, %edx -	RESTORE ebx +	popl_cfi_reg ebx  	ret  	CFI_ENDPROC  ENDPROC(atomic64_dec_if_positive_cx8)  ENTRY(atomic64_add_unless_cx8)  	CFI_STARTPROC -	SAVE ebp -	SAVE ebx +	pushl_cfi_reg ebp +	pushl_cfi_reg ebx  /* these just push these two parameters on the stack */ -	SAVE edi -	SAVE ecx +	pushl_cfi_reg edi +	pushl_cfi_reg ecx  	movl %eax, %ebp  	movl %edx, %edi @@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)  3:  	addl $8, %esp  	CFI_ADJUST_CFA_OFFSET -8 -	RESTORE ebx -	RESTORE ebp +	popl_cfi_reg ebx +	popl_cfi_reg ebp  	ret  4:  	cmpl %edx, 4(%esp) @@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)  ENTRY(atomic64_inc_not_zero_cx8)  	CFI_STARTPROC -	SAVE ebx +	pushl_cfi_reg ebx  	read64 %esi  1: @@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)  	movl $1, %eax  3: -	RESTORE ebx +	popl_cfi_reg ebx  	ret  	CFI_ENDPROC  ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index e78b8eee6615..9bc944a91274 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)  	   */		  ENTRY(csum_partial)  	CFI_STARTPROC -	pushl_cfi %esi -	CFI_REL_OFFSET esi, 0 -	pushl_cfi %ebx -	CFI_REL_OFFSET ebx, 0 +	pushl_cfi_reg esi +	pushl_cfi_reg ebx  	movl 20(%esp),%eax	# Function arg: unsigned int sum  	movl 16(%esp),%ecx	# Function arg: int len  	movl 12(%esp),%esi	# Function arg: unsigned char *buff @@ -127,14 +125,12 @@ ENTRY(csum_partial)  6:	addl %ecx,%eax  	adcl $0, %eax   7:	 -	testl $1, 12(%esp) +	testb $1, 12(%esp)  	jz 8f  	roll $8, %eax  8: -	popl_cfi %ebx -	CFI_RESTORE ebx -	popl_cfi %esi -	CFI_RESTORE esi +	popl_cfi_reg ebx +	popl_cfi_reg esi  	ret  	CFI_ENDPROC  ENDPROC(csum_partial) @@ -145,10 +141,8 @@ ENDPROC(csum_partial)  ENTRY(csum_partial)  	CFI_STARTPROC -	pushl_cfi %esi -	CFI_REL_OFFSET esi, 0 -	
pushl_cfi %ebx -	CFI_REL_OFFSET ebx, 0 +	pushl_cfi_reg esi +	pushl_cfi_reg ebx  	movl 20(%esp),%eax	# Function arg: unsigned int sum  	movl 16(%esp),%ecx	# Function arg: int len  	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf @@ -251,14 +245,12 @@ ENTRY(csum_partial)  	addl %ebx,%eax  	adcl $0,%eax  80:  -	testl $1, 12(%esp) +	testb $1, 12(%esp)  	jz 90f  	roll $8, %eax  90:  -	popl_cfi %ebx -	CFI_RESTORE ebx -	popl_cfi %esi -	CFI_RESTORE esi +	popl_cfi_reg ebx +	popl_cfi_reg esi  	ret  	CFI_ENDPROC  ENDPROC(csum_partial) @@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)  	CFI_STARTPROC  	subl  $4,%esp	  	CFI_ADJUST_CFA_OFFSET 4 -	pushl_cfi %edi -	CFI_REL_OFFSET edi, 0 -	pushl_cfi %esi -	CFI_REL_OFFSET esi, 0 -	pushl_cfi %ebx -	CFI_REL_OFFSET ebx, 0 +	pushl_cfi_reg edi +	pushl_cfi_reg esi +	pushl_cfi_reg ebx  	movl ARGBASE+16(%esp),%eax	# sum  	movl ARGBASE+12(%esp),%ecx	# len  	movl ARGBASE+4(%esp),%esi	# src @@ -412,12 +401,9 @@ DST(	movb %cl, (%edi)	)  .previous -	popl_cfi %ebx -	CFI_RESTORE ebx -	popl_cfi %esi -	CFI_RESTORE esi -	popl_cfi %edi -	CFI_RESTORE edi +	popl_cfi_reg ebx +	popl_cfi_reg esi +	popl_cfi_reg edi  	popl_cfi %ecx			# equivalent to addl $4,%esp  	ret	  	CFI_ENDPROC @@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)  ENTRY(csum_partial_copy_generic)  	CFI_STARTPROC -	pushl_cfi %ebx -	CFI_REL_OFFSET ebx, 0 -	pushl_cfi %edi -	CFI_REL_OFFSET edi, 0 -	pushl_cfi %esi -	CFI_REL_OFFSET esi, 0 +	pushl_cfi_reg ebx +	pushl_cfi_reg edi +	pushl_cfi_reg esi  	movl ARGBASE+4(%esp),%esi	#src  	movl ARGBASE+8(%esp),%edi	#dst	  	movl ARGBASE+12(%esp),%ecx	#len @@ -506,12 +489,9 @@ DST(	movb %dl, (%edi)         )  	jmp  7b			  .previous				 -	popl_cfi %esi -	CFI_RESTORE esi -	popl_cfi %edi -	CFI_RESTORE edi -	popl_cfi %ebx -	CFI_RESTORE ebx +	popl_cfi_reg esi +	popl_cfi_reg edi +	popl_cfi_reg ebx  	ret  	CFI_ENDPROC  ENDPROC(csum_partial_copy_generic) diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index f2145cfa12a6..e67e579c93bd 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,31 +1,35 @@  #include <linux/linkage.h>  #include <asm/dwarf2.h> +#include <asm/cpufeature.h>  #include <asm/alternative-asm.h>  /* - * Zero a page. 	 - * rdi	page - */			 -ENTRY(clear_page_c) + * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is + * recommended to use this when possible and we do use them by default. + * If enhanced REP MOVSB/STOSB is not available, try to use fast string. + * Otherwise, use original. + */ + +/* + * Zero a page. + * %rdi	- page + */ +ENTRY(clear_page)  	CFI_STARTPROC + +	ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ +		      "jmp clear_page_c_e", X86_FEATURE_ERMS +  	movl $4096/8,%ecx  	xorl %eax,%eax  	rep stosq  	ret  	CFI_ENDPROC -ENDPROC(clear_page_c) +ENDPROC(clear_page) -ENTRY(clear_page_c_e) +ENTRY(clear_page_orig)  	CFI_STARTPROC -	movl $4096,%ecx -	xorl %eax,%eax -	rep stosb -	ret -	CFI_ENDPROC -ENDPROC(clear_page_c_e) -ENTRY(clear_page) -	CFI_STARTPROC  	xorl   %eax,%eax  	movl   $4096/64,%ecx  	.p2align 4 @@ -45,29 +49,13 @@ ENTRY(clear_page)  	nop  	ret  	CFI_ENDPROC -.Lclear_page_end: -ENDPROC(clear_page) - -	/* -	 * Some CPUs support enhanced REP MOVSB/STOSB instructions. -	 * It is recommended to use this when possible. -	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string. -	 * Otherwise, use original function. 
-	 * -	 */ +ENDPROC(clear_page_orig) -#include <asm/cpufeature.h> - -	.section .altinstr_replacement,"ax" -1:	.byte 0xeb					/* jmp <disp8> */ -	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */ -2:	.byte 0xeb					/* jmp <disp8> */ -	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */ -3: -	.previous -	.section .altinstructions,"a" -	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ -			     .Lclear_page_end-clear_page, 2b-1b -	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \ -			     .Lclear_page_end-clear_page,3b-2b -	.previous +ENTRY(clear_page_c_e) +	CFI_STARTPROC +	movl $4096,%ecx +	xorl %eax,%eax +	rep stosb +	ret +	CFI_ENDPROC +ENDPROC(clear_page_c_e) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 176cca67212b..8239dbcbf984 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -2,23 +2,26 @@  #include <linux/linkage.h>  #include <asm/dwarf2.h> +#include <asm/cpufeature.h>  #include <asm/alternative-asm.h> +/* + * Some CPUs run faster using the string copy instructions (sane microcode). + * It is also a lot simpler. Use this when possible. But, don't use streaming + * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the + * prefetch distance based on SMP/UP. + */  	ALIGN -copy_page_rep: +ENTRY(copy_page)  	CFI_STARTPROC +	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD  	movl	$4096/8, %ecx  	rep	movsq  	ret  	CFI_ENDPROC -ENDPROC(copy_page_rep) - -/* - *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD. - *  Could vary the prefetch distance based on SMP/UP. -*/ +ENDPROC(copy_page) -ENTRY(copy_page) +ENTRY(copy_page_regs)  	CFI_STARTPROC  	subq	$2*8,	%rsp  	CFI_ADJUST_CFA_OFFSET 2*8 @@ -90,21 +93,5 @@ ENTRY(copy_page)  	addq	$2*8, %rsp  	CFI_ADJUST_CFA_OFFSET -2*8  	ret -.Lcopy_page_end:  	CFI_ENDPROC -ENDPROC(copy_page) - -	/* Some CPUs run faster using the string copy instructions. -	   It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - -	.section .altinstr_replacement,"ax" -1:	.byte 0xeb					/* jmp <disp8> */ -	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */ -2: -	.previous -	.section .altinstructions,"a" -	altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,	\ -		.Lcopy_page_end-copy_page, 2b-1b -	.previous +ENDPROC(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dee945d55594..fa997dfaef24 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -8,9 +8,6 @@  #include <linux/linkage.h>  #include <asm/dwarf2.h> - -#define FIX_ALIGNMENT 1 -  #include <asm/current.h>  #include <asm/asm-offsets.h>  #include <asm/thread_info.h> @@ -19,33 +16,7 @@  #include <asm/asm.h>  #include <asm/smap.h> -/* - * By placing feature2 after feature1 in altinstructions section, we logically - * implement: - * If CPU has feature2, jmp to alt2 is used - * else if CPU has feature1, jmp to alt1 is used - * else jmp to orig is used. 
- */ -	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 -0: -	.byte 0xe9	/* 32bit jump */ -	.long \orig-1f	/* by default jump to orig */ -1: -	.section .altinstr_replacement,"ax" -2:	.byte 0xe9			/* near jump with 32bit immediate */ -	.long \alt1-1b /* offset */   /* or alternatively to alt1 */ -3:	.byte 0xe9			/* near jump with 32bit immediate */ -	.long \alt2-1b /* offset */   /* or alternatively to alt2 */ -	.previous - -	.section .altinstructions,"a" -	altinstruction_entry 0b,2b,\feature1,5,5 -	altinstruction_entry 0b,3b,\feature2,5,5 -	.previous -	.endm -  	.macro ALIGN_DESTINATION -#ifdef FIX_ALIGNMENT  	/* check for bad alignment of destination */  	movl %edi,%ecx  	andl $7,%ecx @@ -67,7 +38,6 @@  	_ASM_EXTABLE(100b,103b)  	_ASM_EXTABLE(101b,103b) -#endif  	.endm  /* Standard copy_to_user with segment limit checking */ @@ -79,9 +49,11 @@ ENTRY(_copy_to_user)  	jc bad_to_user  	cmpq TI_addr_limit(%rax),%rcx  	ja bad_to_user -	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\ -		copy_user_generic_unrolled,copy_user_generic_string,	\ -		copy_user_enhanced_fast_string +	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\ +		      "jmp copy_user_generic_string",		\ +		      X86_FEATURE_REP_GOOD,			\ +		      "jmp copy_user_enhanced_fast_string",	\ +		      X86_FEATURE_ERMS  	CFI_ENDPROC  ENDPROC(_copy_to_user) @@ -94,9 +66,11 @@ ENTRY(_copy_from_user)  	jc bad_from_user  	cmpq TI_addr_limit(%rax),%rcx  	ja bad_from_user -	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\ -		copy_user_generic_unrolled,copy_user_generic_string,	\ -		copy_user_enhanced_fast_string +	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\ +		      "jmp copy_user_generic_string",		\ +		      X86_FEATURE_REP_GOOD,			\ +		      "jmp copy_user_enhanced_fast_string",	\ +		      X86_FEATURE_ERMS  	CFI_ENDPROC  ENDPROC(_copy_from_user) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 2419d5fefae3..9734182966f3 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)  	/* handle last odd byte */  .Lhandle_1: -	testl $1, %r10d +	testb $1, %r10b  	jz    .Lende  	xorl  %ebx, %ebx  	source diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1313ae6b478b..8f72b334aea0 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -52,6 +52,13 @@   */  void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)  { +	/* +	 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid +	 * even if the input buffer is long enough to hold them. +	 */ +	if (buf_len > MAX_INSN_SIZE) +		buf_len = MAX_INSN_SIZE; +  	memset(insn, 0, sizeof(*insn));  	insn->kaddr = kaddr;  	insn->end_kaddr = kaddr + buf_len; @@ -164,6 +171,12 @@ found:  				/* VEX.W overrides opnd_size */  				insn->opnd_bytes = 8;  		} else { +			/* +			 * For VEX2, fake VEX3-like byte#2. +			 * Makes it easier to decode vex.W, vex.vvvv, +			 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. 
+			 */ +			insn->vex_prefix.bytes[2] = b2 & 0x7f;  			insn->vex_prefix.nbytes = 2;  			insn->next_byte += 2;  		} diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 89b53c9968e7..b046664f5a1c 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,12 +1,20 @@  /* Copyright 2002 Andi Kleen */  #include <linux/linkage.h> -  #include <asm/cpufeature.h>  #include <asm/dwarf2.h>  #include <asm/alternative-asm.h>  /* + * We build a jump to memcpy_orig by default which gets NOPped out on + * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which + * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs + * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. + */ + +.weak memcpy + +/*   * memcpy - Copy a memory block.   *   * Input: @@ -17,15 +25,11 @@   * Output:   * rax original destination   */ +ENTRY(__memcpy) +ENTRY(memcpy) +	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ +		      "jmp memcpy_erms", X86_FEATURE_ERMS -/* - * memcpy_c() - fast string ops (REP MOVSQ) based variant. - * - * This gets patched over the unrolled variant (below) via the - * alternative instructions framework: - */ -	.section .altinstr_replacement, "ax", @progbits -.Lmemcpy_c:  	movq %rdi, %rax  	movq %rdx, %rcx  	shrq $3, %rcx @@ -34,29 +38,21 @@  	movl %edx, %ecx  	rep movsb  	ret -.Lmemcpy_e: -	.previous +ENDPROC(memcpy) +ENDPROC(__memcpy)  /* - * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than - * memcpy_c. Use memcpy_c_e when possible. - * - * This gets patched over the unrolled variant (below) via the - * alternative instructions framework: + * memcpy_erms() - enhanced fast string memcpy. This is faster and + * simpler than memcpy. Use memcpy_erms when possible.   */ -	.section .altinstr_replacement, "ax", @progbits -.Lmemcpy_c_e: +ENTRY(memcpy_erms)  	movq %rdi, %rax  	movq %rdx, %rcx  	rep movsb  	ret -.Lmemcpy_e_e: -	.previous - -.weak memcpy +ENDPROC(memcpy_erms) -ENTRY(__memcpy) -ENTRY(memcpy) +ENTRY(memcpy_orig)  	CFI_STARTPROC  	movq %rdi, %rax @@ -183,26 +179,4 @@ ENTRY(memcpy)  .Lend:  	retq  	CFI_ENDPROC -ENDPROC(memcpy) -ENDPROC(__memcpy) - -	/* -	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature -	 * If the feature is supported, memcpy_c_e() is the first choice. -	 * If enhanced rep movsb copy is not available, use fast string copy -	 * memcpy_c() when possible. This is faster and code is simpler than -	 * original memcpy(). -	 * Otherwise, original memcpy() is used. -	 * In .altinstructions section, ERMS feature is placed after REG_GOOD -         * feature to implement the right patch order. -	 * -	 * Replace only beginning, memcpy is used to apply alternatives, -	 * so it is silly to overwrite itself with nops - reboot is the -	 * only outcome... -	 */ -	.section .altinstructions, "a" -	altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ -			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c -	altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ -			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e -	.previous +ENDPROC(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 9c4b530575da..0f8a0d0331b9 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -5,7 +5,6 @@   * This assembly file is re-written from memmove_64.c file.   
*	- Copyright 2011 Fenghua Yu <[email protected]>   */ -#define _STRING_C  #include <linux/linkage.h>  #include <asm/dwarf2.h>  #include <asm/cpufeature.h> @@ -44,6 +43,8 @@ ENTRY(__memmove)  	jg 2f  .Lmemmove_begin_forward: +	ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS +  	/*  	 * movsq instruction have many startup latency  	 * so we handle small size by general register. @@ -207,21 +208,5 @@ ENTRY(__memmove)  13:  	retq  	CFI_ENDPROC - -	.section .altinstr_replacement,"ax" -.Lmemmove_begin_forward_efs: -	/* Forward moving data. */ -	movq %rdx, %rcx -	rep movsb -	retq -.Lmemmove_end_forward_efs: -	.previous - -	.section .altinstructions,"a" -	altinstruction_entry .Lmemmove_begin_forward,		\ -		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\ -		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\ -		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs -	.previous  ENDPROC(__memmove)  ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 6f44935c6a60..93118fb23976 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -5,19 +5,30 @@  #include <asm/cpufeature.h>  #include <asm/alternative-asm.h> +.weak memset +  /*   * ISO C memset - set a memory block to a byte value. This function uses fast   * string to get better performance than the original function. The code is   * simpler and shorter than the orignal function as well. - *	 + *   * rdi   destination - * rsi   value (char)  - * rdx   count (bytes)  - *  + * rsi   value (char) + * rdx   count (bytes) + *   * rax   original destination - */	 -	.section .altinstr_replacement, "ax", @progbits -.Lmemset_c: + */ +ENTRY(memset) +ENTRY(__memset) +	/* +	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended +	 * to use it when possible. If not available, use fast string instructions. +	 * +	 * Otherwise, use original memset function. +	 */ +	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ +		      "jmp memset_erms", X86_FEATURE_ERMS +  	movq %rdi,%r9  	movq %rdx,%rcx  	andl $7,%edx @@ -31,8 +42,8 @@  	rep stosb  	movq %r9,%rax  	ret -.Lmemset_e: -	.previous +ENDPROC(memset) +ENDPROC(__memset)  /*   * ISO C memset - set a memory block to a byte value. This function uses @@ -45,21 +56,16 @@   *   * rax   original destination   */ -	.section .altinstr_replacement, "ax", @progbits -.Lmemset_c_e: +ENTRY(memset_erms)  	movq %rdi,%r9  	movb %sil,%al  	movq %rdx,%rcx  	rep stosb  	movq %r9,%rax  	ret -.Lmemset_e_e: -	.previous - -.weak memset +ENDPROC(memset_erms) -ENTRY(memset) -ENTRY(__memset) +ENTRY(memset_orig)  	CFI_STARTPROC  	movq %rdi,%r10 @@ -134,23 +140,4 @@ ENTRY(__memset)  	jmp .Lafter_bad_alignment  .Lfinal:  	CFI_ENDPROC -ENDPROC(memset) -ENDPROC(__memset) - -	/* Some CPUs support enhanced REP MOVSB/STOSB feature. -	 * It is recommended to use this when possible. -	 * -	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string -	 * instructions. -	 * -	 * Otherwise, use original memset function. -	 * -	 * In .altinstructions section, ERMS feature is placed after REG_GOOD -         * feature to implement the right patch order. 
-	 */ -	.section .altinstructions,"a" -	altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ -			     .Lfinal-__memset,.Lmemset_e-.Lmemset_c -	altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ -			     .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e -	.previous +ENDPROC(memset_orig) diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index f6d13eefad10..3ca5218fbece 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -14,8 +14,8 @@  .macro op_safe_regs op  ENTRY(\op\()_safe_regs)  	CFI_STARTPROC -	pushq_cfi %rbx -	pushq_cfi %rbp +	pushq_cfi_reg rbx +	pushq_cfi_reg rbp  	movq	%rdi, %r10	/* Save pointer */  	xorl	%r11d, %r11d	/* Return value */  	movl    (%rdi), %eax @@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)  	movl    %ebp, 20(%r10)  	movl    %esi, 24(%r10)  	movl    %edi, 28(%r10) -	popq_cfi %rbp -	popq_cfi %rbx +	popq_cfi_reg rbp +	popq_cfi_reg rbx  	ret  3:  	CFI_RESTORE_STATE @@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)  .macro op_safe_regs op  ENTRY(\op\()_safe_regs)  	CFI_STARTPROC -	pushl_cfi %ebx -	pushl_cfi %ebp -	pushl_cfi %esi -	pushl_cfi %edi +	pushl_cfi_reg ebx +	pushl_cfi_reg ebp +	pushl_cfi_reg esi +	pushl_cfi_reg edi  	pushl_cfi $0              /* Return value */  	pushl_cfi %eax  	movl    4(%eax), %ecx @@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)  	movl    %esi, 24(%eax)  	movl    %edi, 28(%eax)  	popl_cfi %eax -	popl_cfi %edi -	popl_cfi %esi -	popl_cfi %ebp -	popl_cfi %ebx +	popl_cfi_reg edi +	popl_cfi_reg esi +	popl_cfi_reg ebp +	popl_cfi_reg ebx  	ret  3:  	CFI_RESTORE_STATE diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 5dff5f042468..2322abe4da3b 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -34,10 +34,10 @@   */  #define save_common_regs \ -	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 +	pushl_cfi_reg ecx  #define restore_common_regs \ -	popl_cfi %ecx; CFI_RESTORE ecx +	popl_cfi_reg ecx  	/* Avoid uglifying the argument copying x86-64 needs to do. 
*/  	.macro movq src, dst @@ -64,22 +64,22 @@   */  #define save_common_regs \ -	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ -	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ -	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ -	pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \ -	pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \ -	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ -	pushq_cfi %r11; CFI_REL_OFFSET r11, 0 +	pushq_cfi_reg rdi; \ +	pushq_cfi_reg rsi; \ +	pushq_cfi_reg rcx; \ +	pushq_cfi_reg r8;  \ +	pushq_cfi_reg r9;  \ +	pushq_cfi_reg r10; \ +	pushq_cfi_reg r11  #define restore_common_regs \ -	popq_cfi %r11; CFI_RESTORE r11; \ -	popq_cfi %r10; CFI_RESTORE r10; \ -	popq_cfi %r9;  CFI_RESTORE r9; \ -	popq_cfi %r8;  CFI_RESTORE r8; \ -	popq_cfi %rcx; CFI_RESTORE rcx; \ -	popq_cfi %rsi; CFI_RESTORE rsi; \ -	popq_cfi %rdi; CFI_RESTORE rdi +	popq_cfi_reg r11; \ +	popq_cfi_reg r10; \ +	popq_cfi_reg r9; \ +	popq_cfi_reg r8; \ +	popq_cfi_reg rcx; \ +	popq_cfi_reg rsi; \ +	popq_cfi_reg rdi  #endif @@ -87,12 +87,10 @@  ENTRY(call_rwsem_down_read_failed)  	CFI_STARTPROC  	save_common_regs -	__ASM_SIZE(push,_cfi) %__ASM_REG(dx) -	CFI_REL_OFFSET __ASM_REG(dx), 0 +	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)  	movq %rax,%rdi  	call rwsem_down_read_failed -	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx) -	CFI_RESTORE __ASM_REG(dx) +	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)  	restore_common_regs  	ret  	CFI_ENDPROC @@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)  ENTRY(call_rwsem_downgrade_wake)  	CFI_STARTPROC  	save_common_regs -	__ASM_SIZE(push,_cfi) %__ASM_REG(dx) -	CFI_REL_OFFSET __ASM_REG(dx), 0 +	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)  	movq %rax,%rdi  	call rwsem_downgrade_wake -	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx) -	CFI_RESTORE __ASM_REG(dx) +	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)  	restore_common_regs  	ret  	CFI_ENDPROC diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index e28cdaf5ac2c..5eb715087b80 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -13,12 +13,9 @@  	.globl \name  \name:  	CFI_STARTPROC -	pushl_cfi %eax -	CFI_REL_OFFSET eax, 0 -	pushl_cfi %ecx -	CFI_REL_OFFSET ecx, 0 -	pushl_cfi %edx -	CFI_REL_OFFSET edx, 0 +	pushl_cfi_reg eax +	pushl_cfi_reg ecx +	pushl_cfi_reg edx  	.if \put_ret_addr_in_eax  	/* Place EIP in the arg1 */ @@ -26,12 +23,9 @@  	.endif  	call \func -	popl_cfi %edx -	CFI_RESTORE edx -	popl_cfi %ecx -	CFI_RESTORE ecx -	popl_cfi %eax -	CFI_RESTORE eax +	popl_cfi_reg edx +	popl_cfi_reg ecx +	popl_cfi_reg eax  	ret  	CFI_ENDPROC  	_ASM_NOKPROBE(\name) diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index b30b5ebd614a..f89ba4e93025 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -17,9 +17,18 @@  	CFI_STARTPROC  	/* this one pushes 9 elems, the next one would be %rIP */ -	SAVE_ARGS +	pushq_cfi_reg rdi +	pushq_cfi_reg rsi +	pushq_cfi_reg rdx +	pushq_cfi_reg rcx +	pushq_cfi_reg rax +	pushq_cfi_reg r8 +	pushq_cfi_reg r9 +	pushq_cfi_reg r10 +	pushq_cfi_reg r11  	.if \put_ret_addr_in_rdi +	/* 9*8(%rsp) is return addr on stack */  	movq_cfi_restore 9*8, rdi  	.endif @@ -45,11 +54,22 @@  #endif  #endif -	/* SAVE_ARGS below is used only for the .cfi directives it contains. 
*/ +#if defined(CONFIG_TRACE_IRQFLAGS) \ + || defined(CONFIG_DEBUG_LOCK_ALLOC) \ + || defined(CONFIG_PREEMPT)  	CFI_STARTPROC -	SAVE_ARGS +	CFI_ADJUST_CFA_OFFSET 9*8  restore: -	RESTORE_ARGS +	popq_cfi_reg r11 +	popq_cfi_reg r10 +	popq_cfi_reg r9 +	popq_cfi_reg r8 +	popq_cfi_reg rax +	popq_cfi_reg rcx +	popq_cfi_reg rdx +	popq_cfi_reg rsi +	popq_cfi_reg rdi  	ret  	CFI_ENDPROC  	_ASM_NOKPROBE(restore) +#endif diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1a2be7c6895d..816488c0b97e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -273,6 +273,9 @@ dd: ESC  de: ESC  df: ESC  # 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.  e0: LOOPNE/LOOPNZ Jb (f64)  e1: LOOPE/LOOPZ Jb (f64)  e2: LOOP Jb (f64) @@ -281,6 +284,10 @@ e4: IN AL,Ib  e5: IN eAX,Ib  e6: OUT Ib,AL  e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP.  e8: CALL Jz (f64)  e9: JMP-near Jz (f64)  ea: JMP-far Ap (i64) @@ -456,6 +463,7 @@ AVXcode: 1  7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)  7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)  # 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).  80: JO Jz (f64)  81: JNO Jz (f64)  82: JB/JC/JNAE Jz (f64) @@ -842,6 +850,7 @@ EndTable  GrpTable: Grp5  0: INC Ev  1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).  2: CALLN Ev (f64)  3: CALLF Ep  4: JMPN Ev (f64) |
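The alternatives above key on X86_FEATURE_REP_GOOD and X86_FEATURE_ERMS. ERMS (enhanced REP MOVSB/STOSB) is advertised by CPUID leaf 7, subleaf 0, EBX bit 9, whereas REP_GOOD is a software flag Linux sets per CPU model rather than a CPUID bit, so it cannot be probed the same way. A minimal userspace check for ERMS, assuming GCC or clang on x86 and their cpuid.h wrapper, might look like this; it is illustrative only, not how the kernel detects the feature.

#include <cpuid.h>      /* __get_cpuid_count(), GCC/clang on x86 */
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* CPUID.(EAX=7, ECX=0):EBX bit 9 advertises ERMS. */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
                puts("CPUID leaf 7 not available");
                return 1;
        }
        printf("ERMS: %s\n", (ebx & (1u << 9)) ? "yes" : "no");
        return 0;
}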
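The insn.c hunk above ("fake VEX3-like byte#2") relies on a layout coincidence: the payload byte of a 2-byte VEX prefix (0xC5 xx) carries inverted R in bit 7 and vvvv/L/pp in the same positions that byte #2 of a 3-byte VEX prefix uses for W/vvvv/L/pp, so masking with 0x7f yields a VEX3-style byte with vex.W == 0. A small stand-alone C sketch of that conversion follows; it is an illustration, not the kernel decoder.

#include <stdint.h>
#include <stdio.h>

/* VEX2 byte #1 layout: ~R(7) ~vvvv(6:3) L(2) pp(1:0).
 * VEX3 byte #2 layout:  W(7) ~vvvv(6:3) L(2) pp(1:0).
 * Clearing bit 7 therefore produces a VEX3-style byte with W == 0. */
static uint8_t vex2_to_vex3_byte2(uint8_t b2)
{
        return b2 & 0x7f;
}

int main(void)
{
        uint8_t b2 = 0xfd;                      /* example VEX2 payload byte */
        uint8_t v3 = vex2_to_vex3_byte2(b2);

        printf("W=%u ~vvvv=0x%x L=%u pp=%u\n",
               (v3 >> 7) & 1,                   /* always 0 after the mask */
               (v3 >> 3) & 0xf,                 /* vvvv is stored inverted */
               (v3 >> 2) & 1,
               v3 & 3);
        return 0;
}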