diff options
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 43 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg8b_emu.S | 67 | ||||
-rw-r--r-- | arch/x86/lib/csum-partial_64.c | 101 | ||||
-rw-r--r-- | arch/x86/lib/getuser.S | 32 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.S | 13 | ||||
-rw-r--r-- | arch/x86/lib/msr.c | 32 | ||||
-rw-r--r-- | arch/x86/lib/putuser.S | 24 | ||||
-rw-r--r-- | arch/x86/lib/retpoline.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/usercopy_64.c | 1 |
10 files changed, 202 insertions, 116 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 01932af64193..ea3a28e7b613 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y) lib-y += strstr_32.o lib-y += string_32.o lib-y += memmove_32.o + lib-y += cmpxchg8b_emu.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o atomic64_386_32.o + lib-y += atomic64_386_32.o endif else obj-y += iomap_copy_64.o diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 33c70c0160ea..6962df315793 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -1,47 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> #include <asm/percpu.h> +#include <asm/processor-flags.h> .text /* + * Emulate 'cmpxchg16b %gs:(%rsi)' + * * Inputs: * %rsi : memory location to compare * %rax : low 64 bits of old value * %rdx : high 64 bits of old value * %rbx : low 64 bits of new value * %rcx : high 64 bits of new value - * %al : Operation successful + * + * Notably this is not LOCK prefixed and is not safe against NMIs */ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) -# -# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not -# via the ZF. Caller will access %al to get result. -# -# Note that this is only useful for a cpuops operation. Meaning that we -# do *not* have a fully atomic operation but just an operation that is -# *atomic* on a single cpu (as provided by the this_cpu_xx class of -# macros). -# pushfq cli - cmpq PER_CPU_VAR((%rsi)), %rax - jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx - jne .Lnot_same + /* if (*ptr == old) */ + cmpq PER_CPU_VAR(0(%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same - movq %rbx, PER_CPU_VAR((%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + /* *ptr = new */ + movq %rbx, PER_CPU_VAR(0(%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) + + /* set ZF in EFLAGS to indicate success */ + orl $X86_EFLAGS_ZF, (%rsp) popfq - mov $1, %al RET .Lnot_same: + /* *ptr != old */ + + /* old = *ptr */ + movq PER_CPU_VAR(0(%rsi)), %rax + movq PER_CPU_VAR(8(%rsi)), %rdx + + /* clear ZF in EFLAGS to indicate failure */ + andl $(~X86_EFLAGS_ZF), (%rsp) + popfq - xor %al,%al RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 6a912d58fecc..49805257b125 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -2,10 +2,16 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> .text +#ifndef CONFIG_X86_CMPXCHG64 + /* + * Emulate 'cmpxchg8b (%esi)' on UP + * * Inputs: * %esi : memory location to compare * %eax : low 32 bits of old value @@ -15,32 +21,65 @@ */ SYM_FUNC_START(cmpxchg8b_emu) -# -# Emulate 'cmpxchg8b (%esi)' on UP except we don't -# set the whole ZF thing (caller will just compare -# eax:edx with the expected value) -# pushfl cli - cmpl (%esi), %eax - jne .Lnot_same - cmpl 4(%esi), %edx - jne .Lhalf_same + cmpl 0(%esi), %eax + jne .Lnot_same + cmpl 4(%esi), %edx + jne .Lnot_same + + movl %ebx, 0(%esi) + movl %ecx, 4(%esi) - movl %ebx, (%esi) - movl %ecx, 4(%esi) + orl $X86_EFLAGS_ZF, (%esp) popfl RET .Lnot_same: - movl (%esi), %eax -.Lhalf_same: - movl 4(%esi), %edx + movl 0(%esi), %eax + movl 4(%esi), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) popfl RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) + +#endif + +#ifndef CONFIG_UML + +SYM_FUNC_START(this_cpu_cmpxchg8b_emu) + + pushfl + cli + + cmpl PER_CPU_VAR(0(%esi)), %eax + jne .Lnot_same2 + cmpl PER_CPU_VAR(4(%esi)), %edx + jne .Lnot_same2 + + movl %ebx, PER_CPU_VAR(0(%esi)) + movl %ecx, PER_CPU_VAR(4(%esi)) + + orl $X86_EFLAGS_ZF, (%esp) + + popfl + RET + +.Lnot_same2: + movl PER_CPU_VAR(0(%esi)), %eax + movl PER_CPU_VAR(4(%esi)), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) + + popfl + RET + +SYM_FUNC_END(this_cpu_cmpxchg8b_emu) + +#endif diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index 50734a23034c..cea25ca8b8cf 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -5,22 +5,34 @@ * This file contains network checksum routines that are better done * in an architecture-specific manner due to speed. */ - + #include <linux/compiler.h> #include <linux/export.h> #include <asm/checksum.h> #include <asm/word-at-a-time.h> -static inline unsigned short from32to16(unsigned a) +static inline unsigned short from32to16(unsigned a) { - unsigned short b = a >> 16; + unsigned short b = a >> 16; asm("addw %w2,%w0\n\t" - "adcw $0,%w0\n" + "adcw $0,%w0\n" : "=r" (b) : "0" (b), "r" (a)); return b; } +static inline __wsum csum_tail(u64 temp64, int odd) +{ + unsigned int result; + + result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); + if (unlikely(odd)) { + result = from32to16(result); + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + } + return (__force __wsum)result; +} + /* * Do a checksum on an arbitrary memory area. * Returns a 32bit checksum. @@ -35,7 +47,7 @@ static inline unsigned short from32to16(unsigned a) __wsum csum_partial(const void *buff, int len, __wsum sum) { u64 temp64 = (__force u64)sum; - unsigned odd, result; + unsigned odd; odd = 1 & (unsigned long) buff; if (unlikely(odd)) { @@ -47,21 +59,52 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) buff++; } - while (unlikely(len >= 64)) { + /* + * len == 40 is the hot case due to IPv6 headers, but annotating it likely() + * has noticeable negative affect on codegen for all other cases with + * minimal performance benefit here. + */ + if (len == 40) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq 4*8(%[src]),%[res]\n\t" - "adcq 5*8(%[src]),%[res]\n\t" - "adcq 6*8(%[src]),%[res]\n\t" - "adcq 7*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); - buff += 64; - len -= 64; + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[40])buff)); + return csum_tail(temp64, odd); + } + if (unlikely(len >= 64)) { + /* + * Extra accumulators for better ILP in the loop. + */ + u64 tmp_accum, tmp_carries; + + asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" + "xorl %k[tmp_carries],%k[tmp_carries]\n\t" + "subl $64, %[len]\n\t" + "1:\n\t" + "addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq 2*8(%[src]),%[res]\n\t" + "adcq 3*8(%[src]),%[res]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq 4*8(%[src]),%[tmp_accum]\n\t" + "adcq 5*8(%[src]),%[tmp_accum]\n\t" + "adcq 6*8(%[src]),%[tmp_accum]\n\t" + "adcq 7*8(%[src]),%[tmp_accum]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq $64, %[src]\n\t" + "subl $64, %[len]\n\t" + "jge 1b\n\t" + "addq %[tmp_accum],%[res]\n\t" + "adcq %[tmp_carries],%[res]\n\t" + "adcq $0,%[res]" + : [tmp_accum] "=&r"(tmp_accum), + [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), + [len] "+r"(len), [src] "+r"(buff) + : "m"(*(const char *)buff)); } if (len & 32) { @@ -70,45 +113,37 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[32])buff)); buff += 32; } if (len & 16) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[16])buff)); buff += 16; } if (len & 8) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[8])buff)); buff += 8; } if (len & 7) { - unsigned int shift = (8 - (len & 7)) * 8; + unsigned int shift = (-len << 3) & 63; unsigned long trail; trail = (load_unaligned_zeropad(buff) << shift) >> shift; asm("addq %[trail],%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [trail] "r" (trail)); + : [res] "+r"(temp64) + : [trail] "r"(trail)); } - result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); - if (unlikely(odd)) { - result = from32to16(result); - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - } - return (__force __wsum)result; + return csum_tail(temp64, odd); } EXPORT_SYMBOL(csum_partial); @@ -118,6 +153,6 @@ EXPORT_SYMBOL(csum_partial); */ __sum16 ip_compute_csum(const void *buff, int len) { - return csum_fold(csum_partial(buff,len,0)); + return csum_fold(csum_partial(buff, len, 0)); } EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b64a2bd1a1ef..9c63713477bb 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -143,43 +143,43 @@ SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_get_user_clac) +SYM_CODE_START_LOCAL(__get_user_handle_exception) ASM_CLAC .Lbad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_clac) +SYM_CODE_END(__get_user_handle_exception) #ifdef CONFIG_X86_32 -SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac) +SYM_CODE_START_LOCAL(__get_user_8_handle_exception) ASM_CLAC bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_8_clac) +SYM_CODE_END(__get_user_8_handle_exception) #endif /* get_user */ - _ASM_EXTABLE(1b, .Lbad_get_user_clac) - _ASM_EXTABLE(2b, .Lbad_get_user_clac) - _ASM_EXTABLE(3b, .Lbad_get_user_clac) + _ASM_EXTABLE(1b, __get_user_handle_exception) + _ASM_EXTABLE(2b, __get_user_handle_exception) + _ASM_EXTABLE(3b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(4b, .Lbad_get_user_clac) + _ASM_EXTABLE(4b, __get_user_handle_exception) #else - _ASM_EXTABLE(4b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(5b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(4b, __get_user_8_handle_exception) + _ASM_EXTABLE(5b, __get_user_8_handle_exception) #endif /* __get_user */ - _ASM_EXTABLE(6b, .Lbad_get_user_clac) - _ASM_EXTABLE(7b, .Lbad_get_user_clac) - _ASM_EXTABLE(8b, .Lbad_get_user_clac) + _ASM_EXTABLE(6b, __get_user_handle_exception) + _ASM_EXTABLE(7b, __get_user_handle_exception) + _ASM_EXTABLE(8b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(9b, .Lbad_get_user_clac) + _ASM_EXTABLE(9b, __get_user_handle_exception) #else - _ASM_EXTABLE(9b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(10b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(9b, __get_user_8_handle_exception) + _ASM_EXTABLE(10b, __get_user_8_handle_exception) #endif diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 02661861e5dd..0559b206fb11 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -38,10 +38,12 @@ SYM_FUNC_START(__memmove) cmp %rdi, %r8 jg 2f - /* FSRM implies ERMS => no length checks, do the copy directly */ +#define CHECK_LEN cmp $0x20, %rdx; jb 1f +#define MEMMOVE_BYTES movq %rdx, %rcx; rep movsb; RET .Lmemmove_begin_forward: - ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + ALTERNATIVE_2 __stringify(CHECK_LEN), \ + __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \ + __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM /* * movsq instruction have many startup latency @@ -207,11 +209,6 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET - -.Lmemmove_erms: - movq %rdx, %rcx - rep movsb - RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index b09cd2ad426c..47fd9bd6b91d 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -27,14 +27,14 @@ void msrs_free(struct msr *msrs) EXPORT_SYMBOL(msrs_free); /** - * Read an MSR with error handling - * + * msr_read - Read an MSR with error handling * @msr: MSR to read * @m: value to read into * * It returns read data only on success, otherwise it doesn't change the output * argument @m. * + * Return: %0 for success, otherwise an error code */ static int msr_read(u32 msr, struct msr *m) { @@ -49,10 +49,12 @@ static int msr_read(u32 msr, struct msr *m) } /** - * Write an MSR with error handling + * msr_write - Write an MSR with error handling * * @msr: MSR to write * @m: value to write + * + * Return: %0 for success, otherwise an error code */ static int msr_write(u32 msr, struct msr *m) { @@ -88,12 +90,14 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set) } /** - * Set @bit in a MSR @msr. + * msr_set_bit - Set @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to set * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already set. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already set. + * * > 0: Hardware accepted the MSR write. */ int msr_set_bit(u32 msr, u8 bit) { @@ -101,12 +105,14 @@ int msr_set_bit(u32 msr, u8 bit) } /** - * Clear @bit in a MSR @msr. + * msr_clear_bit - Clear @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to clear * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already cleared. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already cleared. + * * > 0: Hardware accepted the MSR write. */ int msr_clear_bit(u32 msr, u8 bit) { diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 3062d09a776d..1451e0c4ae22 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -131,22 +131,22 @@ SYM_FUNC_START(__put_user_nocheck_8) SYM_FUNC_END(__put_user_nocheck_8) EXPORT_SYMBOL(__put_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_put_user_clac) +SYM_CODE_START_LOCAL(__put_user_handle_exception) ASM_CLAC .Lbad_put_user: movl $-EFAULT,%ecx RET -SYM_CODE_END(.Lbad_put_user_clac) +SYM_CODE_END(__put_user_handle_exception) - _ASM_EXTABLE(1b, .Lbad_put_user_clac) - _ASM_EXTABLE(2b, .Lbad_put_user_clac) - _ASM_EXTABLE(3b, .Lbad_put_user_clac) - _ASM_EXTABLE(4b, .Lbad_put_user_clac) - _ASM_EXTABLE(5b, .Lbad_put_user_clac) - _ASM_EXTABLE(6b, .Lbad_put_user_clac) - _ASM_EXTABLE(7b, .Lbad_put_user_clac) - _ASM_EXTABLE(9b, .Lbad_put_user_clac) + _ASM_EXTABLE(1b, __put_user_handle_exception) + _ASM_EXTABLE(2b, __put_user_handle_exception) + _ASM_EXTABLE(3b, __put_user_handle_exception) + _ASM_EXTABLE(4b, __put_user_handle_exception) + _ASM_EXTABLE(5b, __put_user_handle_exception) + _ASM_EXTABLE(6b, __put_user_handle_exception) + _ASM_EXTABLE(7b, __put_user_handle_exception) + _ASM_EXTABLE(9b, __put_user_handle_exception) #ifdef CONFIG_X86_32 - _ASM_EXTABLE(8b, .Lbad_put_user_clac) - _ASM_EXTABLE(10b, .Lbad_put_user_clac) + _ASM_EXTABLE(8b, __put_user_handle_exception) + _ASM_EXTABLE(10b, __put_user_handle_exception) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b3b1e376dce8..3fd066d42ec0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -143,7 +143,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc + .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 003d90138e20..e9251b89a9e9 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,6 +9,7 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/highmem.h> +#include <linux/libnvdimm.h> /* * Zero Userspace |