diff options
author | Uros Bizjak <[email protected]> | 2018-06-05 18:39:09 +0200 |
---|---|---|
committer | Ingo Molnar <[email protected]> | 2018-06-21 15:21:47 +0200 |
commit | 1966c5e5bd9802cf62d3744ef4d2d6d32e22604d (patch) | |
tree | 926b83555049513976c105f3a85af46e9dfcc87e | |
parent | 1abd8a8f39cd9a2925149000056494523c85643a (diff) |
x86/asm: Use CC_SET/CC_OUT in percpu_cmpxchg8b_double() to micro-optimize code generation
Use CC_SET(z)/CC_OUT(z) instead of explicit SETZ instruction.
Using these two defines, the compiler that supports generation of
condition code outputs from inline assembly flags generates e.g.:
cmpxchg8b %fs:(%esi)
jne 172255 <__kmalloc+0x65>
instead of:
cmpxchg8b %fs:(%esi)
sete %al
test %al,%al
je 172255 <__kmalloc+0x65>
Note that older compilers now generate:
cmpxchg8b %fs:(%esi)
sete %cl
test %cl,%cl
je 173a85 <__kmalloc+0x65>
since we have to mark that cmpxchg8b instruction outputs to %eax
register and this way clobbers the value in the register.
Signed-off-by: Uros Bizjak <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/lkml/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
-rw-r--r-- | arch/x86/include/asm/percpu.h | 7 |
1 files changed, 4 insertions, 3 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a06b07399d17..e9202a0de8f0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -450,9 +450,10 @@ do { \ bool __ret; \ typeof(pcp1) __o1 = (o1), __n1 = (n1); \ typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ - : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ - : "b" (__n1), "c" (__n2), "a" (__o1)); \ + asm volatile("cmpxchg8b "__percpu_arg(1) \ + CC_SET(z) \ + : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ + : "b" (__n1), "c" (__n2)); \ __ret; \ }) |