Diffstat (limited to 'arch/powerpc/lib/memcmp_64.S')
-rw-r--r--	arch/powerpc/lib/memcmp_64.S	233
1 file changed, 233 insertions, 0 deletions
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
new file mode 100644
index 000000000000..8953d2382a65
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -0,0 +1,233 @@
+/*
+ * Author: Anton Blanchard <[email protected]>
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+
+#define off8	r6
+#define off16	r7
+#define off24	r8
+
+#define rA	r9
+#define rB	r10
+#define rC	r11
+#define rD	r27
+#define rE	r28
+#define rF	r29
+#define rG	r30
+#define rH	r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LD	ldbrx
+#else
+#define LD	ldx
+#endif
+
+_GLOBAL(memcmp)
+	cmpdi	cr1,r5,0
+
+	/* Use the short loop if both strings are not 8B aligned */
+	or	r6,r3,r4
+	andi.	r6,r6,7
+
+	/* Use the short loop if length is less than 32B */
+	cmpdi	cr6,r5,31
+
+	beq	cr1,.Lzero
+	bne	.Lshort
+	bgt	cr6,.Llong
+
+.Lshort:
+	mtctr	r5
+
+1:	lbz	rA,0(r3)
+	lbz	rB,0(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,1(r3)
+	lbz	rB,1(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,2(r3)
+	lbz	rB,2(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,3(r3)
+	lbz	rB,3(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+
+	addi	r3,r3,4
+	addi	r4,r4,4
+
+	bdnz	1b
+
+.Lzero:
+	li	r3,0
+	blr
+
+.Lnon_zero:
+	mr	r3,rC
+	blr
+
+.Llong:
+	li	off8,8
+	li	off16,16
+	li	off24,24
+
+	std	r31,-8(r1)
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+
+	srdi	r0,r5,5
+	mtctr	r0
+	andi.	r5,r5,31
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lfirst32
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lsecond32
+
+	.balign	16
+
+1:	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdnz	1b
+
+.Lsecond32:
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+.Ltail:
+	ld	r31,-8(r1)
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lshort
+
+.Lfirst32:
+	cmpld	cr1,rC,rD
+	cmpld	cr6,rE,rF
+	cmpld	cr7,rG,rH
+
+	bne	cr0,.LcmpAB
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+	b	.Ltail
+
+.LcmpAB:
+	li	r3,1
+	bgt	cr0,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpCD:
+	li	r3,1
+	bgt	cr1,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpEF:
+	li	r3,1
+	bgt	cr6,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpGH:
+	li	r3,1
+	bgt	cr7,.Lout
+	li	r3,-1
+
+.Lout:
+	ld	r31,-8(r1)
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+	blr
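For reference, the control flow of the routine above can be summarized in C. Buffers that are shorter than 32 bytes, or not both 8-byte aligned, take a byte-at-a-time loop; otherwise the routine compares 32 bytes per iteration as four unsigned 64-bit words, using byte-reversed loads (ldbrx) on little-endian so that cmpld ranks bytes in the same order a byte-wise comparison would. The sketch below is a minimal model, not the kernel's code: memcmp_model and load64_be are illustrative names, and the eight in-flight registers and four condition-register fields are collapsed into plain loops.

#include <stddef.h>
#include <stdint.h>

/* Load 8 bytes in big-endian byte order, the order an unsigned word
 * compare must see (the LD macro: ldbrx on little-endian, ldx on
 * big-endian). */
static uint64_t load64_be(const unsigned char *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

static int memcmp_model(const void *s1, const void *s2, size_t n)
{
	const unsigned char *p1 = s1, *p2 = s2;

	/* Long path (.Llong) only when both pointers are 8B-aligned
	 * and at least 32 bytes remain. */
	if (!(((uintptr_t)p1 | (uintptr_t)p2) & 7) && n >= 32) {
		/* 32 bytes, i.e. four 64-bit words, per iteration. */
		while (n >= 32) {
			for (int off = 0; off < 32; off += 8) {
				uint64_t a = load64_be(p1 + off);
				uint64_t b = load64_be(p2 + off);

				if (a != b)	/* cmpld, then .Lcmp* */
					return a > b ? 1 : -1;
			}
			p1 += 32;
			p2 += 32;
			n -= 32;
		}
	}

	/* Short/tail loop (.Lshort): one byte at a time, returning the
	 * difference of the first mismatching bytes. */
	for (; n; n--, p1++, p2++)
		if (*p1 != *p2)
			return *p1 - *p2;
	return 0;
}

One thing the model deliberately flattens: in the unrolled assembly loop, each iteration issues all eight loads for the next 32 bytes before branching on the cmpld results of the previous ones, and the four comparisons are spread across cr0, cr1, cr6, and cr7. That software-pipelined structure appears intended to keep loads in flight and avoid serializing on a single condition register, at the cost of the nonvolatile-register saves and the separate .Lfirst32/.Lsecond32 drain paths.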