From 6dd7a82cc54ebd2936763befd3dcd4beb727a704 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 1 Jul 2016 08:19:45 +1000 Subject: crypto: powerpc - Add POWER8 optimised crc32c Use the vector polynomial multiply-sum instructions in POWER8 to speed up crc32c. This is just over 41x faster than the slice-by-8 method that it replaces. Measurements on a 4.1 GHz POWER8 show it sustaining 52 GiB/sec. A simple btrfs write performance test: dd if=/dev/zero of=/mnt/tmpfile bs=1M count=4096 sync is over 3.7x faster. Signed-off-by: Anton Blanchard Signed-off-by: Herbert Xu --- arch/powerpc/include/asm/ppc-opcode.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc/include/asm/ppc-opcode.h') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1d035c1cc889..49cd8760aa7c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -174,6 +174,8 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff +#define PPC_INST_MFVSRD 0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SLBFEE 0x7c0007a7 #define PPC_INST_STRING 0x7c00042a @@ -188,6 +190,8 @@ #define PPC_INST_WAIT 0x7c00007c #define PPC_INST_TLBIVAX 0x7c000624 #define PPC_INST_TLBSRX_DOT 0x7c0006a5 +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 #define PPC_INST_XXLOR 0xf0000510 #define PPC_INST_XXSWAPD 0xf0000250 #define PPC_INST_XVCPSGNDP 0xf0000780 @@ -359,6 +363,14 @@ VSX_XX1((s), a, b)) #define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ VSX_XX1((s), a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ + VSX_XX1((t)+32, a, R0)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ + VSX_XX3((t), a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ + VSX_XX3((t), a, b)) #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), a, b)) #define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ -- cgit