aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/crypto/devel-algos.rst2
-rw-r--r--Documentation/crypto/userspace-if.rst15
-rw-r--r--Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml127
-rw-r--r--Documentation/devicetree/bindings/crypto/rockchip-crypto.txt28
-rw-r--r--Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml4
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arm/crypto/Kconfig2
-rw-r--r--arch/arm64/crypto/Kconfig49
-rw-r--r--arch/arm64/crypto/Makefile9
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c2
-rw-r--r--arch/arm64/crypto/sm3-ce-glue.c2
-rw-r--r--arch/arm64/crypto/sm3-neon-core.S600
-rw-r--r--arch/arm64/crypto/sm3-neon-glue.c103
-rw-r--r--arch/arm64/crypto/sm4-ce-asm.h209
-rw-r--r--arch/arm64/crypto/sm4-ce-ccm-core.S328
-rw-r--r--arch/arm64/crypto/sm4-ce-ccm-glue.c303
-rw-r--r--arch/arm64/crypto/sm4-ce-core.S1205
-rw-r--r--arch/arm64/crypto/sm4-ce-gcm-core.S741
-rw-r--r--arch/arm64/crypto/sm4-ce-gcm-glue.c286
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c575
-rw-r--r--arch/arm64/crypto/sm4-ce.h16
-rw-r--r--arch/arm64/crypto/sm4-neon-core.S630
-rw-r--r--arch/arm64/crypto/sm4-neon-glue.c172
-rw-r--r--arch/um/drivers/random.c1
-rw-r--r--arch/x86/crypto/polyval-clmulni_glue.c19
-rw-r--r--crypto/Kconfig9
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/af_alg.c135
-rw-r--r--crypto/ccm.c2
-rw-r--r--crypto/cryptd.c36
-rw-r--r--crypto/skcipher.c2
-rw-r--r--crypto/tcrypt.c308
-rw-r--r--crypto/testmgr.c19
-rw-r--r--crypto/testmgr.h977
-rw-r--r--drivers/char/hw_random/cavium-rng-vf.c1
-rw-r--r--drivers/char/hw_random/cn10k-rng.c1
-rw-r--r--drivers/char/hw_random/core.c9
-rw-r--r--drivers/char/hw_random/mpfs-rng.c1
-rw-r--r--drivers/char/hw_random/mtk-rng.c5
-rw-r--r--drivers/char/hw_random/npcm-rng.c15
-rw-r--r--drivers/char/hw_random/s390-trng.c1
-rw-r--r--drivers/char/hw_random/stm32-rng.c8
-rw-r--r--drivers/char/hw_random/timeriomem-rng.c2
-rw-r--r--drivers/char/hw_random/virtio-rng.c1
-rw-r--r--drivers/crypto/Kconfig15
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c1
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c2
-rw-r--r--drivers/crypto/atmel-sha204a.c1
-rw-r--r--drivers/crypto/caam/caamrng.c1
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_mbx.c1
-rw-r--r--drivers/crypto/ccp/ccp-crypto-main.c11
-rw-r--r--drivers/crypto/ccp/sp-pci.c11
-rw-r--r--drivers/crypto/ccree/cc_debugfs.c2
-rw-r--r--drivers/crypto/ccree/cc_driver.c10
-rw-r--r--drivers/crypto/chelsio/Kconfig2
-rw-r--r--drivers/crypto/hisilicon/Makefile2
-rw-r--r--drivers/crypto/hisilicon/debugfs.c1147
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c17
-rw-r--r--drivers/crypto/hisilicon/qm.c1217
-rw-r--r--drivers/crypto/hisilicon/qm_common.h87
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c5
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c10
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c7
-rw-r--r--drivers/crypto/inside-secure/safexcel.c69
-rw-r--r--drivers/crypto/inside-secure/safexcel.h10
-rw-r--r--drivers/crypto/ixp4xx_crypto.c10
-rw-r--r--drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h2
-rw-r--r--drivers/crypto/n2_core.c6
-rw-r--r--drivers/crypto/qat/qat_c3xxxvf/adf_drv.c2
-rw-r--r--drivers/crypto/qat/qat_c62xvf/adf_drv.c2
-rw-r--r--drivers/crypto/qat/qat_dh895xccvf/adf_drv.c2
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto.c505
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto.h107
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto_ahash.c267
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto_skcipher.c543
-rw-r--r--drivers/crypto/stm32/stm32-cryp.c4
-rw-r--r--drivers/crypto/talitos.c6
-rw-r--r--drivers/crypto/talitos.h4
-rw-r--r--drivers/firmware/turris-mox-rwtm.c1
-rw-r--r--drivers/s390/crypto/zcrypt_api.c6
-rw-r--r--drivers/usb/misc/chaoskey.c1
-rw-r--r--include/crypto/gcm.h22
-rw-r--r--include/crypto/internal/skcipher.h8
-rw-r--r--include/crypto/scatterwalk.h1
-rw-r--r--include/linux/hisi_acc_qm.h14
-rw-r--r--include/linux/hw_random.h2
-rw-r--r--include/uapi/linux/if_alg.h1
-rw-r--r--lib/crypto/Kconfig9
-rw-r--r--lib/crypto/Makefile5
-rw-r--r--lib/crypto/aesgcm.c727
-rw-r--r--lib/crypto/gf128mul.c (renamed from crypto/gf128mul.c)58
91 files changed, 8946 insertions, 2965 deletions
diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst
index f225a953ab4b..3506899ef83e 100644
--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from
other part of the kernel. Note that the .setkey() call might happen
before or after any of these schematics happen, but must not happen
during any of these are in-flight. Please note that calling .init()
-followed immediately by .finish() is also a perfectly valid
+followed immediately by .final() is also a perfectly valid
transformation.
::
diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
index b45dabbf69d6..f80f243e227e 100644
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message
digest, the flag MSG_TRUNC is set by the kernel.
In order to set a message digest key, the calling application must use
-the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
-operation is performed without the initial HMAC state change caused by
-the key.
+the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the
+key is not set the HMAC operation is performed without the initial HMAC state
+change caused by the key.
Symmetric Cipher API
--------------------
@@ -382,6 +382,15 @@ mentioned optname:
- the RNG cipher type to provide the seed
+- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t.
+ This operation behaves the same as ALG_SET_KEY. The decrypted
+ data is copied from a keyring key and used as the key for
+ symmetric encryption.
+
+ The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH
+ permission set, otherwise -EPERM is returned. Supports key types: user,
+ logon, encrypted, and trusted.
+
- ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for
AEAD ciphers. For a encryption operation, the authentication tag of
the given size will be generated. For a decryption operation, the
diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
new file mode 100644
index 000000000000..f1a9da8bff7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Electronics Security Accelerator
+
+maintainers:
+ - Heiko Stuebner <[email protected]>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk3288-crypto
+ - rockchip,rk3328-crypto
+ - rockchip,rk3399-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ minItems: 3
+ maxItems: 4
+
+ clock-names:
+ minItems: 3
+ maxItems: 4
+
+ resets:
+ minItems: 1
+ maxItems: 3
+
+ reset-names:
+ minItems: 1
+ maxItems: 3
+
+allOf:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3288-crypto
+ then:
+ properties:
+ clocks:
+ minItems: 4
+ clock-names:
+ items:
+ - const: aclk
+ - const: hclk
+ - const: sclk
+ - const: apb_pclk
+ resets:
+ maxItems: 1
+ reset-names:
+ items:
+ - const: crypto-rst
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3328-crypto
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: hclk_master
+ - const: hclk_slave
+ - const: sclk
+ resets:
+ maxItems: 1
+ reset-names:
+ items:
+ - const: crypto-rst
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: rockchip,rk3399-crypto
+ then:
+ properties:
+ clocks:
+ maxItems: 3
+ clock-names:
+ items:
+ - const: hclk_master
+ - const: hclk_slave
+ - const: sclk
+ resets:
+ minItems: 3
+ reset-names:
+ items:
+ - const: master
+ - const: slave
+ - const: crypto-rst
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - reset-names
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/rk3288-cru.h>
+ crypto@ff8a0000 {
+ compatible = "rockchip,rk3288-crypto";
+ reg = <0xff8a0000 0x4000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
+ <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
+ clock-names = "aclk", "hclk", "sclk", "apb_pclk";
+ resets = <&cru SRST_CRYPTO>;
+ reset-names = "crypto-rst";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
deleted file mode 100644
index 5e2ba385b8c9..000000000000
--- a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Rockchip Electronics And Security Accelerator
-
-Required properties:
-- compatible: Should be "rockchip,rk3288-crypto"
-- reg: Base physical address of the engine and length of memory mapped
- region
-- interrupts: Interrupt number
-- clocks: Reference to the clocks about crypto
-- clock-names: "aclk" used to clock data
- "hclk" used to clock data
- "sclk" used to clock crypto accelerator
- "apb_pclk" used to clock dma
-- resets: Must contain an entry for each entry in reset-names.
- See ../reset/reset.txt for details.
-- reset-names: Must include the name "crypto-rst".
-
-Examples:
-
- crypto: cypto-controller@ff8a0000 {
- compatible = "rockchip,rk3288-crypto";
- reg = <0xff8a0000 0x4000>;
- interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
- <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
- clock-names = "aclk", "hclk", "sclk", "apb_pclk";
- resets = <&cru SRST_CRYPTO>;
- reset-names = "crypto-rst";
- };
diff --git a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
index abd134c9d400..e8e4ab1e5b95 100644
--- a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
@@ -16,7 +16,9 @@ maintainers:
properties:
compatible:
- const: nuvoton,npcm750-rng
+ enum:
+ - nuvoton,npcm750-rng
+ - nuvoton,npcm845-rng
reg:
maxItems: 1
diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f18502372..3489126acd1f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17753,6 +17753,13 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat*
F: drivers/hid/hid-roccat*
F: include/linux/hid-roccat*
+ROCKCHIP CRYPTO DRIVERS
+M: Corentin Labbe <[email protected]>
+S: Maintained
+F: Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
+F: drivers/crypto/rockchip/
+
ROCKCHIP I2S TDM DRIVER
M: Nicolas Frattaroli <[email protected]>
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 3858c4d4cb98..7b2b7d043d9b 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -18,7 +18,7 @@ config CRYPTO_GHASH_ARM_CE
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
- select CRYPTO_GF128MUL
+ select CRYPTO_LIB_GF128MUL
help
GCM GHASH function (NIST SP800-38D)
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 8bd80508a710..6d06b448a66e 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -6,8 +6,8 @@ config CRYPTO_GHASH_ARM64_CE
tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
- select CRYPTO_GF128MUL
select CRYPTO_LIB_AES
+ select CRYPTO_LIB_GF128MUL
select CRYPTO_AEAD
help
GCM GHASH function (NIST SP800-38D)
@@ -96,6 +96,17 @@ config CRYPTO_SHA3_ARM64
Architecture: arm64 using:
- ARMv8.2 Crypto Extensions
+config CRYPTO_SM3_NEON
+ tristate "Hash functions: SM3 (NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_HASH
+ select CRYPTO_SM3
+ help
+ SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
+
+ Architecture: arm64 using:
+ - NEON (Advanced SIMD) extensions
+
config CRYPTO_SM3_ARM64_CE
tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
@@ -220,7 +231,7 @@ config CRYPTO_SM4_ARM64_CE
- NEON (Advanced SIMD) extensions
config CRYPTO_SM4_ARM64_CE_BLK
- tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)"
+ tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR/XTS (ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_SM4
@@ -231,6 +242,8 @@ config CRYPTO_SM4_ARM64_CE_BLK
- CBC (Cipher Block Chaining) mode (NIST SP800-38A)
- CFB (Cipher Feedback) mode (NIST SP800-38A)
- CTR (Counter) mode (NIST SP800-38A)
+ - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+ and IEEE 1619)
Architecture: arm64 using:
- ARMv8 Crypto Extensions
@@ -268,6 +281,38 @@ config CRYPTO_AES_ARM64_CE_CCM
- ARMv8 Crypto Extensions
- NEON (Advanced SIMD) extensions
+config CRYPTO_SM4_ARM64_CE_CCM
+ tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AEAD
+ select CRYPTO_SM4
+ select CRYPTO_SM4_ARM64_CE_BLK
+ help
+ AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
+ CCM (Counter with Cipher Block Chaining-Message Authentication Code)
+ authenticated encryption mode (NIST SP800-38C)
+
+ Architecture: arm64 using:
+ - ARMv8 Crypto Extensions
+ - NEON (Advanced SIMD) extensions
+
+config CRYPTO_SM4_ARM64_CE_GCM
+ tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AEAD
+ select CRYPTO_SM4
+ select CRYPTO_SM4_ARM64_CE_BLK
+ help
+ AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
+ GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
+
+ Architecture: arm64 using:
+ - ARMv8 Crypto Extensions
+ - PMULL (Polynomial Multiply Long) instructions
+ - NEON (Advanced SIMD) extensions
+
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF (PMULL)"
depends on KERNEL_MODE_NEON && CRC_T10DIF
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 24bb0c4610de..4818e204c2ac 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -17,6 +17,9 @@ sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
+obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
+sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
+
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
@@ -26,6 +29,12 @@ sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
+sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o
+sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
+
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 15794fe21a0b..e5e9adc1fcf4 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -508,7 +508,7 @@ static void __exit ghash_ce_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-static const struct cpu_feature ghash_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
{ cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index ee98954ae8ca..54bf6ebcfffb 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -84,7 +84,7 @@ static struct shash_alg sm3_alg = {
.base.cra_driver_name = "sm3-ce",
.base.cra_blocksize = SM3_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
- .base.cra_priority = 200,
+ .base.cra_priority = 400,
};
static int __init sm3_ce_mod_init(void)
diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S
new file mode 100644
index 000000000000..3e3b4e5c736f
--- /dev/null
+++ b/arch/arm64/crypto/sm3-neon-core.S
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sm3-neon-core.S - SM3 secure hash using NEON instructions
+ *
+ * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <[email protected]>
+ * Copyright (c) 2022 Tianjia Zhang <[email protected]>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Stack structure */
+
+#define STACK_W_SIZE (32 * 2 * 3)
+
+#define STACK_W (0)
+#define STACK_SIZE (STACK_W + STACK_W_SIZE)
+
+/* Register macros */
+
+#define RSTATE x0
+#define RDATA x1
+#define RNBLKS x2
+#define RKPTR x28
+#define RFRAME x29
+
+#define ra w3
+#define rb w4
+#define rc w5
+#define rd w6
+#define re w7
+#define rf w8
+#define rg w9
+#define rh w10
+
+#define t0 w11
+#define t1 w12
+#define t2 w13
+#define t3 w14
+#define t4 w15
+#define t5 w16
+#define t6 w17
+
+#define k_even w19
+#define k_odd w20
+
+#define addr0 x21
+#define addr1 x22
+
+#define s0 w23
+#define s1 w24
+#define s2 w25
+#define s3 w26
+
+#define W0 v0
+#define W1 v1
+#define W2 v2
+#define W3 v3
+#define W4 v4
+#define W5 v5
+
+#define XTMP0 v6
+#define XTMP1 v7
+#define XTMP2 v16
+#define XTMP3 v17
+#define XTMP4 v18
+#define XTMP5 v19
+#define XTMP6 v20
+
+/* Helper macros. */
+
+#define _(...) /*_*/
+
+#define clear_vec(x) \
+ movi x.8h, #0;
+
+#define rolw(o, a, n) \
+ ror o, a, #(32 - n);
+
+/*
+ * Round function macros.
+ *
+ * Each boolean function is split into three steps (_1, _2, _3) so that the
+ * round macro can place other instructions between them; a step with an
+ * empty body emits nothing.
+ *
+ * Arguments: x, y, z are the inputs, o receives the result and t is a
+ * scratch register (only written by the GG2/FF2 variants).
+ *
+ * After all three steps have run:
+ *   GG1, FF1: o = x ^ y ^ z
+ *   GG2:      o = (z & ~x) ^ (y & x)
+ *   FF2:      o = ((x ^ y) & z) ^ (x & y)
+ */
+
+#define GG1_1(x, y, z, o, t) \
+ eor o, x, y;
+#define GG1_2(x, y, z, o, t) \
+ eor o, o, z;
+#define GG1_3(x, y, z, o, t)
+
+#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
+#define FF1_2(x, y, z, o, t)
+#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)
+
+#define GG2_1(x, y, z, o, t) \
+ bic o, z, x;
+#define GG2_2(x, y, z, o, t) \
+ and t, y, x;
+#define GG2_3(x, y, z, o, t) \
+ eor o, o, t;
+
+#define FF2_1(x, y, z, o, t) \
+ eor o, x, y;
+#define FF2_2(x, y, z, o, t) \
+ and t, x, y; \
+ and o, o, z;
+#define FF2_3(x, y, z, o, t) \
+ eor o, o, t;
+
+/*
+ * One round of the compression function.
+ *
+ * i           selects the boolean function set (FF<i>_n / GG<i>_n).
+ * a..h        working-state registers; d and h receive the new values,
+ *             b and f are rotated in place (by 9 and 19).
+ * k           register holding this round's constant on entry; clobbered.
+ * K_LOAD      expanded first with (round); callers pass KL to load a
+ *             constant pair or _ to emit nothing.
+ * round, widx, wtype
+ *             select the stack slots read through wtype##_W1_ADDR and
+ *             wtype##_W1W2_ADDR (wtype is IW or XW).
+ * IOP, iop_param
+ *             hook expanded eight times as IOP(1..8, iop_param) between
+ *             the scalar instructions, used to interleave other work.
+ *
+ * Clobbers t0-t6.
+ */
+#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+ K_LOAD(round); \
+ ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \
+ rolw(t0, a, 12); /* rol(a, 12) => t0 */ \
+ IOP(1, iop_param); \
+ FF##i##_1(a, b, c, t1, t2); \
+ ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \
+ add k, k, e; \
+ IOP(2, iop_param); \
+ GG##i##_1(e, f, g, t3, t4); \
+ FF##i##_2(a, b, c, t1, t2); \
+ IOP(3, iop_param); \
+ add k, k, t0; \
+ add h, h, t5; \
+ add d, d, t6; /* w1w2 + d => d */ \
+ IOP(4, iop_param); \
+ rolw(k, k, 7); /* rol(t0 + e + k, 7) => k */ \
+ GG##i##_2(e, f, g, t3, t4); \
+ add h, h, k; /* h + w1 + k => h */ \
+ IOP(5, iop_param); \
+ FF##i##_3(a, b, c, t1, t2); \
+ eor t0, t0, k; /* k ^ t0 => t0 */ \
+ GG##i##_3(e, f, g, t3, t4); \
+ add d, d, t1; /* FF(a,b,c) + d => d */ \
+ IOP(6, iop_param); \
+ add t3, t3, h; /* GG(e,f,g) + h => t3 */ \
+ rolw(b, b, 9); /* rol(b, 9) => b */ \
+ eor h, t3, t3, ror #(32-9); /* t3 ^ rol(t3, 9) => h */ \
+ IOP(7, iop_param); \
+ add d, d, t0; /* t0 + d => d */ \
+ rolw(f, f, 19); /* rol(f, 19) => f */ \
+ IOP(8, iop_param); \
+ eor h, h, t3, ror #(32-17); /* P0(t3) => h */
+
+/*
+ * R1/R2: round macro R() bound to boolean function set 1 or 2.
+ * Arguments are forwarded unchanged. No token pasting is used on them:
+ * pasting "," with an identifier does not form a valid preprocessing
+ * token and is rejected by stricter preprocessors.
+ */
+#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+ R(1, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param)
+
+#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+ R(2, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param)
+
+#define KL(round) \
+ ldp k_even, k_odd, [RKPTR, #(4*(round))];
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+ (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+ (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)
+
+/* Rounds 13-64, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)
+
+/* Input block loading.
+ * Interleaving within round function needed for in-order CPUs. */
+#define LOAD_W_VEC_1_1() \
+ add addr0, sp, #IW_W1_ADDR(0, 0);
+#define LOAD_W_VEC_1_2() \
+ add addr1, sp, #IW_W1_ADDR(4, 0);
+#define LOAD_W_VEC_1_3() \
+ ld1 {W0.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_4() \
+ ld1 {W1.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_5() \
+ ld1 {W2.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_6() \
+ ld1 {W3.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_7() \
+ rev32 XTMP0.16b, W0.16b;
+#define LOAD_W_VEC_1_8() \
+ rev32 XTMP1.16b, W1.16b;
+#define LOAD_W_VEC_2_1() \
+ rev32 XTMP2.16b, W2.16b;
+#define LOAD_W_VEC_2_2() \
+ rev32 XTMP3.16b, W3.16b;
+#define LOAD_W_VEC_2_3() \
+ eor XTMP4.16b, XTMP1.16b, XTMP0.16b;
+#define LOAD_W_VEC_2_4() \
+ eor XTMP5.16b, XTMP2.16b, XTMP1.16b;
+#define LOAD_W_VEC_2_5() \
+ st1 {XTMP0.16b}, [addr0], #16;
+#define LOAD_W_VEC_2_6() \
+ st1 {XTMP4.16b}, [addr0]; \
+ add addr0, sp, #IW_W1_ADDR(8, 0);
+#define LOAD_W_VEC_2_7() \
+ eor XTMP6.16b, XTMP3.16b, XTMP2.16b;
+#define LOAD_W_VEC_2_8() \
+ ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */
+#define LOAD_W_VEC_3_1() \
+ mov W2.16b, XTMP1.16b; /* W2: xx, w6, w5, w4 */
+#define LOAD_W_VEC_3_2() \
+ st1 {XTMP1.16b}, [addr1], #16;
+#define LOAD_W_VEC_3_3() \
+ st1 {XTMP5.16b}, [addr1]; \
+ ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */
+#define LOAD_W_VEC_3_4() \
+ ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
+#define LOAD_W_VEC_3_5() \
+ ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */
+#define LOAD_W_VEC_3_6() \
+ st1 {XTMP2.16b}, [addr0], #16;
+#define LOAD_W_VEC_3_7() \
+ st1 {XTMP6.16b}, [addr0];
+#define LOAD_W_VEC_3_8() \
+ ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */
+
+#define LOAD_W_VEC_1(iop_num, ...) \
+ LOAD_W_VEC_1_##iop_num()
+#define LOAD_W_VEC_2(iop_num, ...) \
+ LOAD_W_VEC_2_##iop_num()
+#define LOAD_W_VEC_3(iop_num, ...) \
+ LOAD_W_VEC_3_##iop_num()
+
+/* Message scheduling. Note: 3 words per vector register.
+ * Interleaving within round function needed for in-order CPUs. */
+#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
+ /* Load (w[i - 16]) => XTMP0 */ \
+ /* Load (w[i - 13]) => XTMP5 */ \
+ ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */
+#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
+ ext XTMP5.16b, w1.16b, w1.16b, #12;
+#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
+ ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
+#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
+ ext XTMP5.16b, XTMP5.16b, w2.16b, #12;
+#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
+ /* w[i - 9] == w3 */ \
+ /* W3 ^ XTMP0 => XTMP0 */ \
+ eor XTMP0.16b, XTMP0.16b, w3.16b;
+#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
+ /* w[i - 3] == w5 */ \
+ /* rol(w5, 15) ^ XTMP0 => XTMP0 */ \
+ /* rol(XTMP5, 7) => XTMP1 */ \
+ add addr0, sp, #XW_W1_ADDR((round), 0); \
+ shl XTMP2.4s, w5.4s, #15;
+#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
+ shl XTMP1.4s, XTMP5.4s, #7;
+#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
+ sri XTMP2.4s, w5.4s, #(32-15);
+#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
+ sri XTMP1.4s, XTMP5.4s, #(32-7);
+#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
+ eor XTMP0.16b, XTMP0.16b, XTMP2.16b;
+#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
+ /* w[i - 6] == W4 */ \
+ /* W4 ^ XTMP1 => XTMP1 */ \
+ eor XTMP1.16b, XTMP1.16b, w4.16b;
+#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
+ /* P1(XTMP0) ^ XTMP1 => W0 */ \
+ shl XTMP3.4s, XTMP0.4s, #15;
+#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
+ shl XTMP4.4s, XTMP0.4s, #23;
+#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
+ eor w0.16b, XTMP1.16b, XTMP0.16b;
+#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
+ sri XTMP3.4s, XTMP0.4s, #(32-15);
+#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
+ sri XTMP4.4s, XTMP0.4s, #(32-23);
+#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
+ eor w0.16b, w0.16b, XTMP3.16b;
+#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
+ /* Load (w[i - 3]) => XTMP2 */ \
+ ext XTMP2.16b, w4.16b, w4.16b, #12;
+#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
+ eor w0.16b, w0.16b, XTMP4.16b;
+#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
+ ext XTMP2.16b, XTMP2.16b, w5.16b, #12;
+#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
+ /* W1 ^ W2 => XTMP3 */ \
+ eor XTMP3.16b, XTMP2.16b, w0.16b;
+#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
+#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
+ st1 {XTMP2.16b-XTMP3.16b}, [addr0];
+#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)
+
+#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)
+
+#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)
+
+#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)
+
+#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)
+
+#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)
+
+#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
+ SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
+ SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
+ SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
+
+
+ /*
+ * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
+ *
+ * void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
+ * int blocks)
+ */
+ .text
+.align 3
+SYM_FUNC_START(sm3_neon_transform)
+ ldp ra, rb, [RSTATE, #0]
+ ldp rc, rd, [RSTATE, #8]
+ ldp re, rf, [RSTATE, #16]
+ ldp rg, rh, [RSTATE, #24]
+
+ stp x28, x29, [sp, #-16]!
+ stp x19, x20, [sp, #-16]!
+ stp x21, x22, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x25, x26, [sp, #-16]!
+ mov RFRAME, sp
+
+ sub addr0, sp, #STACK_SIZE
+ adr_l RKPTR, .LKtable
+ and sp, addr0, #(~63)
+
+ /* Preload first block. */
+ LOAD_W_VEC_1(1, 0)
+ LOAD_W_VEC_1(2, 0)
+ LOAD_W_VEC_1(3, 0)
+ LOAD_W_VEC_1(4, 0)
+ LOAD_W_VEC_1(5, 0)
+ LOAD_W_VEC_1(6, 0)
+ LOAD_W_VEC_1(7, 0)
+ LOAD_W_VEC_1(8, 0)
+ LOAD_W_VEC_2(1, 0)
+ LOAD_W_VEC_2(2, 0)
+ LOAD_W_VEC_2(3, 0)
+ LOAD_W_VEC_2(4, 0)
+ LOAD_W_VEC_2(5, 0)
+ LOAD_W_VEC_2(6, 0)
+ LOAD_W_VEC_2(7, 0)
+ LOAD_W_VEC_2(8, 0)
+ LOAD_W_VEC_3(1, 0)
+ LOAD_W_VEC_3(2, 0)
+ LOAD_W_VEC_3(3, 0)
+ LOAD_W_VEC_3(4, 0)
+ LOAD_W_VEC_3(5, 0)
+ LOAD_W_VEC_3(6, 0)
+ LOAD_W_VEC_3(7, 0)
+ LOAD_W_VEC_3(8, 0)
+
+.balign 16
+.Loop:
+ /* Transform 0-3 */
+ R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
+ R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0)
+ R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
+ R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0)
+
+ /* Transform 4-7 + Precalc 12-14 */
+ R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
+ R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0)
+ R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
+ R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)
+
+ /* Transform 8-11 + Precalc 12-17 */
+ R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
+ R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
+ R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
+ R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)
+
+ /* Transform 12-14 + Precalc 18-20 */
+ R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
+ R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
+ R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)
+
+ /* Transform 15-17 + Precalc 21-23 */
+ R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)
+
+ /* Transform 18-20 + Precalc 24-26 */
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)
+
+ /* Transform 21-23 + Precalc 27-29 */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)
+
+ /* Transform 24-26 + Precalc 30-32 */
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)
+
+ /* Transform 27-29 + Precalc 33-35 */
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)
+
+ /* Transform 30-32 + Precalc 36-38 */
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)
+
+ /* Transform 33-35 + Precalc 39-41 */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)
+
+ /* Transform 36-38 + Precalc 42-44 */
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)
+
+ /* Transform 39-41 + Precalc 45-47 */
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)
+
+ /* Transform 42-44 + Precalc 48-50 */
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)
+
+ /* Transform 45-47 + Precalc 51-53 */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)
+
+ /* Transform 48-50 + Precalc 54-56 */
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)
+
+ /* Transform 51-53 + Precalc 57-59 */
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)
+
+ /* Transform 54-56 + Precalc 60-62 */
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)
+
+ /* Transform 57-59 + Precalc 63 */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)
+
+ /* Transform 60 */
+ R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
+ subs RNBLKS, RNBLKS, #1
+ b.eq .Lend
+
+ /* Transform 61-63 + Preload next block */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _)
+ ldp s0, s1, [RSTATE, #0]
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
+ ldp s2, s3, [RSTATE, #8]
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _)
+
+ /* Update the chaining variables. */
+ eor ra, ra, s0
+ eor rb, rb, s1
+ ldp s0, s1, [RSTATE, #16]
+ eor rc, rc, s2
+ ldp k_even, k_odd, [RSTATE, #24]
+ eor rd, rd, s3
+ eor re, re, s0
+ stp ra, rb, [RSTATE, #0]
+ eor rf, rf, s1
+ stp rc, rd, [RSTATE, #8]
+ eor rg, rg, k_even
+ stp re, rf, [RSTATE, #16]
+ eor rh, rh, k_odd
+ stp rg, rh, [RSTATE, #24]
+ b .Loop
+
+.Lend:
+ /* Transform 61-63 */
+ R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _)
+ ldp s0, s1, [RSTATE, #0]
+ R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
+ ldp s2, s3, [RSTATE, #8]
+ R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _)
+
+ /* Update the chaining variables. */
+ eor ra, ra, s0
+ clear_vec(W0)
+ eor rb, rb, s1
+ clear_vec(W1)
+ ldp s0, s1, [RSTATE, #16]
+ clear_vec(W2)
+ eor rc, rc, s2
+ clear_vec(W3)
+ ldp k_even, k_odd, [RSTATE, #24]
+ clear_vec(W4)
+ eor rd, rd, s3
+ clear_vec(W5)
+ eor re, re, s0
+ clear_vec(XTMP0)
+ stp ra, rb, [RSTATE, #0]
+ clear_vec(XTMP1)
+ eor rf, rf, s1
+ clear_vec(XTMP2)
+ stp rc, rd, [RSTATE, #8]
+ clear_vec(XTMP3)
+ eor rg, rg, k_even
+ clear_vec(XTMP4)
+ stp re, rf, [RSTATE, #16]
+ clear_vec(XTMP5)
+ eor rh, rh, k_odd
+ clear_vec(XTMP6)
+ stp rg, rh, [RSTATE, #24]
+
+ /* Clear message expansion area */
+ add addr0, sp, #STACK_W
+ st1 {W0.16b-W3.16b}, [addr0], #64
+ st1 {W0.16b-W3.16b}, [addr0], #64
+ st1 {W0.16b-W3.16b}, [addr0]
+
+ mov sp, RFRAME
+
+ ldp x25, x26, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x19, x20, [sp], #16
+ ldp x28, x29, [sp], #16
+
+ ret
+SYM_FUNC_END(sm3_neon_transform)
+
+
+ .section ".rodata", "a"
+
+ .align 4
+ /*
+  * SM3 round constants, stored pre-rotated: entry j holds T_j rotated left
+  * by (j mod 32) bits, where T_j is 0x79cc4519 for rounds 0-15 and
+  * 0x7a879d8a for rounds 16-63. 64 words, four per line; because the
+  * rotation is taken modulo 32, the rows for rounds 48-63 repeat the rows
+  * for rounds 16-31.
+  */
+.LKtable:
+ .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
+ .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
+ .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
+ .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
+ .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+ .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+ .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+ .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+ .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
+ .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
+ .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
+ .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
+ .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+ .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+ .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+ .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
new file mode 100644
index 000000000000..7182ee683f14
--- /dev/null
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sm3-neon-glue.c - SM3 secure hash using NEON instructions
+ *
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+
+ /*
+  * SM3 block function implemented in assembly (sm3-neon-core.S). It is
+  * passed to the sm3_base_*() helpers below as their block callback and is
+  * only ever invoked between kernel_neon_begin() and kernel_neon_end().
+  */
+ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
+ int blocks);
+
+ /*
+  * shash .update hook: absorb @len bytes at @data into the SM3 state held
+  * in the descriptor context. The NEON block function is used when SIMD is
+  * usable in the current context; otherwise the generic sm3_update() runs.
+  * Always returns 0.
+  */
+ static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
+ 			   unsigned int len)
+ {
+ 	if (crypto_simd_usable()) {
+ 		kernel_neon_begin();
+ 		sm3_base_do_update(desc, data, len, sm3_neon_transform);
+ 		kernel_neon_end();
+ 	} else {
+ 		sm3_update(shash_desc_ctx(desc), data, len);
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * shash .final hook: finish the hash and write the digest to @out.
+  * With SIMD usable, padding goes through the NEON block function and the
+  * result of sm3_base_finish() is returned; otherwise the generic
+  * sm3_final() produces the digest and 0 is returned.
+  */
+ static int sm3_neon_final(struct shash_desc *desc, u8 *out)
+ {
+ 	if (crypto_simd_usable()) {
+ 		kernel_neon_begin();
+ 		sm3_base_do_finalize(desc, sm3_neon_transform);
+ 		kernel_neon_end();
+
+ 		return sm3_base_finish(desc, out);
+ 	}
+
+ 	sm3_final(shash_desc_ctx(desc), out);
+ 	return 0;
+ }
+
+ /*
+  * shash .finup hook: absorb the last @len bytes at @data (if any), then
+  * finish the hash and write the digest to @out. Both steps share a single
+  * NEON section when SIMD is usable; otherwise the generic sm3_update() /
+  * sm3_final() pair is used and 0 is returned.
+  */
+ static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
+ 			  unsigned int len, u8 *out)
+ {
+ 	struct sm3_state *sctx;
+
+ 	if (crypto_simd_usable()) {
+ 		kernel_neon_begin();
+ 		if (len != 0)
+ 			sm3_base_do_update(desc, data, len,
+ 					   sm3_neon_transform);
+ 		sm3_base_do_finalize(desc, sm3_neon_transform);
+ 		kernel_neon_end();
+
+ 		return sm3_base_finish(desc, out);
+ 	}
+
+ 	sctx = shash_desc_ctx(desc);
+ 	if (len != 0)
+ 		sm3_update(sctx, data, len);
+ 	sm3_final(sctx, out);
+
+ 	return 0;
+ }
+
+ /* "sm3" shash backed by the NEON block function, registered at priority 200. */
+ static struct shash_alg sm3_alg = {
+ 	.init		= sm3_base_init,
+ 	.update		= sm3_neon_update,
+ 	.final		= sm3_neon_final,
+ 	.finup		= sm3_neon_finup,
+ 	.digestsize	= SM3_DIGEST_SIZE,
+ 	.descsize	= sizeof(struct sm3_state),
+ 	.base		= {
+ 		.cra_name		= "sm3",
+ 		.cra_driver_name	= "sm3-neon",
+ 		.cra_priority		= 200,
+ 		.cra_blocksize		= SM3_BLOCK_SIZE,
+ 		.cra_module		= THIS_MODULE,
+ 	},
+ };
+
+ /* Module entry point: register the NEON SM3 shash with the crypto API. */
+ static int __init sm3_neon_init(void)
+ {
+ 	int err = crypto_register_shash(&sm3_alg);
+
+ 	return err;
+ }
+
+ /* Module exit point: unregister the shash registered by sm3_neon_init(). */
+ static void __exit sm3_neon_fini(void)
+ {
+ crypto_unregister_shash(&sm3_alg);
+ }
+
+ /* Hook the register/unregister helpers into module load and unload. */
+ module_init(sm3_neon_init);
+ module_exit(sm3_neon_fini);
+
+ MODULE_DESCRIPTION("SM3 secure hash using NEON instructions");
+ MODULE_AUTHOR("Jussi Kivilinna <[email protected]>");
+ MODULE_AUTHOR("Tianjia Zhang <[email protected]>");
+ MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/sm4-ce-asm.h b/arch/arm64/crypto/sm4-ce-asm.h
new file mode 100644
index 000000000000..7ea98e42e779
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-asm.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 helper macros for Crypto Extensions
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+ /*
+  * Load 128 bytes from [ptr] into v24-v31 (the round key array, per the
+  * callers' register documentation). The first load post-increments ptr by
+  * 64 and the second does not, so ptr is left 64 bytes past its original
+  * value. The SM4_CRYPT_BLK* macros below expect v24-v31 to be set up this
+  * way.
+  */
+ #define SM4_PREPARE(ptr) \
+ ld1 {v24.16b-v27.16b}, [ptr], #64; \
+ ld1 {v28.16b-v31.16b}, [ptr];
+
+ /*
+  * Run one block through SM4 in place using the keys in v24-v31.
+  *
+  * "_BE" variant: unlike SM4_CRYPT_BLK() it performs no rev32 on entry, so
+  * b0 must already hold byte-swapped 32-bit words. After the eight sm4e
+  * steps the word order is reversed (rev64 swaps the words inside each
+  * 64-bit half, ext #8 swaps the halves) and rev32 byte-swaps every word
+  * for the output.
+  */
+ #define SM4_CRYPT_BLK_BE(b0) \
+ sm4e b0.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ rev32 b0.16b, b0.16b;
+
+ /*
+  * Process one block held in memory byte order: byte-swap each 32-bit word
+  * of b0, then hand over to SM4_CRYPT_BLK_BE().
+  */
+ #define SM4_CRYPT_BLK(b0) \
+ rev32 b0.16b, b0.16b; \
+ SM4_CRYPT_BLK_BE(b0);
+
+ /*
+  * Two-block version of SM4_CRYPT_BLK_BE(): b0 and b1 are processed in
+  * place with the keys in v24-v31, with the instructions for the two blocks
+  * interleaved. Inputs must already hold byte-swapped 32-bit words.
+  *
+  * The final line deliberately carries no line continuation, so the macro
+  * ends here no matter what follows it.
+  */
+ #define SM4_CRYPT_BLK2_BE(b0, b1) \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b;
+
+ /*
+  * Process two blocks held in memory byte order: byte-swap each 32-bit
+  * word of b0 and b1, then hand over to SM4_CRYPT_BLK2_BE().
+  */
+ #define SM4_CRYPT_BLK2(b0, b1) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ SM4_CRYPT_BLK2_BE(b0, b1);
+
+ /*
+  * Four-block version of SM4_CRYPT_BLK_BE(): b0-b3 are processed in place
+  * with the keys in v24-v31, one key vector at a time across all four
+  * blocks. Inputs must already hold byte-swapped 32-bit words.
+  */
+ #define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b2.4s, v24.4s; \
+ sm4e b3.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b2.4s, v25.4s; \
+ sm4e b3.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b2.4s, v26.4s; \
+ sm4e b3.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b2.4s, v27.4s; \
+ sm4e b3.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b2.4s, v28.4s; \
+ sm4e b3.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b2.4s, v29.4s; \
+ sm4e b3.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b2.4s, v30.4s; \
+ sm4e b3.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ sm4e b2.4s, v31.4s; \
+ sm4e b3.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ rev64 b2.4s, b2.4s; \
+ rev64 b3.4s, b3.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ ext b2.16b, b2.16b, b2.16b, #8; \
+ ext b3.16b, b3.16b, b3.16b, #8; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b;
+
+ /*
+  * Process four blocks held in memory byte order: byte-swap each 32-bit
+  * word of b0-b3, then hand over to SM4_CRYPT_BLK4_BE().
+  */
+ #define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
+
+ /*
+  * Eight-block version of SM4_CRYPT_BLK_BE(): b0-b7 are processed in place
+  * with the keys in v24-v31, one key vector at a time across all eight
+  * blocks. Inputs must already hold byte-swapped 32-bit words.
+  */
+ #define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7) \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b2.4s, v24.4s; \
+ sm4e b3.4s, v24.4s; \
+ sm4e b4.4s, v24.4s; \
+ sm4e b5.4s, v24.4s; \
+ sm4e b6.4s, v24.4s; \
+ sm4e b7.4s, v24.4s; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b2.4s, v25.4s; \
+ sm4e b3.4s, v25.4s; \
+ sm4e b4.4s, v25.4s; \
+ sm4e b5.4s, v25.4s; \
+ sm4e b6.4s, v25.4s; \
+ sm4e b7.4s, v25.4s; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b2.4s, v26.4s; \
+ sm4e b3.4s, v26.4s; \
+ sm4e b4.4s, v26.4s; \
+ sm4e b5.4s, v26.4s; \
+ sm4e b6.4s, v26.4s; \
+ sm4e b7.4s, v26.4s; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b2.4s, v27.4s; \
+ sm4e b3.4s, v27.4s; \
+ sm4e b4.4s, v27.4s; \
+ sm4e b5.4s, v27.4s; \
+ sm4e b6.4s, v27.4s; \
+ sm4e b7.4s, v27.4s; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b2.4s, v28.4s; \
+ sm4e b3.4s, v28.4s; \
+ sm4e b4.4s, v28.4s; \
+ sm4e b5.4s, v28.4s; \
+ sm4e b6.4s, v28.4s; \
+ sm4e b7.4s, v28.4s; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b2.4s, v29.4s; \
+ sm4e b3.4s, v29.4s; \
+ sm4e b4.4s, v29.4s; \
+ sm4e b5.4s, v29.4s; \
+ sm4e b6.4s, v29.4s; \
+ sm4e b7.4s, v29.4s; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b2.4s, v30.4s; \
+ sm4e b3.4s, v30.4s; \
+ sm4e b4.4s, v30.4s; \
+ sm4e b5.4s, v30.4s; \
+ sm4e b6.4s, v30.4s; \
+ sm4e b7.4s, v30.4s; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ sm4e b2.4s, v31.4s; \
+ sm4e b3.4s, v31.4s; \
+ sm4e b4.4s, v31.4s; \
+ sm4e b5.4s, v31.4s; \
+ sm4e b6.4s, v31.4s; \
+ sm4e b7.4s, v31.4s; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ rev64 b2.4s, b2.4s; \
+ rev64 b3.4s, b3.4s; \
+ rev64 b4.4s, b4.4s; \
+ rev64 b5.4s, b5.4s; \
+ rev64 b6.4s, b6.4s; \
+ rev64 b7.4s, b7.4s; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ ext b2.16b, b2.16b, b2.16b, #8; \
+ ext b3.16b, b3.16b, b3.16b, #8; \
+ ext b4.16b, b4.16b, b4.16b, #8; \
+ ext b5.16b, b5.16b, b5.16b, #8; \
+ ext b6.16b, b6.16b, b6.16b, #8; \
+ ext b7.16b, b7.16b, b7.16b, #8; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b;
+
+ /*
+  * Process eight blocks held in memory byte order: byte-swap each 32-bit
+  * word of b0-b7, then hand over to SM4_CRYPT_BLK8_BE().
+  */
+ #define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ rev32 b4.16b, b4.16b; \
+ rev32 b5.16b, b5.16b; \
+ rev32 b6.16b, b6.16b; \
+ rev32 b7.16b, b7.16b; \
+ SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7);
diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S
new file mode 100644
index 000000000000..028207c4afd0
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-ccm-core.S
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "sm4-ce-asm.h"
+
+.arch armv8-a+crypto
+
+ /*
+  * Define an assembler symbol ".Lv<N>.4s" with value N for every vector
+  * register that is passed to the sm4e macro below. Only the register
+  * numbers listed here can be used as sm4e operands in this file.
+  */
+.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
+ .set .Lv\b\().4s, \b
+.endr
+
+ /*
+  * Emit the instruction word directly instead of using a mnemonic: the
+  * source register number (vn) is placed at bits 9:5 and the destination
+  * (vd) at bits 4:0 of the base opcode. NOTE(review): presumably done so
+  * that assemblers without SM4 support can still build the file - confirm.
+  */
+.macro sm4e, vd, vn
+ .inst 0xcec08400 | (.L\vn << 5) | .L\vd
+.endm
+
+/* Register macros */
+
+ /* Vector register holding the running CBC-MAC state in every routine below. */
+ #define RMAC v16
+
+/* Helper macros. */
+
+ /*
+  * Materialise the current counter into vctr and post-increment it.
+  *
+  * x7:x8 hold the 128-bit counter as native integers (x7 = high half,
+  * x8 = low half; the callers byte-reverse both after loading them from the
+  * big-endian counter block). The two halves are copied into vctr and
+  * rev64 puts each half back into big-endian byte order, so vctr ends up
+  * holding the counter block as it appears in memory. adds/adc then add one
+  * to x7:x8 with carry for the next use; the vector instruction placed
+  * between them does not touch the flags.
+  */
+ #define inc_le128(vctr) \
+ mov vctr.d[1], x8; \
+ mov vctr.d[0], x7; \
+ adds x8, x8, #1; \
+ rev64 vctr.16b, vctr.16b; \
+ adc x7, x7, xzr;
+
+
+.align 3
+ /*
+  * sm4_ce_cbcmac_update - fold whole blocks into a CBC-MAC state
+  *
+  * For each of the w3 16-byte blocks at x2 the MAC is encrypted first and
+  * the block is XORed in afterwards (mac = E(mac) ^ block), so the value
+  * written back to [x1] is the not-yet-encrypted state after the last
+  * block. Blocks are consumed four at a time while at least four remain,
+  * then one at a time.
+  *
+  * w3 must be at least 1: the single-block loop decrements the count
+  * before testing it, so a count of zero would wrap around.
+  */
+SYM_FUNC_START(sm4_ce_cbcmac_update)
+ /* input:
+ * x0: round key array, CTX
+ * x1: mac
+ * x2: src
+ * w3: nblocks
+ */
+ SM4_PREPARE(x0)
+
+ ld1 {RMAC.16b}, [x1]
+
+.Lcbcmac_loop_4x:
+ cmp w3, #4
+ blt .Lcbcmac_loop_1x
+
+ sub w3, w3, #4
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v0.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v1.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v2.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v3.16b
+
+ cbz w3, .Lcbcmac_end
+ b .Lcbcmac_loop_4x
+
+.Lcbcmac_loop_1x:
+ sub w3, w3, #1
+
+ ld1 {v0.16b}, [x2], #16
+
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v0.16b
+
+ cbnz w3, .Lcbcmac_loop_1x
+
+.Lcbcmac_end:
+ st1 {RMAC.16b}, [x1]
+ ret
+SYM_FUNC_END(sm4_ce_cbcmac_update)
+
+.align 3
+ /*
+  * sm4_ce_ccm_final - turn the CBC-MAC state into the final tag
+  *
+  * Encrypts the pending MAC state and the initial counter block in one
+  * two-block pass, XORs the two results and stores the outcome back to
+  * [x2]. The counter block at [x1] is only read.
+  */
+SYM_FUNC_START(sm4_ce_ccm_final)
+ /* input:
+ * x0: round key array, CTX
+ * x1: ctr0 (big endian, 128 bit)
+ * x2: mac
+ */
+ SM4_PREPARE(x0)
+
+ ld1 {RMAC.16b}, [x2]
+ ld1 {v0.16b}, [x1]
+
+ SM4_CRYPT_BLK2(RMAC, v0)
+
+ /* en-/decrypt the mac with ctr0 */
+ eor RMAC.16b, RMAC.16b, v0.16b
+ st1 {RMAC.16b}, [x2]
+
+ ret
+SYM_FUNC_END(sm4_ce_ccm_final)
+
+.align 3
+ /*
+  * sm4_ce_ccm_enc - CTR-encrypt data and fold the plaintext into the MAC
+  *
+  * Whole blocks: the counter block and the current MAC are encrypted
+  * together; the output is E(ctr) ^ src and the MAC becomes E(mac) ^ src,
+  * i.e. the MAC is computed over the input (plaintext). Four blocks are
+  * handled per iteration while at least 64 bytes remain, then one block at
+  * a time.
+  *
+  * If nbytes is a multiple of 16 the MAC and the advanced counter are
+  * written back to [x5] and [x3]. Otherwise the remaining 1..15 bytes are
+  * processed byte by byte: the encrypted MAC is stored to [x5] first, then
+  * its leading bytes are overwritten with (MAC byte ^ input byte). On that
+  * path the counter at [x3] is NOT written back.
+  *
+  * w4 must be non-zero: a zero length falls through to the tail loop,
+  * whose byte count is decremented before it is tested.
+  *
+  * Besides the argument registers this uses x6-x9 and w0 as scratch (x0 has
+  * already been advanced by SM4_PREPARE), plus v0-v3, v8-v11, v16 and
+  * v24-v31.
+  */
+SYM_FUNC_START(sm4_ce_ccm_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nbytes
+ * x5: mac
+ */
+ SM4_PREPARE(x0)
+
+ /* keep the counter as two native 64-bit integers in x7 (high) / x8 (low) */
+ ldp x7, x8, [x3]
+ rev x7, x7
+ rev x8, x8
+
+ ld1 {RMAC.16b}, [x5]
+
+.Lccm_enc_loop_4x:
+ cmp w4, #(4 * 16)
+ blt .Lccm_enc_loop_1x
+
+ sub w4, w4, #(4 * 16)
+
+ /* construct CTRs */
+ inc_le128(v8) /* +0 */
+ inc_le128(v9) /* +1 */
+ inc_le128(v10) /* +2 */
+ inc_le128(v11) /* +3 */
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ SM4_CRYPT_BLK2(v8, RMAC)
+ eor v8.16b, v8.16b, v0.16b
+ eor RMAC.16b, RMAC.16b, v0.16b
+ SM4_CRYPT_BLK2(v9, RMAC)
+ eor v9.16b, v9.16b, v1.16b
+ eor RMAC.16b, RMAC.16b, v1.16b
+ SM4_CRYPT_BLK2(v10, RMAC)
+ eor v10.16b, v10.16b, v2.16b
+ eor RMAC.16b, RMAC.16b, v2.16b
+ SM4_CRYPT_BLK2(v11, RMAC)
+ eor v11.16b, v11.16b, v3.16b
+ eor RMAC.16b, RMAC.16b, v3.16b
+
+ st1 {v8.16b-v11.16b}, [x1], #64
+
+ cbz w4, .Lccm_enc_end
+ b .Lccm_enc_loop_4x
+
+.Lccm_enc_loop_1x:
+ cmp w4, #16
+ blt .Lccm_enc_tail
+
+ sub w4, w4, #16
+
+ /* construct CTRs */
+ inc_le128(v8)
+
+ ld1 {v0.16b}, [x2], #16
+
+ SM4_CRYPT_BLK2(v8, RMAC)
+ eor v8.16b, v8.16b, v0.16b
+ eor RMAC.16b, RMAC.16b, v0.16b
+
+ st1 {v8.16b}, [x1], #16
+
+ cbz w4, .Lccm_enc_end
+ b .Lccm_enc_loop_1x
+
+.Lccm_enc_tail:
+ /* construct CTRs */
+ inc_le128(v8)
+
+ SM4_CRYPT_BLK2(RMAC, v8)
+
+ /* store new MAC */
+ st1 {RMAC.16b}, [x5]
+
+.Lccm_enc_tail_loop:
+ ldrb w0, [x2], #1 /* get 1 byte from input */
+ umov w9, v8.b[0] /* get top crypted CTR byte */
+ umov w6, RMAC.b[0] /* get top MAC byte */
+
+ eor w9, w9, w0 /* w9 = CTR ^ input */
+ eor w6, w6, w0 /* w6 = MAC ^ input */
+
+ strb w9, [x1], #1 /* store out byte */
+ strb w6, [x5], #1 /* store MAC byte */
+
+ subs w4, w4, #1
+ beq .Lccm_enc_ret
+
+ /* shift out one byte */
+ ext RMAC.16b, RMAC.16b, RMAC.16b, #1
+ ext v8.16b, v8.16b, v8.16b, #1
+
+ b .Lccm_enc_tail_loop
+
+.Lccm_enc_end:
+ /* store new MAC */
+ st1 {RMAC.16b}, [x5]
+
+ /* store new CTR */
+ rev x7, x7
+ rev x8, x8
+ stp x7, x8, [x3]
+
+.Lccm_enc_ret:
+ ret
+SYM_FUNC_END(sm4_ce_ccm_enc)
+
+.align 3
+ /*
+  * sm4_ce_ccm_dec - CTR-decrypt data and fold the plaintext into the MAC
+  *
+  * Same structure as sm4_ce_ccm_enc, with one difference: the MAC is
+  * updated with the *output* of the CTR step (E(ctr) ^ src, the recovered
+  * plaintext) rather than with the input, so both directions authenticate
+  * the plaintext.
+  *
+  * If nbytes is a multiple of 16 the MAC and the advanced counter are
+  * written back to [x5] and [x3]. Otherwise the remaining 1..15 bytes are
+  * processed byte by byte: the encrypted MAC is stored to [x5] first, then
+  * its leading bytes are overwritten with (MAC byte ^ output byte). On that
+  * path the counter at [x3] is NOT written back.
+  *
+  * w4 must be non-zero: a zero length falls through to the tail loop,
+  * whose byte count is decremented before it is tested.
+  *
+  * Besides the argument registers this uses x6-x9 and w0 as scratch (x0 has
+  * already been advanced by SM4_PREPARE), plus v0-v3, v8-v11, v16 and
+  * v24-v31.
+  */
+SYM_FUNC_START(sm4_ce_ccm_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nbytes
+ * x5: mac
+ */
+ SM4_PREPARE(x0)
+
+ /* keep the counter as two native 64-bit integers in x7 (high) / x8 (low) */
+ ldp x7, x8, [x3]
+ rev x7, x7
+ rev x8, x8
+
+ ld1 {RMAC.16b}, [x5]
+
+.Lccm_dec_loop_4x:
+ cmp w4, #(4 * 16)
+ blt .Lccm_dec_loop_1x
+
+ sub w4, w4, #(4 * 16)
+
+ /* construct CTRs */
+ inc_le128(v8) /* +0 */
+ inc_le128(v9) /* +1 */
+ inc_le128(v10) /* +2 */
+ inc_le128(v11) /* +3 */
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ SM4_CRYPT_BLK2(v8, RMAC)
+ eor v8.16b, v8.16b, v0.16b
+ eor RMAC.16b, RMAC.16b, v8.16b
+ SM4_CRYPT_BLK2(v9, RMAC)
+ eor v9.16b, v9.16b, v1.16b
+ eor RMAC.16b, RMAC.16b, v9.16b
+ SM4_CRYPT_BLK2(v10, RMAC)
+ eor v10.16b, v10.16b, v2.16b
+ eor RMAC.16b, RMAC.16b, v10.16b
+ SM4_CRYPT_BLK2(v11, RMAC)
+ eor v11.16b, v11.16b, v3.16b
+ eor RMAC.16b, RMAC.16b, v11.16b
+
+ st1 {v8.16b-v11.16b}, [x1], #64
+
+ cbz w4, .Lccm_dec_end
+ b .Lccm_dec_loop_4x
+
+.Lccm_dec_loop_1x:
+ cmp w4, #16
+ blt .Lccm_dec_tail
+
+ sub w4, w4, #16
+
+ /* construct CTRs */
+ inc_le128(v8)
+
+ ld1 {v0.16b}, [x2], #16
+
+ SM4_CRYPT_BLK2(v8, RMAC)
+ eor v8.16b, v8.16b, v0.16b
+ eor RMAC.16b, RMAC.16b, v8.16b
+
+ st1 {v8.16b}, [x1], #16
+
+ cbz w4, .Lccm_dec_end
+ b .Lccm_dec_loop_1x
+
+.Lccm_dec_tail:
+ /* construct CTRs */
+ inc_le128(v8)
+
+ SM4_CRYPT_BLK2(RMAC, v8)
+
+ /* store new MAC */
+ st1 {RMAC.16b}, [x5]
+
+.Lccm_dec_tail_loop:
+ ldrb w0, [x2], #1 /* get 1 byte from input */
+ umov w9, v8.b[0] /* get top crypted CTR byte */
+ umov w6, RMAC.b[0] /* get top MAC byte */
+
+ eor w9, w9, w0 /* w9 = CTR ^ input */
+ eor w6, w6, w9 /* w6 = MAC ^ output */
+
+ strb w9, [x1], #1 /* store out byte */
+ strb w6, [x5], #1 /* store MAC byte */
+
+ subs w4, w4, #1
+ beq .Lccm_dec_ret
+
+ /* shift out one byte */
+ ext RMAC.16b, RMAC.16b, RMAC.16b, #1
+ ext v8.16b, v8.16b, v8.16b, #1
+
+ b .Lccm_dec_tail_loop
+
+.Lccm_dec_end:
+ /* store new MAC */
+ st1 {RMAC.16b}, [x5]
+
+ /* store new CTR */
+ rev x7, x7
+ rev x8, x8
+ stp x7, x8, [x3]
+
+.Lccm_dec_ret:
+ ret
+SYM_FUNC_END(sm4_ce_ccm_dec)
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
new file mode 100644
index 000000000000..f2cec7b52efc
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -0,0 +1,303 @@
+ // SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-ce.h"
+
+ /*
+  * Assembly routines from sm4-ce-ccm-core.S. All of them take the
+  * encryption round keys and update @mac in place.
+  *
+  * sm4_ce_cbcmac_update(): fold @nblocks whole blocks from @src into @mac;
+  *                         @nblocks must be at least 1.
+  * sm4_ce_ccm_enc/dec():   CTR-process @nbytes (non-zero) from @src to @dst
+  *                         using the big-endian counter block @iv, and fold
+  *                         the plaintext into @mac. @iv is only advanced when
+  *                         @nbytes is a multiple of the block size.
+  * sm4_ce_ccm_final():     combine @mac with the encrypted initial counter
+  *                         block @iv to produce the tag in @mac.
+  */
+ asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac,
+ const u8 *src, unsigned int nblocks);
+ asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nbytes, u8 *mac);
+ asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nbytes, u8 *mac);
+ asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac);
+
+
+ /*
+  * AEAD .setkey hook: expand @key into the encryption and decryption round
+  * keys stored in the transform context.
+  *
+  * Returns -EINVAL unless @key_len equals SM4_KEY_SIZE, 0 otherwise.
+  */
+ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
+ 		      unsigned int key_len)
+ {
+ 	struct sm4_ctx *ctx;
+
+ 	if (key_len != SM4_KEY_SIZE)
+ 		return -EINVAL;
+
+ 	ctx = crypto_aead_ctx(tfm);
+
+ 	/* the key schedule is computed by an assembly helper */
+ 	kernel_neon_begin();
+ 	sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ 			  crypto_sm4_fk, crypto_sm4_ck);
+ 	kernel_neon_end();
+
+ 	return 0;
+ }
+
+ /*
+  * AEAD .setauthsize hook: accept only even tag lengths of at least four
+  * bytes. Returns 0 if @authsize is acceptable, -EINVAL otherwise.
+  */
+ static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+ {
+ 	if (authsize >= 4 && !(authsize & 1))
+ 		return 0;
+
+ 	return -EINVAL;
+ }
+
+ /*
+  * Build the first CCM block (flags, nonce, message length) in @info from
+  * the request IV, and zero the counter portion of req->iv as a side
+  * effect.
+  *
+  * req->iv[0] carries L - 1, where L is the size in bytes of the length
+  * field. Returns -EINVAL if L is outside 2..8, -EOVERFLOW if @msglen does
+  * not fit into L bytes, and 0 on success.
+  */
+ static int ccm_format_input(u8 info[], struct aead_request *req,
+ 			    unsigned int msglen)
+ {
+ 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ 	unsigned int lsize = req->iv[0] + 1;
+ 	unsigned int authsize, nlen;
+ 	__be32 be_msglen;
+
+ 	/* CCM only permits a length field of 2 to 8 bytes */
+ 	if (lsize < 2 || lsize > 8)
+ 		return -EINVAL;
+
+ 	/* msglen is 32 bits wide, so only fields below 4 bytes can overflow */
+ 	if (lsize < 4 && (msglen >> (8 * lsize)) != 0)
+ 		return -EOVERFLOW;
+
+ 	/* clear the counter bytes of the IV, then use the IV as the template */
+ 	memset(&req->iv[SM4_BLOCK_SIZE - lsize], 0, lsize);
+ 	memcpy(info, req->iv, SM4_BLOCK_SIZE);
+
+ 	/* format flags field per RFC 3610/NIST 800-38C */
+ 	authsize = crypto_aead_authsize(aead);
+ 	info[0] |= ((authsize - 2) / 2) << 3;
+ 	if (req->assoclen)
+ 		info[0] |= (1 << 6);
+
+ 	/*
+ 	 * Write the message length into the tail of the block. Linux keeps
+ 	 * msglen in a u32, so at most the last four bytes are ever non-zero.
+ 	 */
+ 	nlen = lsize >= 4 ? 4 : lsize;
+ 	be_msglen = cpu_to_be32(msglen);
+ 	memcpy(&info[SM4_BLOCK_SIZE - nlen], (u8 *)&be_msglen + 4 - nlen, nlen);
+
+ 	return 0;
+ }
+
+ /*
+  * Fold the associated data of @req into the CBC-MAC state @mac.
+  *
+  * On entry @mac holds the formatted first block; it is encrypted here and
+  * the encoded associated-data length is XORed in front of the data. The
+  * data itself is then walked segment by segment through req->src.
+  *
+  * 'len' counts how many bytes of the current MAC block have already been
+  * XORed in. The last block (full or partial) is left un-encrypted on
+  * return; the ccm enc/dec/final assembly routines encrypt the MAC before
+  * they use it.
+  *
+  * Must be called with the NEON unit held, as it invokes the assembly
+  * helpers directly.
+  */
+ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+ {
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+ struct __packed { __be16 l; __be32 h; } aadlen;
+ u32 assoclen = req->assoclen;
+ struct scatter_walk walk;
+ unsigned int len;
+
+ /* length prefix: 2 bytes, or the 0xfffe marker plus a 32-bit length */
+ if (assoclen < 0xff00) {
+ aadlen.l = cpu_to_be16(assoclen);
+ len = 2;
+ } else {
+ aadlen.l = cpu_to_be16(0xfffe);
+ put_unaligned_be32(assoclen, &aadlen.h);
+ len = 6;
+ }
+
+ sm4_ce_crypt_block(ctx->rkey_enc, mac, mac);
+ crypto_xor(mac, (const u8 *)&aadlen, len);
+
+ scatterwalk_start(&walk, req->src);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, assoclen);
+ u8 *p, *ptr;
+
+ /* current scatterlist entry exhausted: move on to the next one */
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, assoclen);
+ }
+
+ p = ptr = scatterwalk_map(&walk);
+ assoclen -= n;
+ scatterwalk_advance(&walk, n);
+
+ while (n > 0) {
+ unsigned int l, nblocks;
+
+ if (len == SM4_BLOCK_SIZE) {
+ if (n < SM4_BLOCK_SIZE) {
+ /* block is full: encrypt it and start a new one */
+ sm4_ce_crypt_block(ctx->rkey_enc,
+ mac, mac);
+
+ len = 0;
+ } else {
+ /*
+ * Bulk path: the helper encrypts the MAC before
+ * each block it XORs in, so 'len' stays at a full
+ * block afterwards.
+ */
+ nblocks = n / SM4_BLOCK_SIZE;
+ sm4_ce_cbcmac_update(ctx->rkey_enc,
+ mac, ptr, nblocks);
+
+ ptr += nblocks * SM4_BLOCK_SIZE;
+ n %= SM4_BLOCK_SIZE;
+
+ continue;
+ }
+ }
+
+ /* top up the current block with as many bytes as fit */
+ l = min(n, SM4_BLOCK_SIZE - len);
+ if (l) {
+ crypto_xor(mac + len, ptr, l);
+ len += l;
+ ptr += l;
+ n -= l;
+ }
+ }
+
+ scatterwalk_unmap(p);
+ scatterwalk_done(&walk, 0, assoclen);
+ } while (assoclen);
+ }
+
+ /*
+  * Common body of encryption and decryption: authenticate the associated
+  * data, run @sm4_ce_ccm_crypt over every chunk produced by @walk, and
+  * finish the tag in @mac.
+  *
+  * The NEON unit is taken here and released before every call to
+  * skcipher_walk_done(); it is re-taken only if the walk has more data.
+  * Every return path therefore leaves with the NEON unit released.
+  *
+  * Returns 0 on success or the error reported by skcipher_walk_done().
+  */
+ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
+ u32 *rkey_enc, u8 mac[],
+ void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst,
+ const u8 *src, u8 *iv,
+ unsigned int nbytes, u8 *mac))
+ {
+ u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
+ int err;
+
+ /* preserve the initial ctr0 for the TAG */
+ memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
+ crypto_inc(walk->iv, SM4_BLOCK_SIZE);
+
+ kernel_neon_begin();
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ do {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+ const u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+
+ /* last chunk: let the assembly handle the partial block as well */
+ if (walk->nbytes == walk->total)
+ tail = 0;
+
+ /* the assembly routines require a non-zero length */
+ if (walk->nbytes - tail)
+ sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
+ walk->nbytes - tail, mac);
+
+ if (walk->nbytes == walk->total)
+ sm4_ce_ccm_final(rkey_enc, ctr0, mac);
+
+ kernel_neon_end();
+
+ if (walk->nbytes) {
+ err = skcipher_walk_done(walk, tail);
+ if (err)
+ return err;
+ if (walk->nbytes)
+ kernel_neon_begin();
+ }
+ } while (walk->nbytes > 0);
+
+ return 0;
+ }
+
+ /*
+  * AEAD .encrypt hook: encrypt req->cryptlen bytes and append the
+  * authentication tag to the destination right after the ciphertext.
+  *
+  * Returns 0 on success, or the first error reported while formatting the
+  * input, starting the walk, or processing the data.
+  */
+ static int ccm_encrypt(struct aead_request *req)
+ {
+ 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ 	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+ 	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
+ 	struct skcipher_walk walk;
+ 	int err;
+
+ 	err = ccm_format_input(mac, req, req->cryptlen);
+ 	if (!err)
+ 		err = skcipher_walk_aead_encrypt(&walk, req, false);
+ 	if (!err)
+ 		err = ccm_crypt(req, &walk, ctx->rkey_enc, mac,
+ 				sm4_ce_ccm_enc);
+ 	if (err)
+ 		return err;
+
+ 	/* copy authtag to end of dst */
+ 	scatterwalk_map_and_copy(mac, req->dst,
+ 				 req->assoclen + req->cryptlen,
+ 				 crypto_aead_authsize(aead), 1);
+
+ 	return 0;
+ }
+
+ /*
+  * AEAD .decrypt hook: decrypt the payload (req->cryptlen minus the tag
+  * size) and check the tag stored at the end of the source against the one
+  * computed here.
+  *
+  * Returns 0 if the tags match, -EBADMSG if they differ, or the first
+  * error reported while formatting the input, starting the walk, or
+  * processing the data.
+  */
+ static int ccm_decrypt(struct aead_request *req)
+ {
+ 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ 	unsigned int authsize = crypto_aead_authsize(aead);
+ 	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+ 	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
+ 	u8 authtag[SM4_BLOCK_SIZE];
+ 	struct skcipher_walk walk;
+ 	int err;
+
+ 	err = ccm_format_input(mac, req, req->cryptlen - authsize);
+ 	if (!err)
+ 		err = skcipher_walk_aead_decrypt(&walk, req, false);
+ 	if (!err)
+ 		err = ccm_crypt(req, &walk, ctx->rkey_enc, mac,
+ 				sm4_ce_ccm_dec);
+ 	if (err)
+ 		return err;
+
+ 	/* compare calculated auth tag with the stored one */
+ 	scatterwalk_map_and_copy(authtag, req->src,
+ 				 req->assoclen + req->cryptlen - authsize,
+ 				 authsize, 0);
+
+ 	return crypto_memneq(authtag, mac, authsize) ? -EBADMSG : 0;
+ }
+
+ /* "ccm(sm4)" AEAD backed by the Crypto Extensions routines, priority 400. */
+ static struct aead_alg sm4_ccm_alg = {
+ 	.base.cra_name		= "ccm(sm4)",
+ 	.base.cra_driver_name	= "ccm-sm4-ce",
+ 	.base.cra_priority	= 400,
+ 	.base.cra_blocksize	= 1,
+ 	.base.cra_ctxsize	= sizeof(struct sm4_ctx),
+ 	.base.cra_module	= THIS_MODULE,
+
+ 	.ivsize			= SM4_BLOCK_SIZE,
+ 	.chunksize		= SM4_BLOCK_SIZE,
+ 	.maxauthsize		= SM4_BLOCK_SIZE,
+
+ 	.setkey			= ccm_setkey,
+ 	.setauthsize		= ccm_setauthsize,
+ 	.encrypt		= ccm_encrypt,
+ 	.decrypt		= ccm_decrypt,
+ };
+
+ /* Module entry point: register the SM4-CCM AEAD with the crypto API. */
+ static int __init sm4_ce_ccm_init(void)
+ {
+ 	int err = crypto_register_aead(&sm4_ccm_alg);
+
+ 	return err;
+ }
+
+ /* Module exit point: unregister the AEAD registered by sm4_ce_ccm_init(). */
+ static void __exit sm4_ce_ccm_exit(void)
+ {
+ crypto_unregister_aead(&sm4_ccm_alg);
+ }
+
+ /*
+  * The init function is registered through module_cpu_feature_match()
+  * keyed on the SM4 CPU feature rather than through plain module_init().
+  */
+ module_cpu_feature_match(SM4, sm4_ce_ccm_init);
+ module_exit(sm4_ce_ccm_exit);
+
+ MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions");
+ MODULE_ALIAS_CRYPTO("ccm(sm4)");
+ MODULE_AUTHOR("Tianjia Zhang <[email protected]>");
+ MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 934e0f093279..877b80c54a0d 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -10,10 +10,12 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include "sm4-ce-asm.h"
.arch armv8-a+crypto
-.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+ 20, 24, 25, 26, 27, 28, 29, 30, 31
.set .Lv\b\().4s, \b
.endr
@@ -33,174 +35,8 @@
#define RTMP3 v19
#define RIV v20
-
-/* Helper macros. */
-
-#define PREPARE \
- ld1 {v24.16b-v27.16b}, [x0], #64; \
- ld1 {v28.16b-v31.16b}, [x0];
-
-#define SM4_CRYPT_BLK(b0) \
- rev32 b0.16b, b0.16b; \
- sm4e b0.4s, v24.4s; \
- sm4e b0.4s, v25.4s; \
- sm4e b0.4s, v26.4s; \
- sm4e b0.4s, v27.4s; \
- sm4e b0.4s, v28.4s; \
- sm4e b0.4s, v29.4s; \
- sm4e b0.4s, v30.4s; \
- sm4e b0.4s, v31.4s; \
- rev64 b0.4s, b0.4s; \
- ext b0.16b, b0.16b, b0.16b, #8; \
- rev32 b0.16b, b0.16b;
-
-#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
- rev32 b0.16b, b0.16b; \
- rev32 b1.16b, b1.16b; \
- rev32 b2.16b, b2.16b; \
- rev32 b3.16b, b3.16b; \
- sm4e b0.4s, v24.4s; \
- sm4e b1.4s, v24.4s; \
- sm4e b2.4s, v24.4s; \
- sm4e b3.4s, v24.4s; \
- sm4e b0.4s, v25.4s; \
- sm4e b1.4s, v25.4s; \
- sm4e b2.4s, v25.4s; \
- sm4e b3.4s, v25.4s; \
- sm4e b0.4s, v26.4s; \
- sm4e b1.4s, v26.4s; \
- sm4e b2.4s, v26.4s; \
- sm4e b3.4s, v26.4s; \
- sm4e b0.4s, v27.4s; \
- sm4e b1.4s, v27.4s; \
- sm4e b2.4s, v27.4s; \
- sm4e b3.4s, v27.4s; \
- sm4e b0.4s, v28.4s; \
- sm4e b1.4s, v28.4s; \
- sm4e b2.4s, v28.4s; \
- sm4e b3.4s, v28.4s; \
- sm4e b0.4s, v29.4s; \
- sm4e b1.4s, v29.4s; \
- sm4e b2.4s, v29.4s; \
- sm4e b3.4s, v29.4s; \
- sm4e b0.4s, v30.4s; \
- sm4e b1.4s, v30.4s; \
- sm4e b2.4s, v30.4s; \
- sm4e b3.4s, v30.4s; \
- sm4e b0.4s, v31.4s; \
- sm4e b1.4s, v31.4s; \
- sm4e b2.4s, v31.4s; \
- sm4e b3.4s, v31.4s; \
- rev64 b0.4s, b0.4s; \
- rev64 b1.4s, b1.4s; \
- rev64 b2.4s, b2.4s; \
- rev64 b3.4s, b3.4s; \
- ext b0.16b, b0.16b, b0.16b, #8; \
- ext b1.16b, b1.16b, b1.16b, #8; \
- ext b2.16b, b2.16b, b2.16b, #8; \
- ext b3.16b, b3.16b, b3.16b, #8; \
- rev32 b0.16b, b0.16b; \
- rev32 b1.16b, b1.16b; \
- rev32 b2.16b, b2.16b; \
- rev32 b3.16b, b3.16b;
-
-#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
- rev32 b0.16b, b0.16b; \
- rev32 b1.16b, b1.16b; \
- rev32 b2.16b, b2.16b; \
- rev32 b3.16b, b3.16b; \
- rev32 b4.16b, b4.16b; \
- rev32 b5.16b, b5.16b; \
- rev32 b6.16b, b6.16b; \
- rev32 b7.16b, b7.16b; \
- sm4e b0.4s, v24.4s; \
- sm4e b1.4s, v24.4s; \
- sm4e b2.4s, v24.4s; \
- sm4e b3.4s, v24.4s; \
- sm4e b4.4s, v24.4s; \
- sm4e b5.4s, v24.4s; \
- sm4e b6.4s, v24.4s; \
- sm4e b7.4s, v24.4s; \
- sm4e b0.4s, v25.4s; \
- sm4e b1.4s, v25.4s; \
- sm4e b2.4s, v25.4s; \
- sm4e b3.4s, v25.4s; \
- sm4e b4.4s, v25.4s; \
- sm4e b5.4s, v25.4s; \
- sm4e b6.4s, v25.4s; \
- sm4e b7.4s, v25.4s; \
- sm4e b0.4s, v26.4s; \
- sm4e b1.4s, v26.4s; \
- sm4e b2.4s, v26.4s; \
- sm4e b3.4s, v26.4s; \
- sm4e b4.4s, v26.4s; \
- sm4e b5.4s, v26.4s; \
- sm4e b6.4s, v26.4s; \
- sm4e b7.4s, v26.4s; \
- sm4e b0.4s, v27.4s; \
- sm4e b1.4s, v27.4s; \
- sm4e b2.4s, v27.4s; \
- sm4e b3.4s, v27.4s; \
- sm4e b4.4s, v27.4s; \
- sm4e b5.4s, v27.4s; \
- sm4e b6.4s, v27.4s; \
- sm4e b7.4s, v27.4s; \
- sm4e b0.4s, v28.4s; \
- sm4e b1.4s, v28.4s; \
- sm4e b2.4s, v28.4s; \
- sm4e b3.4s, v28.4s; \
- sm4e b4.4s, v28.4s; \
- sm4e b5.4s, v28.4s; \
- sm4e b6.4s, v28.4s; \
- sm4e b7.4s, v28.4s; \
- sm4e b0.4s, v29.4s; \
- sm4e b1.4s, v29.4s; \
- sm4e b2.4s, v29.4s; \
- sm4e b3.4s, v29.4s; \
- sm4e b4.4s, v29.4s; \
- sm4e b5.4s, v29.4s; \
- sm4e b6.4s, v29.4s; \
- sm4e b7.4s, v29.4s; \
- sm4e b0.4s, v30.4s; \
- sm4e b1.4s, v30.4s; \
- sm4e b2.4s, v30.4s; \
- sm4e b3.4s, v30.4s; \
- sm4e b4.4s, v30.4s; \
- sm4e b5.4s, v30.4s; \
- sm4e b6.4s, v30.4s; \
- sm4e b7.4s, v30.4s; \
- sm4e b0.4s, v31.4s; \
- sm4e b1.4s, v31.4s; \
- sm4e b2.4s, v31.4s; \
- sm4e b3.4s, v31.4s; \
- sm4e b4.4s, v31.4s; \
- sm4e b5.4s, v31.4s; \
- sm4e b6.4s, v31.4s; \
- sm4e b7.4s, v31.4s; \
- rev64 b0.4s, b0.4s; \
- rev64 b1.4s, b1.4s; \
- rev64 b2.4s, b2.4s; \
- rev64 b3.4s, b3.4s; \
- rev64 b4.4s, b4.4s; \
- rev64 b5.4s, b5.4s; \
- rev64 b6.4s, b6.4s; \
- rev64 b7.4s, b7.4s; \
- ext b0.16b, b0.16b, b0.16b, #8; \
- ext b1.16b, b1.16b, b1.16b, #8; \
- ext b2.16b, b2.16b, b2.16b, #8; \
- ext b3.16b, b3.16b, b3.16b, #8; \
- ext b4.16b, b4.16b, b4.16b, #8; \
- ext b5.16b, b5.16b, b5.16b, #8; \
- ext b6.16b, b6.16b, b6.16b, #8; \
- ext b7.16b, b7.16b, b7.16b, #8; \
- rev32 b0.16b, b0.16b; \
- rev32 b1.16b, b1.16b; \
- rev32 b2.16b, b2.16b; \
- rev32 b3.16b, b3.16b; \
- rev32 b4.16b, b4.16b; \
- rev32 b5.16b, b5.16b; \
- rev32 b6.16b, b6.16b; \
- rev32 b7.16b, b7.16b;
+#define RMAC v20
+#define RMASK v21
.align 3
@@ -231,32 +67,23 @@ SYM_FUNC_START(sm4_ce_expand_key)
sm4ekey v6.4s, v5.4s, v30.4s;
sm4ekey v7.4s, v6.4s, v31.4s;
+ adr_l x5, .Lbswap128_mask
+ ld1 {v24.16b}, [x5]
+
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1];
- rev64 v7.4s, v7.4s;
- rev64 v6.4s, v6.4s;
- rev64 v5.4s, v5.4s;
- rev64 v4.4s, v4.4s;
- rev64 v3.4s, v3.4s;
- rev64 v2.4s, v2.4s;
- rev64 v1.4s, v1.4s;
- rev64 v0.4s, v0.4s;
- ext v7.16b, v7.16b, v7.16b, #8;
- ext v6.16b, v6.16b, v6.16b, #8;
- ext v5.16b, v5.16b, v5.16b, #8;
- ext v4.16b, v4.16b, v4.16b, #8;
- ext v3.16b, v3.16b, v3.16b, #8;
- ext v2.16b, v2.16b, v2.16b, #8;
- ext v1.16b, v1.16b, v1.16b, #8;
- ext v0.16b, v0.16b, v0.16b, #8;
- st1 {v7.16b}, [x2], #16;
- st1 {v6.16b}, [x2], #16;
- st1 {v5.16b}, [x2], #16;
- st1 {v4.16b}, [x2], #16;
- st1 {v3.16b}, [x2], #16;
- st1 {v2.16b}, [x2], #16;
- st1 {v1.16b}, [x2], #16;
- st1 {v0.16b}, [x2];
+
+ tbl v16.16b, {v7.16b}, v24.16b
+ tbl v17.16b, {v6.16b}, v24.16b
+ tbl v18.16b, {v5.16b}, v24.16b
+ tbl v19.16b, {v4.16b}, v24.16b
+ tbl v20.16b, {v3.16b}, v24.16b
+ tbl v21.16b, {v2.16b}, v24.16b
+ tbl v22.16b, {v1.16b}, v24.16b
+ tbl v23.16b, {v0.16b}, v24.16b
+
+ st1 {v16.16b-v19.16b}, [x2], #64
+ st1 {v20.16b-v23.16b}, [x2]
ret;
SYM_FUNC_END(sm4_ce_expand_key)
@@ -268,7 +95,7 @@ SYM_FUNC_START(sm4_ce_crypt_block)
* x1: dst
* x2: src
*/
- PREPARE;
+ SM4_PREPARE(x0)
ld1 {v0.16b}, [x2];
SM4_CRYPT_BLK(v0);
@@ -285,7 +112,7 @@ SYM_FUNC_START(sm4_ce_crypt)
* x2: src
* w3: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
.Lcrypt_loop_blk:
sub w3, w3, #8;
@@ -337,26 +164,50 @@ SYM_FUNC_START(sm4_ce_cbc_enc)
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
+
+ ld1 {RIV.16b}, [x3]
+
+.Lcbc_enc_loop_4x:
+ cmp w4, #4
+ blt .Lcbc_enc_loop_1x
+
+ sub w4, w4, #4
- ld1 {RIV.16b}, [x3];
+ ld1 {v0.16b-v3.16b}, [x2], #64
-.Lcbc_enc_loop:
- sub w4, w4, #1;
+ eor v0.16b, v0.16b, RIV.16b
+ SM4_CRYPT_BLK(v0)
+ eor v1.16b, v1.16b, v0.16b
+ SM4_CRYPT_BLK(v1)
+ eor v2.16b, v2.16b, v1.16b
+ SM4_CRYPT_BLK(v2)
+ eor v3.16b, v3.16b, v2.16b
+ SM4_CRYPT_BLK(v3)
- ld1 {RTMP0.16b}, [x2], #16;
- eor RIV.16b, RIV.16b, RTMP0.16b;
+ st1 {v0.16b-v3.16b}, [x1], #64
+ mov RIV.16b, v3.16b
- SM4_CRYPT_BLK(RIV);
+ cbz w4, .Lcbc_enc_end
+ b .Lcbc_enc_loop_4x
- st1 {RIV.16b}, [x1], #16;
+.Lcbc_enc_loop_1x:
+ sub w4, w4, #1
- cbnz w4, .Lcbc_enc_loop;
+ ld1 {v0.16b}, [x2], #16
+ eor RIV.16b, RIV.16b, v0.16b
+ SM4_CRYPT_BLK(RIV)
+
+ st1 {RIV.16b}, [x1], #16
+
+ cbnz w4, .Lcbc_enc_loop_1x
+
+.Lcbc_enc_end:
/* store new IV */
- st1 {RIV.16b}, [x3];
+ st1 {RIV.16b}, [x3]
- ret;
+ ret
SYM_FUNC_END(sm4_ce_cbc_enc)
.align 3
@@ -368,82 +219,190 @@ SYM_FUNC_START(sm4_ce_cbc_dec)
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
- ld1 {RIV.16b}, [x3];
+ ld1 {RIV.16b}, [x3]
-.Lcbc_loop_blk:
- sub w4, w4, #8;
- tbnz w4, #31, .Lcbc_tail8;
+.Lcbc_dec_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lcbc_dec_4x
- ld1 {v0.16b-v3.16b}, [x2], #64;
- ld1 {v4.16b-v7.16b}, [x2];
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ ld1 {v4.16b-v7.16b}, [x2], #64
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ rev32 v8.16b, v0.16b
+ rev32 v9.16b, v1.16b
+ rev32 v10.16b, v2.16b
+ rev32 v11.16b, v3.16b
+ rev32 v12.16b, v4.16b
+ rev32 v13.16b, v5.16b
+ rev32 v14.16b, v6.16b
+ rev32 v15.16b, v7.16b
- sub x2, x2, #64;
- eor v0.16b, v0.16b, RIV.16b;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v1.16b, v1.16b, RTMP0.16b;
- eor v2.16b, v2.16b, RTMP1.16b;
- eor v3.16b, v3.16b, RTMP2.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
- eor v4.16b, v4.16b, RTMP3.16b;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v5.16b, v5.16b, RTMP0.16b;
- eor v6.16b, v6.16b, RTMP1.16b;
- eor v7.16b, v7.16b, RTMP2.16b;
+ eor v8.16b, v8.16b, RIV.16b
+ eor v9.16b, v9.16b, v0.16b
+ eor v10.16b, v10.16b, v1.16b
+ eor v11.16b, v11.16b, v2.16b
+ eor v12.16b, v12.16b, v3.16b
+ eor v13.16b, v13.16b, v4.16b
+ eor v14.16b, v14.16b, v5.16b
+ eor v15.16b, v15.16b, v6.16b
- mov RIV.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
+ st1 {v8.16b-v11.16b}, [x1], #64
+ st1 {v12.16b-v15.16b}, [x1], #64
- cbz w4, .Lcbc_end;
- b .Lcbc_loop_blk;
+ mov RIV.16b, v7.16b
-.Lcbc_tail8:
- add w4, w4, #8;
- cmp w4, #4;
- blt .Lcbc_tail4;
+ cbz w4, .Lcbc_dec_end
+ b .Lcbc_dec_loop_8x
- sub w4, w4, #4;
+.Lcbc_dec_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lcbc_dec_loop_1x
- ld1 {v0.16b-v3.16b}, [x2];
+ sub w4, w4, #4
- SM4_CRYPT_BLK4(v0, v1, v2, v3);
+ ld1 {v0.16b-v3.16b}, [x2], #64
- eor v0.16b, v0.16b, RIV.16b;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v1.16b, v1.16b, RTMP0.16b;
- eor v2.16b, v2.16b, RTMP1.16b;
- eor v3.16b, v3.16b, RTMP2.16b;
+ rev32 v8.16b, v0.16b
+ rev32 v9.16b, v1.16b
+ rev32 v10.16b, v2.16b
+ rev32 v11.16b, v3.16b
- mov RIV.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
- cbz w4, .Lcbc_end;
+ eor v8.16b, v8.16b, RIV.16b
+ eor v9.16b, v9.16b, v0.16b
+ eor v10.16b, v10.16b, v1.16b
+ eor v11.16b, v11.16b, v2.16b
-.Lcbc_tail4:
- sub w4, w4, #1;
+ st1 {v8.16b-v11.16b}, [x1], #64
- ld1 {v0.16b}, [x2];
+ mov RIV.16b, v3.16b
- SM4_CRYPT_BLK(v0);
+ cbz w4, .Lcbc_dec_end
- eor v0.16b, v0.16b, RIV.16b;
- ld1 {RIV.16b}, [x2], #16;
- st1 {v0.16b}, [x1], #16;
+.Lcbc_dec_loop_1x:
+ sub w4, w4, #1
+
+ ld1 {v0.16b}, [x2], #16
- cbnz w4, .Lcbc_tail4;
+ rev32 v8.16b, v0.16b
-.Lcbc_end:
+ SM4_CRYPT_BLK_BE(v8)
+
+ eor v8.16b, v8.16b, RIV.16b
+ st1 {v8.16b}, [x1], #16
+
+ mov RIV.16b, v0.16b
+
+ cbnz w4, .Lcbc_dec_loop_1x
+
+.Lcbc_dec_end:
/* store new IV */
- st1 {RIV.16b}, [x3];
+ st1 {RIV.16b}, [x3]
- ret;
+ ret
SYM_FUNC_END(sm4_ce_cbc_dec)
.align 3
+SYM_FUNC_START(sm4_ce_cbc_cts_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nbytes
+ *
+ * CBC encryption with ciphertext stealing: one full 16-byte block
+ * followed by a final block of Ln = nbytes - 16 bytes. The data is
+ * accessed through two overlapping 16-byte loads and two overlapping
+ * 16-byte stores; nothing is written back to the iv buffer at x3.
+ * NOTE(review): the overlapping accesses only cover the buffers
+ * exactly when 16 < nbytes <= 32 - presumably guaranteed by the
+ * caller, confirm against the glue code.
+ */
+ SM4_PREPARE(x0)
+
+ sub w5, w4, #16 /* Ln: byte length of the final block */
+ uxtw x5, w5
+
+ ld1 {RIV.16b}, [x3]
+
+ ld1 {v0.16b}, [x2]
+ eor RIV.16b, RIV.16b, v0.16b
+ SM4_CRYPT_BLK(RIV) /* RIV now holds the block called En-1 below */
+
+ /* load permute table */
+ adr_l x6, .Lcts_permute_table
+ add x7, x6, #32
+ add x6, x6, x5 /* v3 indices are read from table + Ln */
+ sub x7, x7, x5 /* v4 indices are read from table + 32 - Ln */
+ ld1 {v3.16b}, [x6]
+ ld1 {v4.16b}, [x7]
+
+ /* overlapping loads */
+ add x2, x2, x5
+ ld1 {v1.16b}, [x2] /* 16 bytes starting at src + Ln; the last Ln of them are Pn */
+
+ /* create Cn from En-1 */
+ tbl v0.16b, {RIV.16b}, v3.16b
+ /* padding Pn with zeros */
+ tbl v1.16b, {v1.16b}, v4.16b
+
+ eor v1.16b, v1.16b, RIV.16b
+ SM4_CRYPT_BLK(v1)
+
+ /* overlapping stores */
+ add x5, x1, x5
+ st1 {v0.16b}, [x5] /* stored first: the store below overwrites its leading 16 - Ln bytes */
+ st1 {v1.16b}, [x1]
+
+ ret
+SYM_FUNC_END(sm4_ce_cbc_cts_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cbc_cts_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: iv (big endian, 128 bit)
+ * w4: nbytes
+ *
+ * CBC decryption with ciphertext stealing: one full 16-byte block
+ * followed by a final block of Ln = nbytes - 16 bytes. The data is
+ * accessed through two overlapping 16-byte loads and two overlapping
+ * 16-byte stores; nothing is written back to the iv buffer at x3.
+ * NOTE(review): the overlapping accesses only cover the buffers
+ * exactly when 16 < nbytes <= 32 - presumably guaranteed by the
+ * caller, confirm against the glue code.
+ */
+ SM4_PREPARE(x0)
+
+ sub w5, w4, #16 /* Ln: byte length of the final block */
+ uxtw x5, w5
+
+ ld1 {RIV.16b}, [x3]
+
+ /* load permute table */
+ adr_l x6, .Lcts_permute_table
+ add x7, x6, #32
+ add x6, x6, x5 /* v3 indices are read from table + Ln */
+ sub x7, x7, x5 /* v4 indices are read from table + 32 - Ln */
+ ld1 {v3.16b}, [x6]
+ ld1 {v4.16b}, [x7]
+
+ /* overlapping loads */
+ ld1 {v0.16b}, [x2], x5 /* first 16 bytes; post-increments x2 by Ln */
+ ld1 {v1.16b}, [x2] /* 16 bytes starting at src + Ln */
+
+ SM4_CRYPT_BLK(v0)
+ /* select the first Ln bytes of Xn to create Pn */
+ tbl v2.16b, {v0.16b}, v3.16b
+ eor v2.16b, v2.16b, v1.16b
+
+ /* overwrite the first Ln bytes with Cn to create En-1 */
+ tbx v0.16b, {v1.16b}, v4.16b
+ SM4_CRYPT_BLK(v0)
+ eor v0.16b, v0.16b, RIV.16b
+
+ /* overlapping stores */
+ add x5, x1, x5
+ st1 {v2.16b}, [x5] /* stored first: the store below overwrites its leading 16 - Ln bytes */
+ st1 {v0.16b}, [x1]
+
+ ret
+SYM_FUNC_END(sm4_ce_cbc_cts_dec)
+
+.align 3
SYM_FUNC_START(sm4_ce_cfb_enc)
/* input:
* x0: round key array, CTX
@@ -452,25 +411,57 @@ SYM_FUNC_START(sm4_ce_cfb_enc)
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
+
+ ld1 {RIV.16b}, [x3]
+
+.Lcfb_enc_loop_4x:
+ cmp w4, #4
+ blt .Lcfb_enc_loop_1x
+
+ sub w4, w4, #4
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ rev32 v8.16b, RIV.16b
+ SM4_CRYPT_BLK_BE(v8)
+ eor v0.16b, v0.16b, v8.16b
+
+ rev32 v8.16b, v0.16b
+ SM4_CRYPT_BLK_BE(v8)
+ eor v1.16b, v1.16b, v8.16b
+
+ rev32 v8.16b, v1.16b
+ SM4_CRYPT_BLK_BE(v8)
+ eor v2.16b, v2.16b, v8.16b
+
+ rev32 v8.16b, v2.16b
+ SM4_CRYPT_BLK_BE(v8)
+ eor v3.16b, v3.16b, v8.16b
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+ mov RIV.16b, v3.16b
+
+ cbz w4, .Lcfb_enc_end
+ b .Lcfb_enc_loop_4x
- ld1 {RIV.16b}, [x3];
+.Lcfb_enc_loop_1x:
+ sub w4, w4, #1
-.Lcfb_enc_loop:
- sub w4, w4, #1;
+ ld1 {v0.16b}, [x2], #16
- SM4_CRYPT_BLK(RIV);
+ SM4_CRYPT_BLK(RIV)
+ eor RIV.16b, RIV.16b, v0.16b
- ld1 {RTMP0.16b}, [x2], #16;
- eor RIV.16b, RIV.16b, RTMP0.16b;
- st1 {RIV.16b}, [x1], #16;
+ st1 {RIV.16b}, [x1], #16
- cbnz w4, .Lcfb_enc_loop;
+ cbnz w4, .Lcfb_enc_loop_1x
+.Lcfb_enc_end:
/* store new IV */
- st1 {RIV.16b}, [x3];
+ st1 {RIV.16b}, [x3]
- ret;
+ ret
SYM_FUNC_END(sm4_ce_cfb_enc)
.align 3
@@ -482,79 +473,91 @@ SYM_FUNC_START(sm4_ce_cfb_dec)
* x3: iv (big endian, 128 bit)
* w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
- ld1 {v0.16b}, [x3];
+ ld1 {RIV.16b}, [x3]
-.Lcfb_loop_blk:
- sub w4, w4, #8;
- tbnz w4, #31, .Lcfb_tail8;
+.Lcfb_dec_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lcfb_dec_4x
- ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
- ld1 {v4.16b-v7.16b}, [x2];
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ ld1 {v4.16b-v7.16b}, [x2], #64
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ rev32 v8.16b, RIV.16b
+ rev32 v9.16b, v0.16b
+ rev32 v10.16b, v1.16b
+ rev32 v11.16b, v2.16b
+ rev32 v12.16b, v3.16b
+ rev32 v13.16b, v4.16b
+ rev32 v14.16b, v5.16b
+ rev32 v15.16b, v6.16b
- sub x2, x2, #48;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v4.16b, v4.16b, RTMP0.16b;
- eor v5.16b, v5.16b, RTMP1.16b;
- eor v6.16b, v6.16b, RTMP2.16b;
- eor v7.16b, v7.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
+ mov RIV.16b, v7.16b
- mov v0.16b, RTMP3.16b;
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
- cbz w4, .Lcfb_end;
- b .Lcfb_loop_blk;
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
-.Lcfb_tail8:
- add w4, w4, #8;
- cmp w4, #4;
- blt .Lcfb_tail4;
+ cbz w4, .Lcfb_dec_end
+ b .Lcfb_dec_loop_8x
- sub w4, w4, #4;
+.Lcfb_dec_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lcfb_dec_loop_1x
- ld1 {v1.16b, v2.16b, v3.16b}, [x2];
+ sub w4, w4, #4
- SM4_CRYPT_BLK4(v0, v1, v2, v3);
+ ld1 {v0.16b-v3.16b}, [x2], #64
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ rev32 v8.16b, RIV.16b
+ rev32 v9.16b, v0.16b
+ rev32 v10.16b, v1.16b
+ rev32 v11.16b, v2.16b
- mov v0.16b, RTMP3.16b;
+ SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
- cbz w4, .Lcfb_end;
+ mov RIV.16b, v3.16b
-.Lcfb_tail4:
- sub w4, w4, #1;
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
- SM4_CRYPT_BLK(v0);
+ st1 {v0.16b-v3.16b}, [x1], #64
- ld1 {RTMP0.16b}, [x2], #16;
- eor v0.16b, v0.16b, RTMP0.16b;
- st1 {v0.16b}, [x1], #16;
+ cbz w4, .Lcfb_dec_end
+
+.Lcfb_dec_loop_1x:
+ sub w4, w4, #1
+
+ ld1 {v0.16b}, [x2], #16
+
+ SM4_CRYPT_BLK(RIV)
- mov v0.16b, RTMP0.16b;
+ eor RIV.16b, RIV.16b, v0.16b
+ st1 {RIV.16b}, [x1], #16
- cbnz w4, .Lcfb_tail4;
+ mov RIV.16b, v0.16b
-.Lcfb_end:
+ cbnz w4, .Lcfb_dec_loop_1x
+
+.Lcfb_dec_end:
/* store new IV */
- st1 {v0.16b}, [x3];
+ st1 {RIV.16b}, [x3]
- ret;
+ ret
SYM_FUNC_END(sm4_ce_cfb_dec)
.align 3
@@ -566,95 +569,525 @@ SYM_FUNC_START(sm4_ce_ctr_enc)
* x3: ctr (big endian, 128 bit)
* w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE(x0)
- ldp x7, x8, [x3];
- rev x7, x7;
- rev x8, x8;
+ ldp x7, x8, [x3]
+ rev x7, x7
+ rev x8, x8
-.Lctr_loop_blk:
- sub w4, w4, #8;
- tbnz w4, #31, .Lctr_tail8;
+.Lctr_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lctr_4x
-#define inc_le128(vctr) \
- mov vctr.d[1], x8; \
- mov vctr.d[0], x7; \
- adds x8, x8, #1; \
- adc x7, x7, xzr; \
- rev64 vctr.16b, vctr.16b;
+#define inc_le128(vctr) \
+ mov vctr.d[1], x8; \
+ mov vctr.d[0], x7; \
+ adds x8, x8, #1; \
+ rev64 vctr.16b, vctr.16b; \
+ adc x7, x7, xzr;
/* construct CTRs */
- inc_le128(v0); /* +0 */
- inc_le128(v1); /* +1 */
- inc_le128(v2); /* +2 */
- inc_le128(v3); /* +3 */
- inc_le128(v4); /* +4 */
- inc_le128(v5); /* +5 */
- inc_le128(v6); /* +6 */
- inc_le128(v7); /* +7 */
+ inc_le128(v0) /* +0 */
+ inc_le128(v1) /* +1 */
+ inc_le128(v2) /* +2 */
+ inc_le128(v3) /* +3 */
+ inc_le128(v4) /* +4 */
+ inc_le128(v5) /* +5 */
+ inc_le128(v6) /* +6 */
+ inc_le128(v7) /* +7 */
+
+ ld1 {v8.16b-v11.16b}, [x2], #64
+ ld1 {v12.16b-v15.16b}, [x2], #64
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ cbz w4, .Lctr_end
+ b .Lctr_loop_8x
+
+.Lctr_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lctr_loop_1x
+
+ sub w4, w4, #4
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ /* construct CTRs */
+ inc_le128(v0) /* +0 */
+ inc_le128(v1) /* +1 */
+ inc_le128(v2) /* +2 */
+ inc_le128(v3) /* +3 */
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ ld1 {v8.16b-v11.16b}, [x2], #64
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v4.16b, v4.16b, RTMP0.16b;
- eor v5.16b, v5.16b, RTMP1.16b;
- eor v6.16b, v6.16b, RTMP2.16b;
- eor v7.16b, v7.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
- cbz w4, .Lctr_end;
- b .Lctr_loop_blk;
+ st1 {v0.16b-v3.16b}, [x1], #64
-.Lctr_tail8:
- add w4, w4, #8;
- cmp w4, #4;
- blt .Lctr_tail4;
+ cbz w4, .Lctr_end
- sub w4, w4, #4;
+.Lctr_loop_1x:
+ sub w4, w4, #1
/* construct CTRs */
- inc_le128(v0); /* +0 */
- inc_le128(v1); /* +1 */
- inc_le128(v2); /* +2 */
- inc_le128(v3); /* +3 */
+ inc_le128(v0)
- SM4_CRYPT_BLK4(v0, v1, v2, v3);
+ ld1 {v8.16b}, [x2], #16
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ SM4_CRYPT_BLK(v0)
- cbz w4, .Lctr_end;
+ eor v0.16b, v0.16b, v8.16b
+ st1 {v0.16b}, [x1], #16
-.Lctr_tail4:
- sub w4, w4, #1;
+ cbnz w4, .Lctr_loop_1x
- /* construct CTRs */
- inc_le128(v0);
+.Lctr_end:
+ /* store new CTR */
+ rev x7, x7
+ rev x8, x8
+ stp x7, x8, [x3]
- SM4_CRYPT_BLK(v0);
+ ret
+SYM_FUNC_END(sm4_ce_ctr_enc)
- ld1 {RTMP0.16b}, [x2], #16;
- eor v0.16b, v0.16b, RTMP0.16b;
- st1 {v0.16b}, [x1], #16;
- cbnz w4, .Lctr_tail4;
+#define tweak_next(vt, vin, RTMP) /* vt = vin shifted left by one bit as a 128-bit value, with 0x87 folded into the low lane on overflow; clobbers RTMP, expects RMASK = {0x1, 0x87} */ \
+ sshr RTMP.2d, vin.2d, #63; /* all-ones in every 64-bit lane whose top bit is set */ \
+ and RTMP.16b, RTMP.16b, RMASK.16b; /* keep only the per-lane correction selected by RMASK */ \
+ add vt.2d, vin.2d, vin.2d; /* shift each 64-bit lane left by one */ \
+ ext RTMP.16b, RTMP.16b, RTMP.16b, #8; /* swap the lanes so each correction lands in the other lane */ \
+ eor vt.16b, vt.16b, RTMP.16b;
-.Lctr_end:
- /* store new CTR */
- rev x7, x7;
- rev x8, x8;
- stp x7, x8, [x3];
+.align 3
+SYM_FUNC_START(sm4_ce_xts_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: tweak (big endian, 128 bit)
+ * w4: nbytes
+ * x5: round key array for IV
+ *
+ * XTS encryption of nbytes bytes. When x5 is non-zero the value at
+ * x3 is first encrypted with that key to form the initial tweak;
+ * when x5 is zero it is used as the current tweak unchanged. Whole
+ * blocks are processed 8, 4 and 1 at a time. If nbytes is not a
+ * multiple of 16, the last full block and the tail are handled with
+ * ciphertext stealing and the tweak is NOT stored back; otherwise
+ * the next tweak is stored to x3.
+ * NOTE(review): tweak_next carries from the low 64-bit lane into the
+ * high one, i.e. it treats the tweak as little endian; the "big
+ * endian" wording above looks copied from the IV comments - confirm.
+ * NOTE(review): nbytes < 16 would make the block count wrap; the
+ * caller presumably guarantees nbytes >= 16 - confirm.
+ */
+ ld1 {v8.16b}, [x3]
- ret;
-SYM_FUNC_END(sm4_ce_ctr_enc)
+ cbz x5, .Lxts_enc_nofirst /* no tweak key supplied: v8 is already the tweak */
+
+ SM4_PREPARE(x5)
+
+ /* Generate first tweak */
+ SM4_CRYPT_BLK(v8)
+
+.Lxts_enc_nofirst:
+ SM4_PREPARE(x0)
+
+ ands w5, w4, #15 /* w5 = tail length; Z flag set when there is no tail */
+ lsr w4, w4, #4 /* w4 = number of full blocks */
+ sub w6, w4, #1
+ csel w4, w4, w6, eq /* with a tail, hold the last full block back for the CTS step */
+ uxtw x5, w5
+
+ movi RMASK.2s, #0x1
+ movi RTMP0.2s, #0x87
+ uzp1 RMASK.4s, RMASK.4s, RTMP0.4s /* RMASK.2d = {0x1, 0x87}, as tweak_next expects */
+
+ cbz w4, .Lxts_enc_cts
+
+.Lxts_enc_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lxts_enc_4x /* fewer than 8 blocks left */
+
+ tweak_next( v9, v8, RTMP0)
+ tweak_next(v10, v9, RTMP1)
+ tweak_next(v11, v10, RTMP2)
+ tweak_next(v12, v11, RTMP3)
+ tweak_next(v13, v12, RTMP0)
+ tweak_next(v14, v13, RTMP1)
+ tweak_next(v15, v14, RTMP2)
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ ld1 {v4.16b-v7.16b}, [x2], #64
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ tweak_next(v8, v15, RTMP3) /* v8 = tweak for the block after this batch */
+
+ cbz w4, .Lxts_enc_cts
+ b .Lxts_enc_loop_8x
+
+.Lxts_enc_4x:
+ add w4, w4, #8 /* undo the speculative subtraction */
+ cmp w4, #4
+ blt .Lxts_enc_loop_1x
+
+ sub w4, w4, #4
+
+ tweak_next( v9, v8, RTMP0)
+ tweak_next(v10, v9, RTMP1)
+ tweak_next(v11, v10, RTMP2)
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ st1 {v0.16b-v3.16b}, [x1], #64
+
+ tweak_next(v8, v11, RTMP3) /* v8 = tweak for the block after this batch */
+
+ cbz w4, .Lxts_enc_cts
+
+.Lxts_enc_loop_1x:
+ sub w4, w4, #1
+
+ ld1 {v0.16b}, [x2], #16
+ eor v0.16b, v0.16b, v8.16b
+
+ SM4_CRYPT_BLK(v0)
+
+ eor v0.16b, v0.16b, v8.16b
+ st1 {v0.16b}, [x1], #16
+
+ tweak_next(v8, v8, RTMP0)
+
+ cbnz w4, .Lxts_enc_loop_1x
+
+.Lxts_enc_cts:
+ cbz x5, .Lxts_enc_end /* no tail bytes: just store the next tweak */
+
+ /* cipher text stealing */
+
+ tweak_next(v9, v8, RTMP0) /* v9 = tweak for the final, stolen block */
+ ld1 {v0.16b}, [x2] /* last full block, processed with tweak v8 */
+ eor v0.16b, v0.16b, v8.16b
+ SM4_CRYPT_BLK(v0)
+ eor v0.16b, v0.16b, v8.16b
+
+ /* load permute table */
+ adr_l x6, .Lcts_permute_table
+ add x7, x6, #32
+ add x6, x6, x5 /* v3 indices are read from table + tail length */
+ sub x7, x7, x5 /* v4 indices are read from table + 32 - tail length */
+ ld1 {v3.16b}, [x6]
+ ld1 {v4.16b}, [x7]
+
+ /* overlapping loads */
+ add x2, x2, x5
+ ld1 {v1.16b}, [x2] /* 16 bytes ending at the end of src */
+
+ /* create Cn from En-1 */
+ tbl v2.16b, {v0.16b}, v3.16b
+ /* padding Pn with En-1 at the end */
+ tbx v0.16b, {v1.16b}, v4.16b
+
+ eor v0.16b, v0.16b, v9.16b
+ SM4_CRYPT_BLK(v0)
+ eor v0.16b, v0.16b, v9.16b
+
+
+ /* overlapping stores */
+ add x5, x1, x5
+ st1 {v2.16b}, [x5] /* stored first: the store below overwrites its leading bytes */
+ st1 {v0.16b}, [x1]
+
+ b .Lxts_enc_ret /* the tweak is not written back on the CTS path */
+
+.Lxts_enc_end:
+ /* store new tweak */
+ st1 {v8.16b}, [x3]
+
+.Lxts_enc_ret:
+ ret
+SYM_FUNC_END(sm4_ce_xts_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_xts_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: tweak (big endian, 128 bit)
+ * w4: nbytes
+ * x5: round key array for IV
+ *
+ * XTS decryption of nbytes bytes. When x5 is non-zero the value at
+ * x3 is first encrypted with that key to form the initial tweak;
+ * when x5 is zero it is used as the current tweak unchanged. Whole
+ * blocks are processed 8, 4 and 1 at a time. If nbytes is not a
+ * multiple of 16, the last full block and the tail are handled with
+ * ciphertext stealing (the later tweak is applied first) and the
+ * tweak is NOT stored back; otherwise the next tweak is stored to x3.
+ * NOTE(review): tweak_next carries from the low 64-bit lane into the
+ * high one, i.e. it treats the tweak as little endian; the "big
+ * endian" wording above looks copied from the IV comments - confirm.
+ * NOTE(review): nbytes < 16 would make the block count wrap; the
+ * caller presumably guarantees nbytes >= 16 - confirm.
+ */
+ ld1 {v8.16b}, [x3]
+
+ cbz x5, .Lxts_dec_nofirst /* no tweak key supplied: v8 is already the tweak */
+
+ SM4_PREPARE(x5)
+
+ /* Generate first tweak */
+ SM4_CRYPT_BLK(v8)
+
+.Lxts_dec_nofirst:
+ SM4_PREPARE(x0)
+
+ ands w5, w4, #15 /* w5 = tail length; Z flag set when there is no tail */
+ lsr w4, w4, #4 /* w4 = number of full blocks */
+ sub w6, w4, #1
+ csel w4, w4, w6, eq /* with a tail, hold the last full block back for the CTS step */
+ uxtw x5, w5
+
+ movi RMASK.2s, #0x1
+ movi RTMP0.2s, #0x87
+ uzp1 RMASK.4s, RMASK.4s, RTMP0.4s /* RMASK.2d = {0x1, 0x87}, as tweak_next expects */
+
+ cbz w4, .Lxts_dec_cts
+
+.Lxts_dec_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lxts_dec_4x /* fewer than 8 blocks left */
+
+ tweak_next( v9, v8, RTMP0)
+ tweak_next(v10, v9, RTMP1)
+ tweak_next(v11, v10, RTMP2)
+ tweak_next(v12, v11, RTMP3)
+ tweak_next(v13, v12, RTMP0)
+ tweak_next(v14, v13, RTMP1)
+ tweak_next(v15, v14, RTMP2)
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ ld1 {v4.16b-v7.16b}, [x2], #64
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v5.16b, v5.16b, v13.16b
+ eor v6.16b, v6.16b, v14.16b
+ eor v7.16b, v7.16b, v15.16b
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ tweak_next(v8, v15, RTMP3) /* v8 = tweak for the block after this batch */
+
+ cbz w4, .Lxts_dec_cts
+ b .Lxts_dec_loop_8x
+
+.Lxts_dec_4x:
+ add w4, w4, #8 /* undo the speculative subtraction */
+ cmp w4, #4
+ blt .Lxts_dec_loop_1x
+
+ sub w4, w4, #4
+
+ tweak_next( v9, v8, RTMP0)
+ tweak_next(v10, v9, RTMP1)
+ tweak_next(v11, v10, RTMP2)
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ eor v0.16b, v0.16b, v8.16b
+ eor v1.16b, v1.16b, v9.16b
+ eor v2.16b, v2.16b, v10.16b
+ eor v3.16b, v3.16b, v11.16b
+ st1 {v0.16b-v3.16b}, [x1], #64
+
+ tweak_next(v8, v11, RTMP3) /* v8 = tweak for the block after this batch */
+
+ cbz w4, .Lxts_dec_cts
+
+.Lxts_dec_loop_1x:
+ sub w4, w4, #1
+
+ ld1 {v0.16b}, [x2], #16
+ eor v0.16b, v0.16b, v8.16b
+
+ SM4_CRYPT_BLK(v0)
+
+ eor v0.16b, v0.16b, v8.16b
+ st1 {v0.16b}, [x1], #16
+
+ tweak_next(v8, v8, RTMP0)
+
+ cbnz w4, .Lxts_dec_loop_1x
+
+.Lxts_dec_cts:
+ cbz x5, .Lxts_dec_end /* no tail bytes: just store the next tweak */
+
+ /* cipher text stealing */
+
+ tweak_next(v9, v8, RTMP0) /* v9 = tweak following v8 */
+ ld1 {v0.16b}, [x2] /* last full block, processed with the later tweak v9 */
+ eor v0.16b, v0.16b, v9.16b
+ SM4_CRYPT_BLK(v0)
+ eor v0.16b, v0.16b, v9.16b
+
+ /* load permute table */
+ adr_l x6, .Lcts_permute_table
+ add x7, x6, #32
+ add x6, x6, x5 /* v3 indices are read from table + tail length */
+ sub x7, x7, x5 /* v4 indices are read from table + 32 - tail length */
+ ld1 {v3.16b}, [x6]
+ ld1 {v4.16b}, [x7]
+
+ /* overlapping loads */
+ add x2, x2, x5
+ ld1 {v1.16b}, [x2] /* 16 bytes ending at the end of src */
+
+ /* move the first tail-length bytes of the processed block to the end: the final partial output */
+ tbl v2.16b, {v0.16b}, v3.16b
+ /* overwrite the first tail-length bytes with the trailing input bytes */
+ tbx v0.16b, {v1.16b}, v4.16b
+
+ eor v0.16b, v0.16b, v8.16b
+ SM4_CRYPT_BLK(v0)
+ eor v0.16b, v0.16b, v8.16b
+
+
+ /* overlapping stores */
+ add x5, x1, x5
+ st1 {v2.16b}, [x5] /* stored first: the store below overwrites its leading bytes */
+ st1 {v0.16b}, [x1]
+
+ b .Lxts_dec_ret /* the tweak is not written back on the CTS path */
+
+.Lxts_dec_end:
+ /* store new tweak */
+ st1 {v8.16b}, [x3]
+
+.Lxts_dec_ret:
+ ret
+SYM_FUNC_END(sm4_ce_xts_dec)
+
+.align 3
+SYM_FUNC_START(sm4_ce_mac_update)
+ /* input:
+ * x0: round key array, CTX
+ * x1: digest
+ * x2: src
+ * w3: nblocks
+ * w4: enc_before
+ * w5: enc_after
+ *
+ * Folds nblocks 16-byte blocks from src into the digest at x1 by
+ * repeated "XOR block, then encrypt". If enc_before is non-zero the
+ * loaded digest is encrypted once before any block is absorbed. If
+ * enc_after is zero the last block is only XORed in and left
+ * unencrypted. The updated digest is always stored back to x1.
+ */
+ SM4_PREPARE(x0)
+
+ ld1 {RMAC.16b}, [x1]
+
+ cbz w4, .Lmac_update
+
+ SM4_CRYPT_BLK(RMAC)
+
+.Lmac_update:
+ cbz w3, .Lmac_ret /* no input blocks: store the digest as it is now */
+
+ sub w6, w3, #1
+ cmp w5, wzr
+ csel w3, w3, w6, ne /* without enc_after, hold the last block back for .Lmac_end */
+
+ cbz w3, .Lmac_end
+
+.Lmac_loop_4x:
+ cmp w3, #4
+ blt .Lmac_loop_1x
+
+ sub w3, w3, #4
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ eor RMAC.16b, RMAC.16b, v0.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v1.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v2.16b
+ SM4_CRYPT_BLK(RMAC)
+ eor RMAC.16b, RMAC.16b, v3.16b
+ SM4_CRYPT_BLK(RMAC)
+
+ cbz w3, .Lmac_end
+ b .Lmac_loop_4x
+
+.Lmac_loop_1x:
+ sub w3, w3, #1
+
+ ld1 {v0.16b}, [x2], #16
+
+ eor RMAC.16b, RMAC.16b, v0.16b
+ SM4_CRYPT_BLK(RMAC)
+
+ cbnz w3, .Lmac_loop_1x
+
+
+.Lmac_end:
+ cbnz w5, .Lmac_ret /* enc_after: every block was already absorbed and encrypted */
+
+ ld1 {v0.16b}, [x2], #16 /* held-back last block: XOR only, no encryption */
+ eor RMAC.16b, RMAC.16b, v0.16b
+
+.Lmac_ret:
+ st1 {RMAC.16b}, [x1]
+ ret
+SYM_FUNC_END(sm4_ce_mac_update)
+
+
+ .section ".rodata", "a"
+ .align 4
+.Lbswap128_mask: /* tbl index vector: reverses the order of the four 32-bit words, bytes within a word keep their order */
+ .byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
+ .byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
+
+.Lcts_permute_table: /* 16 x 0xff, identity 0x0..0xf, 16 x 0xff; users load 16 index bytes at offsets Ln and 32 - Ln */
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xff is out of range: tbl writes 0, tbx keeps the old byte */
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S
new file mode 100644
index 000000000000..7aa3ec18a289
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-gcm-core.S
@@ -0,0 +1,741 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <[email protected]>
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "sm4-ce-asm.h"
+
+.arch armv8-a+crypto
+
+.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31 /* registers that sm4e may name in this file */
+ .set .Lv\b\().4s, \b /* symbol .Lv<N>.4s evaluates to the register number N */
+.endr
+
+.macro sm4e, vd, vn /* emits the instruction as a raw word; presumably so the file assembles without SM4 support in the toolchain - confirm */
+ .inst 0xcec08400 | (.L\vn << 5) | .L\vd /* vn number goes in bits 5 and up, vd number in the low bits */
+.endm
+
+/* Register macros */
+
+/* Used for both encryption and decryption */
+#define RHASH v21 /* running GHASH value, kept bit-reversed (rbit) while in registers */
+#define RRCONST v22 /* reduction constant, replicated from .Lghash_rconst with ld1r */
+#define RZERO v23 /* must be zeroed by the caller; the multiply/reduce macros use it as an all-zero source */
+
+/* Helper macros. */
+
+/*
+ * input: m0, m1
+ * output: r0:r1 (low 128-bits in r0, high in r1)
+ *
+ * Carry-less 128 x 128 -> 256-bit multiply built from four 64 x 64
+ * pmull/pmull2 products: low*low goes to r0, high*high goes to r1, and
+ * the two cross products are XORed together, then split so that their
+ * low 64 bits are added into the upper half of r0 and their high 64
+ * bits into the lower half of r1.
+ * Clobbers T0 and T1. RZERO must hold zero.
+ */
+#define PMUL_128x128(r0, r1, m0, m1, T0, T1) \
+ ext T0.16b, m1.16b, m1.16b, #8; \
+ pmull r0.1q, m0.1d, m1.1d; \
+ pmull T1.1q, m0.1d, T0.1d; \
+ pmull2 T0.1q, m0.2d, T0.2d; \
+ pmull2 r1.1q, m0.2d, m1.2d; \
+ eor T0.16b, T0.16b, T1.16b; \
+ ext T1.16b, RZERO.16b, T0.16b, #8; \
+ ext T0.16b, T0.16b, RZERO.16b, #8; \
+ eor r0.16b, r0.16b, T1.16b; \
+ eor r1.16b, r1.16b, T0.16b;
+
+#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1, \
+ r2, r3, m2, m3, T2, T3, \
+ r4, r5, m4, m5, T4, T5, \
+ r6, r7, m6, m7, T6, T7) /* four independent PMUL_128x128 multiplies interleaved step by step; results in r0:r1, r2:r3, r4:r5, r6:r7; clobbers T0-T7; RZERO must hold zero */ \
+ ext T0.16b, m1.16b, m1.16b, #8; \
+ ext T2.16b, m3.16b, m3.16b, #8; \
+ ext T4.16b, m5.16b, m5.16b, #8; \
+ ext T6.16b, m7.16b, m7.16b, #8; \
+ pmull r0.1q, m0.1d, m1.1d; \
+ pmull r2.1q, m2.1d, m3.1d; \
+ pmull r4.1q, m4.1d, m5.1d; \
+ pmull r6.1q, m6.1d, m7.1d; \
+ pmull T1.1q, m0.1d, T0.1d; \
+ pmull T3.1q, m2.1d, T2.1d; \
+ pmull T5.1q, m4.1d, T4.1d; \
+ pmull T7.1q, m6.1d, T6.1d; \
+ pmull2 T0.1q, m0.2d, T0.2d; \
+ pmull2 T2.1q, m2.2d, T2.2d; \
+ pmull2 T4.1q, m4.2d, T4.2d; \
+ pmull2 T6.1q, m6.2d, T6.2d; \
+ pmull2 r1.1q, m0.2d, m1.2d; \
+ pmull2 r3.1q, m2.2d, m3.2d; \
+ pmull2 r5.1q, m4.2d, m5.2d; \
+ pmull2 r7.1q, m6.2d, m7.2d; \
+ eor T0.16b, T0.16b, T1.16b; \
+ eor T2.16b, T2.16b, T3.16b; \
+ eor T4.16b, T4.16b, T5.16b; \
+ eor T6.16b, T6.16b, T7.16b; \
+ ext T1.16b, RZERO.16b, T0.16b, #8; \
+ ext T3.16b, RZERO.16b, T2.16b, #8; \
+ ext T5.16b, RZERO.16b, T4.16b, #8; \
+ ext T7.16b, RZERO.16b, T6.16b, #8; \
+ ext T0.16b, T0.16b, RZERO.16b, #8; \
+ ext T2.16b, T2.16b, RZERO.16b, #8; \
+ ext T4.16b, T4.16b, RZERO.16b, #8; \
+ ext T6.16b, T6.16b, RZERO.16b, #8; \
+ eor r0.16b, r0.16b, T1.16b; \
+ eor r2.16b, r2.16b, T3.16b; \
+ eor r4.16b, r4.16b, T5.16b; \
+ eor r6.16b, r6.16b, T7.16b; \
+ eor r1.16b, r1.16b, T0.16b; \
+ eor r3.16b, r3.16b, T2.16b; \
+ eor r5.16b, r5.16b, T4.16b; \
+ eor r7.16b, r7.16b, T6.16b;
+
+/*
+ * input: r0:r1 (low 128-bits in r0, high in r1)
+ * output: a
+ *
+ * Folds the 256-bit value r0:r1 down to 128 bits in two steps: the
+ * upper 64 bits of r1 are multiplied by the upper lane of rconst and
+ * the product is XORed across r1 (its high half) and r0 (its low
+ * half); then the lower 64 bits of the updated r1 are multiplied by
+ * the lower lane of rconst and XORed with r0 to give a.
+ * Modifies r0 and r1, clobbers T0 and T1. RZERO must hold zero.
+ */
+#define REDUCTION(a, r0, r1, rconst, T0, T1) \
+ pmull2 T0.1q, r1.2d, rconst.2d; \
+ ext T1.16b, T0.16b, RZERO.16b, #8; \
+ ext T0.16b, RZERO.16b, T0.16b, #8; \
+ eor r1.16b, r1.16b, T1.16b; \
+ eor r0.16b, r0.16b, T0.16b; \
+ pmull T0.1q, r1.1d, rconst.1d; \
+ eor a.16b, r0.16b, T0.16b;
+
+#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1) /* one SM4 block pass over b0 (eight sm4e steps with v24-v31) interleaved with the PMUL_128x128 sequence m0 * m1 -> r0:r1; clobbers T0, T1; RZERO must hold zero */ \
+ rev32 b0.16b, b0.16b; \
+ ext T0.16b, m1.16b, m1.16b, #8; \
+ sm4e b0.4s, v24.4s; \
+ pmull r0.1q, m0.1d, m1.1d; \
+ sm4e b0.4s, v25.4s; \
+ pmull T1.1q, m0.1d, T0.1d; \
+ sm4e b0.4s, v26.4s; \
+ pmull2 T0.1q, m0.2d, T0.2d; \
+ sm4e b0.4s, v27.4s; \
+ pmull2 r1.1q, m0.2d, m1.2d; \
+ sm4e b0.4s, v28.4s; \
+ eor T0.16b, T0.16b, T1.16b; \
+ sm4e b0.4s, v29.4s; \
+ ext T1.16b, RZERO.16b, T0.16b, #8; \
+ sm4e b0.4s, v30.4s; \
+ ext T0.16b, T0.16b, RZERO.16b, #8; \
+ sm4e b0.4s, v31.4s; \
+ eor r0.16b, r0.16b, T1.16b; \
+ rev64 b0.4s, b0.4s; \
+ eor r1.16b, r1.16b, T0.16b; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ rev32 b0.16b, b0.16b;
+
+#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2, \
+ r0, r1, m0, m1, T0, T1, \
+ r2, r3, m2, m3, T2, T3, \
+ r4, r5, m4, m5, T4, T5) /* three SM4 block passes (b0-b2) interleaved with three 128x128 multiplies; the three products are XORed together into r0:r1; r2-r5 and T0-T5 are clobbered; RZERO must hold zero */ \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ ext T0.16b, m1.16b, m1.16b, #8; \
+ ext T2.16b, m3.16b, m3.16b, #8; \
+ ext T4.16b, m5.16b, m5.16b, #8; \
+ sm4e b0.4s, v24.4s; \
+ sm4e b1.4s, v24.4s; \
+ sm4e b2.4s, v24.4s; \
+ pmull r0.1q, m0.1d, m1.1d; \
+ pmull r2.1q, m2.1d, m3.1d; \
+ pmull r4.1q, m4.1d, m5.1d; \
+ sm4e b0.4s, v25.4s; \
+ sm4e b1.4s, v25.4s; \
+ sm4e b2.4s, v25.4s; \
+ pmull T1.1q, m0.1d, T0.1d; \
+ pmull T3.1q, m2.1d, T2.1d; \
+ pmull T5.1q, m4.1d, T4.1d; \
+ sm4e b0.4s, v26.4s; \
+ sm4e b1.4s, v26.4s; \
+ sm4e b2.4s, v26.4s; \
+ pmull2 T0.1q, m0.2d, T0.2d; \
+ pmull2 T2.1q, m2.2d, T2.2d; \
+ pmull2 T4.1q, m4.2d, T4.2d; \
+ sm4e b0.4s, v27.4s; \
+ sm4e b1.4s, v27.4s; \
+ sm4e b2.4s, v27.4s; \
+ pmull2 r1.1q, m0.2d, m1.2d; \
+ pmull2 r3.1q, m2.2d, m3.2d; \
+ pmull2 r5.1q, m4.2d, m5.2d; \
+ sm4e b0.4s, v28.4s; \
+ sm4e b1.4s, v28.4s; \
+ sm4e b2.4s, v28.4s; \
+ eor T0.16b, T0.16b, T1.16b; \
+ eor T2.16b, T2.16b, T3.16b; \
+ eor T4.16b, T4.16b, T5.16b; \
+ sm4e b0.4s, v29.4s; \
+ sm4e b1.4s, v29.4s; \
+ sm4e b2.4s, v29.4s; \
+ ext T1.16b, RZERO.16b, T0.16b, #8; \
+ ext T3.16b, RZERO.16b, T2.16b, #8; \
+ ext T5.16b, RZERO.16b, T4.16b, #8; \
+ sm4e b0.4s, v30.4s; \
+ sm4e b1.4s, v30.4s; \
+ sm4e b2.4s, v30.4s; \
+ ext T0.16b, T0.16b, RZERO.16b, #8; \
+ ext T2.16b, T2.16b, RZERO.16b, #8; \
+ ext T4.16b, T4.16b, RZERO.16b, #8; \
+ sm4e b0.4s, v31.4s; \
+ sm4e b1.4s, v31.4s; \
+ sm4e b2.4s, v31.4s; \
+ eor r0.16b, r0.16b, T1.16b; \
+ eor r2.16b, r2.16b, T3.16b; \
+ eor r4.16b, r4.16b, T5.16b; \
+ rev64 b0.4s, b0.4s; \
+ rev64 b1.4s, b1.4s; \
+ rev64 b2.4s, b2.4s; \
+ eor r1.16b, r1.16b, T0.16b; \
+ eor r3.16b, r3.16b, T2.16b; \
+ eor r5.16b, r5.16b, T4.16b; \
+ ext b0.16b, b0.16b, b0.16b, #8; \
+ ext b1.16b, b1.16b, b1.16b, #8; \
+ ext b2.16b, b2.16b, b2.16b, #8; \
+ eor r0.16b, r0.16b, r2.16b; \
+ eor r1.16b, r1.16b, r3.16b; \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ eor r0.16b, r0.16b, r4.16b; \
+ eor r1.16b, r1.16b, r5.16b;
+
+#define inc32_le128(vctr) /* vctr = current counter from x8:x9, byte-reversed per 64-bit lane; then bump only the low 32 bits of x9; clobbers x6 */ \
+ mov vctr.d[1], x9; \
+ add w6, w9, #1; /* 32-bit add: a wrap does not carry into the upper bits */ \
+ mov vctr.d[0], x8; \
+ bfi x9, x6, #0, #32; /* write the incremented low word back, upper 32 bits of x9 untouched */ \
+ rev64 vctr.16b, vctr.16b;
+
+#define GTAG_HASH_LENGTHS(vctr0, vlen) /* absorbs the lengths block at [x7] into RHASH and XORs the result with the encrypted CTR0, leaving the tag in RHASH; uses x8/x9, RH1, RR0/RR1 and RTMP0-RTMP3, clobbers x6 and the low 32 bits of x9 */ \
+ ld1 {vlen.16b}, [x7]; \
+ /* construct CTR0 */ \
+ /* the low 32 bits of the initial counter block are always be32(1) */ \
+ mov x6, #0x1; \
+ bfi x9, x6, #0, #32; \
+ mov vctr0.d[0], x8; \
+ mov vctr0.d[1], x9; \
+ rbit vlen.16b, vlen.16b; \
+ rev64 vctr0.16b, vctr0.16b; \
+ /* authtag = GCTR(CTR0, GHASH) */ \
+ eor RHASH.16b, RHASH.16b, vlen.16b; \
+ SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1, \
+ RTMP0, RTMP1); \
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3); \
+ rbit RHASH.16b, RHASH.16b; \
+ eor RHASH.16b, RHASH.16b, vctr0.16b;
+
+
+/* Register macros for encrypt and ghash */
+
+/*
+ * High halves of the 256-bit multiply results (paired with RR0, RR2,
+ * RR4 and RR6 respectively).
+ * can be the same as input v0-v3
+ */
+#define RR1 v0
+#define RR3 v1
+#define RR5 v2
+#define RR7 v3
+
+/* Low halves of the 256-bit multiply results */
+#define RR0 v4
+#define RR2 v5
+#define RR4 v6
+#define RR6 v7
+
+/* Scratch registers handed to the multiply and reduction macros */
+#define RTMP0 v8
+#define RTMP1 v9
+#define RTMP2 v10
+#define RTMP3 v11
+#define RTMP4 v12
+#define RTMP5 v13
+#define RTMP6 v14
+#define RTMP7 v15
+
+/* Powers H^1..H^4 of the hash key, as laid out in the ghash table */
+#define RH1 v16
+#define RH2 v17
+#define RH3 v18
+#define RH4 v19
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
+ /* input:
+ * x0: round key array, CTX
+ * x1: ghash table
+ *
+ * Derives the hash key H by encrypting an all-zero block, then
+ * computes H^2, H^3 and H^4 and stores the four bit-reversed powers
+ * (64 bytes, H^1 first) to the table at x1.
+ */
+ SM4_PREPARE(x0)
+
+ adr_l x2, .Lghash_rconst
+ ld1r {RRCONST.2d}, [x2]
+
+ eor RZERO.16b, RZERO.16b, RZERO.16b
+
+ /* H = E(K, 0^128) */
+ rev32 v0.16b, RZERO.16b /* v0 = 0: a byte swap of zero is still zero */
+ SM4_CRYPT_BLK_BE(v0)
+
+ /* H ^ 1 */
+ rbit RH1.16b, v0.16b
+
+ /* H ^ 2 */
+ PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1)
+ REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ /* H ^ 3 */
+ PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1)
+ REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ /* H ^ 4 */
+ PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1)
+ REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ st1 {RH1.16b-RH4.16b}, [x1]
+
+ ret
+SYM_FUNC_END(sm4_ce_pmull_ghash_setup)
+
+.align 3
+SYM_FUNC_START(pmull_ghash_update)
+ /* input:
+ * x0: ghash table
+ * x1: ghash result
+ * x2: src
+ * w3: nblocks
+ *
+ * Absorbs nblocks 16-byte blocks from src into the GHASH value at
+ * x1, four blocks per iteration while at least four remain and one
+ * block at a time afterwards, then stores the updated value back.
+ * NOTE(review): the single-block loop decrements w3 before testing
+ * it, so nblocks == 0 would wrap; the caller presumably guarantees
+ * nblocks >= 1 - confirm.
+ */
+ ld1 {RH1.16b-RH4.16b}, [x0]
+
+ ld1 {RHASH.16b}, [x1]
+ rbit RHASH.16b, RHASH.16b
+
+ adr_l x4, .Lghash_rconst
+ ld1r {RRCONST.2d}, [x4]
+
+ eor RZERO.16b, RZERO.16b, RZERO.16b
+
+.Lghash_loop_4x:
+ cmp w3, #4
+ blt .Lghash_loop_1x
+
+ sub w3, w3, #4
+
+ ld1 {v0.16b-v3.16b}, [x2], #64
+
+ rbit v0.16b, v0.16b
+ rbit v1.16b, v1.16b
+ rbit v2.16b, v2.16b
+ rbit v3.16b, v3.16b
+
+ /*
+ * (in0 ^ HASH) * H^4 => rr0:rr1
+ * (in1) * H^3 => rr2:rr3
+ * (in2) * H^2 => rr4:rr5
+ * (in3) * H^1 => rr6:rr7
+ */
+ eor RHASH.16b, RHASH.16b, v0.16b
+
+ PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
+ RR2, RR3, v1, RH3, RTMP2, RTMP3,
+ RR4, RR5, v2, RH2, RTMP4, RTMP5,
+ RR6, RR7, v3, RH1, RTMP6, RTMP7)
+
+ eor RR0.16b, RR0.16b, RR2.16b /* sum the four 256-bit products before a single reduction */
+ eor RR1.16b, RR1.16b, RR3.16b
+ eor RR0.16b, RR0.16b, RR4.16b
+ eor RR1.16b, RR1.16b, RR5.16b
+ eor RR0.16b, RR0.16b, RR6.16b
+ eor RR1.16b, RR1.16b, RR7.16b
+
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+ cbz w3, .Lghash_end
+ b .Lghash_loop_4x
+
+.Lghash_loop_1x:
+ sub w3, w3, #1
+
+ ld1 {v0.16b}, [x2], #16
+ rbit v0.16b, v0.16b
+ eor RHASH.16b, RHASH.16b, v0.16b
+
+ PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ cbnz w3, .Lghash_loop_1x
+
+.Lghash_end:
+ rbit RHASH.16b, RHASH.16b /* undo the bit reversal before storing */
+ st1 {RHASH.2d}, [x1]
+
+ ret
+SYM_FUNC_END(pmull_ghash_update)
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_gcm_enc)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nbytes
+ * x5: ghash result
+ * x6: ghash table
+ * x7: lengths (only for last block)
+ */
+ SM4_PREPARE(x0)
+
+ ldp x8, x9, [x3]
+ rev x8, x8
+ rev x9, x9
+
+ ld1 {RH1.16b-RH4.16b}, [x6]
+
+ ld1 {RHASH.16b}, [x5]
+ rbit RHASH.16b, RHASH.16b
+
+ adr_l x6, .Lghash_rconst
+ ld1r {RRCONST.2d}, [x6]
+
+ eor RZERO.16b, RZERO.16b, RZERO.16b
+
+ cbz w4, .Lgcm_enc_hash_len
+
+.Lgcm_enc_loop_4x:
+ cmp w4, #(4 * 16)
+ blt .Lgcm_enc_loop_1x
+
+ sub w4, w4, #(4 * 16)
+
+ /* construct CTRs */
+ inc32_le128(v0) /* +0 */
+ inc32_le128(v1) /* +1 */
+ inc32_le128(v2) /* +2 */
+ inc32_le128(v3) /* +3 */
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ eor v0.16b, v0.16b, RTMP0.16b
+ eor v1.16b, v1.16b, RTMP1.16b
+ eor v2.16b, v2.16b, RTMP2.16b
+ eor v3.16b, v3.16b, RTMP3.16b
+ st1 {v0.16b-v3.16b}, [x1], #64
+
+ /* ghash update */
+
+ rbit v0.16b, v0.16b
+ rbit v1.16b, v1.16b
+ rbit v2.16b, v2.16b
+ rbit v3.16b, v3.16b
+
+ /*
+ * (in0 ^ HASH) * H^4 => rr0:rr1
+ * (in1) * H^3 => rr2:rr3
+ * (in2) * H^2 => rr4:rr5
+ * (in3) * H^1 => rr6:rr7
+ */
+ eor RHASH.16b, RHASH.16b, v0.16b
+
+ PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
+ RR2, RR3, v1, RH3, RTMP2, RTMP3,
+ RR4, RR5, v2, RH2, RTMP4, RTMP5,
+ RR6, RR7, v3, RH1, RTMP6, RTMP7)
+
+ eor RR0.16b, RR0.16b, RR2.16b
+ eor RR1.16b, RR1.16b, RR3.16b
+ eor RR0.16b, RR0.16b, RR4.16b
+ eor RR1.16b, RR1.16b, RR5.16b
+ eor RR0.16b, RR0.16b, RR6.16b
+ eor RR1.16b, RR1.16b, RR7.16b
+
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+ cbz w4, .Lgcm_enc_hash_len
+ b .Lgcm_enc_loop_4x
+
+.Lgcm_enc_loop_1x:
+ cmp w4, #16
+ blt .Lgcm_enc_tail
+
+ sub w4, w4, #16
+
+ /* construct CTRs */
+ inc32_le128(v0)
+
+ ld1 {RTMP0.16b}, [x2], #16
+
+ SM4_CRYPT_BLK(v0)
+
+ eor v0.16b, v0.16b, RTMP0.16b
+ st1 {v0.16b}, [x1], #16
+
+ /* ghash update */
+ rbit v0.16b, v0.16b
+ eor RHASH.16b, RHASH.16b, v0.16b
+ PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ cbz w4, .Lgcm_enc_hash_len
+ b .Lgcm_enc_loop_1x
+
+.Lgcm_enc_tail:
+ /* construct CTRs */
+ inc32_le128(v0)
+ SM4_CRYPT_BLK(v0)
+
+ /* load permute table */
+ adr_l x0, .Lcts_permute_table
+ add x0, x0, #32
+ sub x0, x0, w4, uxtw
+ ld1 {v3.16b}, [x0]
+
+.Lgcm_enc_tail_loop:
+ /* do encrypt */
+ ldrb w0, [x2], #1 /* get 1 byte from input */
+ umov w6, v0.b[0] /* get next keystream byte */
+ eor w6, w6, w0 /* w6 = keystream ^ input */
+ strb w6, [x1], #1 /* store out byte */
+
+ /* rotate right by one byte; lane 15 is overwritten below */
+ ext v0.16b, v0.16b, v0.16b, #1
+ /* the last ciphertext is placed in high bytes */
+ ins v0.b[15], w6
+
+ subs w4, w4, #1
+ bne .Lgcm_enc_tail_loop
+
+ /* padding last block with zeros */
+ tbl v0.16b, {v0.16b}, v3.16b
+
+ /* ghash update */
+ rbit v0.16b, v0.16b
+ eor RHASH.16b, RHASH.16b, v0.16b
+ PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+.Lgcm_enc_hash_len:
+ cbz x7, .Lgcm_enc_end
+
+ GTAG_HASH_LENGTHS(v1, v3)
+
+ b .Lgcm_enc_ret
+
+.Lgcm_enc_end:
+ /* store new CTR */
+ rev x8, x8
+ rev x9, x9
+ stp x8, x9, [x3]
+
+ rbit RHASH.16b, RHASH.16b
+
+.Lgcm_enc_ret:
+ /* store new MAC */
+ st1 {RHASH.2d}, [x5]
+
+ ret
+SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
+
+#undef RR1
+#undef RR3
+#undef RR5
+#undef RR7
+#undef RR0
+#undef RR2
+#undef RR4
+#undef RR6
+#undef RTMP0
+#undef RTMP1
+#undef RTMP2
+#undef RTMP3
+#undef RTMP4
+#undef RTMP5
+#undef RTMP6
+#undef RTMP7
+#undef RH1
+#undef RH2
+#undef RH3
+#undef RH4
+
+
+/* Register macros for decrypt */
+
+/* v0-v2 for building CTRs, v3-v5 for saving inputs */
+
+#define RR1 v6
+#define RR3 v7
+#define RR5 v8
+
+#define RR0 v9
+#define RR2 v10
+#define RR4 v11
+
+#define RTMP0 v12
+#define RTMP1 v13
+#define RTMP2 v14
+#define RTMP3 v15
+#define RTMP4 v16
+#define RTMP5 v17
+
+#define RH1 v18
+#define RH2 v19
+#define RH3 v20
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_gcm_dec)
+ /* input:
+ * x0: round key array, CTX
+ * x1: dst
+ * x2: src
+ * x3: ctr (big endian, 128 bit)
+ * w4: nbytes
+ * x5: ghash result
+ * x6: ghash table
+ * x7: lengths (only for last block)
+ */
+ SM4_PREPARE(x0)
+
+ ldp x8, x9, [x3]
+ rev x8, x8
+ rev x9, x9
+
+ ld1 {RH1.16b-RH3.16b}, [x6]
+
+ ld1 {RHASH.16b}, [x5]
+ rbit RHASH.16b, RHASH.16b
+
+ adr_l x6, .Lghash_rconst
+ ld1r {RRCONST.2d}, [x6]
+
+ eor RZERO.16b, RZERO.16b, RZERO.16b
+
+ cbz w4, .Lgcm_dec_hash_len
+
+.Lgcm_dec_loop_3x:
+ cmp w4, #(3 * 16)
+ blt .Lgcm_dec_loop_1x
+
+ sub w4, w4, #(3 * 16)
+
+ ld1 {v3.16b-v5.16b}, [x2], #(3 * 16)
+
+ /* construct CTRs */
+ inc32_le128(v0) /* +0 */
+ rbit v6.16b, v3.16b
+ inc32_le128(v1) /* +1 */
+ rbit v7.16b, v4.16b
+ inc32_le128(v2) /* +2 */
+ rbit v8.16b, v5.16b
+
+ eor RHASH.16b, RHASH.16b, v6.16b
+
+ /* decrypt & ghash update */
+ SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2,
+ RR0, RR1, RHASH, RH3, RTMP0, RTMP1,
+ RR2, RR3, v7, RH2, RTMP2, RTMP3,
+ RR4, RR5, v8, RH1, RTMP4, RTMP5)
+
+ eor v0.16b, v0.16b, v3.16b
+ eor v1.16b, v1.16b, v4.16b
+ eor v2.16b, v2.16b, v5.16b
+
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+ st1 {v0.16b-v2.16b}, [x1], #(3 * 16)
+
+ cbz w4, .Lgcm_dec_hash_len
+ b .Lgcm_dec_loop_3x
+
+.Lgcm_dec_loop_1x:
+ cmp w4, #16
+ blt .Lgcm_dec_tail
+
+ sub w4, w4, #16
+
+ ld1 {v3.16b}, [x2], #16
+
+ /* construct CTRs */
+ inc32_le128(v0)
+ rbit v6.16b, v3.16b
+
+ eor RHASH.16b, RHASH.16b, v6.16b
+
+ SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+
+ eor v0.16b, v0.16b, v3.16b
+
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+ st1 {v0.16b}, [x1], #16
+
+ cbz w4, .Lgcm_dec_hash_len
+ b .Lgcm_dec_loop_1x
+
+.Lgcm_dec_tail:
+ /* construct CTRs */
+ inc32_le128(v0)
+ SM4_CRYPT_BLK(v0)
+
+ /* load permute table */
+ adr_l x0, .Lcts_permute_table
+ add x0, x0, #32
+ sub x0, x0, w4, uxtw
+ ld1 {v3.16b}, [x0]
+
+.Lgcm_dec_tail_loop:
+ /* do decrypt */
+ ldrb w0, [x2], #1 /* get 1 byte from input */
+ umov w6, v0.b[0] /* get next keystream byte */
+ eor w6, w6, w0 /* w6 = keystream ^ input */
+ strb w6, [x1], #1 /* store out byte */
+
+ /* rotate right by one byte; lane 15 is overwritten below */
+ ext v0.16b, v0.16b, v0.16b, #1
+ /* the last ciphertext is placed in high bytes */
+ ins v0.b[15], w0
+
+ subs w4, w4, #1
+ bne .Lgcm_dec_tail_loop
+
+ /* padding last block with zeros */
+ tbl v0.16b, {v0.16b}, v3.16b
+
+ /* ghash update */
+ rbit v0.16b, v0.16b
+ eor RHASH.16b, RHASH.16b, v0.16b
+ PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+ REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+.Lgcm_dec_hash_len:
+ cbz x7, .Lgcm_dec_end
+
+ GTAG_HASH_LENGTHS(v1, v3)
+
+ b .Lgcm_dec_ret
+
+.Lgcm_dec_end:
+ /* store new CTR */
+ rev x8, x8
+ rev x9, x9
+ stp x8, x9, [x3]
+
+ rbit RHASH.16b, RHASH.16b
+
+.Lgcm_dec_ret:
+ /* store new MAC */
+ st1 {RHASH.2d}, [x5]
+
+ ret
+SYM_FUNC_END(sm4_ce_pmull_gcm_dec)
+
+ .section ".rodata", "a"
+ .align 4
+.Lcts_permute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+.Lghash_rconst:
+ .quad 0x87
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
new file mode 100644
index 000000000000..c450a2025ca9
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <crypto/b128ops.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-ce.h"
+
+asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table);
+asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash,
+ const u8 *src, unsigned int nblocks);
+asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
+ const u8 *src, u8 *iv,
+ unsigned int nbytes, u8 *ghash,
+ const u8 *ghash_table, const u8 *lengths);
+asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst,
+ const u8 *src, u8 *iv,
+ unsigned int nbytes, u8 *ghash,
+ const u8 *ghash_table, const u8 *lengths);
+
+#define GHASH_BLOCK_SIZE 16
+#define GCM_IV_SIZE 12
+
+struct sm4_gcm_ctx {
+ struct sm4_ctx key;
+ u8 ghash_table[16 * 4];
+};
+
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ kernel_neon_begin();
+
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+
+ kernel_neon_end();
+ return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ switch (authsize) {
+ case 4:
+ case 8:
+ case 12 ... 16:
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+ u8 __aligned(8) buffer[GHASH_BLOCK_SIZE];
+ u32 assoclen = req->assoclen;
+ struct scatter_walk walk;
+ unsigned int buflen = 0;
+
+ scatterwalk_start(&walk, req->src);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, assoclen);
+ u8 *p, *ptr;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, assoclen);
+ }
+
+ p = ptr = scatterwalk_map(&walk);
+ assoclen -= n;
+ scatterwalk_advance(&walk, n);
+
+ if (n + buflen < GHASH_BLOCK_SIZE) {
+ memcpy(&buffer[buflen], ptr, n);
+ buflen += n;
+ } else {
+ unsigned int nblocks;
+
+ if (buflen) {
+ unsigned int l = GHASH_BLOCK_SIZE - buflen;
+
+ memcpy(&buffer[buflen], ptr, l);
+ ptr += l;
+ n -= l;
+
+ pmull_ghash_update(ctx->ghash_table, ghash,
+ buffer, 1);
+ }
+
+ nblocks = n / GHASH_BLOCK_SIZE;
+ if (nblocks) {
+ pmull_ghash_update(ctx->ghash_table, ghash,
+ ptr, nblocks);
+ ptr += nblocks * GHASH_BLOCK_SIZE;
+ }
+
+ buflen = n % GHASH_BLOCK_SIZE;
+ if (buflen)
+ memcpy(&buffer[0], ptr, buflen);
+ }
+
+ scatterwalk_unmap(p);
+ scatterwalk_done(&walk, 0, assoclen);
+ } while (assoclen);
+
+ /* zero-pad the final partial block before hashing it */
+ if (buflen) {
+ memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen);
+ pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1);
+ }
+}
+
+/*
+ * gcm_crypt - run SM4-GCM over the payload described by @walk.
+ *
+ * @req:   AEAD request; supplies assoclen and the scatterlist holding the AAD
+ * @walk:  walker already started by skcipher_walk_aead_{en,de}crypt()
+ * @ctx:   transform context (round keys and precomputed GHASH table)
+ * @ghash: SM4_BLOCK_SIZE-byte buffer; zeroed here and updated by the asm
+ *         helper, which stores the final tag into it on the last call
+ * @sm4_ce_pmull_gcm_crypt: sm4_ce_pmull_gcm_enc or sm4_ce_pmull_gcm_dec
+ *
+ * Return: 0 on success, or the error reported by skcipher_walk_done().
+ */
+static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
+		     struct sm4_gcm_ctx *ctx, u8 ghash[],
+		     void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
+				u8 *dst, const u8 *src, u8 *iv,
+				unsigned int nbytes, u8 *ghash,
+				const u8 *ghash_table, const u8 *lengths))
+{
+	u8 __aligned(8) iv[SM4_BLOCK_SIZE];
+	be128 __aligned(8) lengths;
+	int err;
+
+	memset(ghash, 0, SM4_BLOCK_SIZE);
+
+	/* lengths are in bits; widen first so "* 8" cannot wrap in 32 bits */
+	lengths.a = cpu_to_be64((u64)req->assoclen * 8);
+	lengths.b = cpu_to_be64((u64)walk->total * 8);
+
+	/* counter block = IV || be32 counter; payload counter starts at 2 */
+	memcpy(iv, walk->iv, GCM_IV_SIZE);
+	put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+	kernel_neon_begin();
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, ghash);
+
+	/*
+	 * Only touch the walker while it reports data: with walk->nbytes == 0
+	 * (empty payload) its state must not be passed to skcipher_walk_done().
+	 */
+	while (walk->nbytes) {
+		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+		const u8 *src = walk->src.virt.addr;
+		u8 *dst = walk->dst.virt.addr;
+
+		if (walk->nbytes == walk->total) {
+			/* last chunk: handle any partial block and the lengths */
+			sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
+					       walk->nbytes, ghash,
+					       ctx->ghash_table,
+					       (const u8 *)&lengths);
+
+			kernel_neon_end();
+
+			return skcipher_walk_done(walk, 0);
+		}
+
+		/* intermediate chunk: whole blocks only, tag not finalised */
+		if (walk->nbytes - tail)
+			sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
+					       walk->nbytes - tail, ghash,
+					       ctx->ghash_table, NULL);
+
+		/* leave the NEON section before calling back into the walker */
+		kernel_neon_end();
+
+		err = skcipher_walk_done(walk, tail);
+		if (err)
+			return err;
+
+		kernel_neon_begin();
+	}
+
+	/*
+	 * No payload left to process: fold in the lengths block only.  With
+	 * nbytes == 0 the helper never dereferences dst/src.
+	 */
+	sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, 0, ghash,
+			       ctx->ghash_table, (const u8 *)&lengths);
+
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+ u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_aead_encrypt(&walk, req, false);
+ if (err)
+ return err;
+
+ err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
+ if (err)
+ return err;
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+ u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
+ u8 authtag[SM4_BLOCK_SIZE];
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_aead_decrypt(&walk, req, false);
+ if (err)
+ return err;
+
+ err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
+ if (err)
+ return err;
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(authtag, req->src,
+ req->assoclen + req->cryptlen - authsize,
+ authsize, 0);
+
+ if (crypto_memneq(authtag, ghash, authsize))
+ return -EBADMSG;
+
+ return 0;
+}
+
+static struct aead_alg sm4_gcm_alg = {
+ .base = {
+ .cra_name = "gcm(sm4)",
+ .cra_driver_name = "gcm-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_gcm_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .ivsize = GCM_IV_SIZE,
+ .chunksize = SM4_BLOCK_SIZE,
+ .maxauthsize = SM4_BLOCK_SIZE,
+ .setkey = gcm_setkey,
+ .setauthsize = gcm_setauthsize,
+ .encrypt = gcm_encrypt,
+ .decrypt = gcm_decrypt,
+};
+
+static int __init sm4_ce_gcm_init(void)
+{
+ if (!cpu_have_named_feature(PMULL))
+ return -ENODEV;
+
+ return crypto_register_aead(&sm4_gcm_alg);
+}
+
+static void __exit sm4_ce_gcm_exit(void)
+{
+ crypto_unregister_aead(&sm4_gcm_alg);
+}
+
+static const struct cpu_feature __maybe_unused sm4_ce_gcm_cpu_feature[] = {
+ { cpu_feature(PMULL) },
+ {}
+};
+MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature);
+
+module_cpu_feature_match(SM4, sm4_ce_gcm_init);
+module_exit(sm4_ce_gcm_exit);
+
+MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions");
+MODULE_ALIAS_CRYPTO("gcm(sm4)");
+MODULE_AUTHOR("Tianjia Zhang <[email protected]>");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 496d55c0d01a..0a2d32ed3bde 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -14,8 +14,12 @@
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
+#include <crypto/b128ops.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/xts.h>
#include <crypto/sm4.h>
#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
@@ -26,15 +30,48 @@ asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
unsigned int nblks);
asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
- u8 *iv, unsigned int nblks);
+ u8 *iv, unsigned int nblocks);
asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
- u8 *iv, unsigned int nblks);
+ u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nbytes);
+asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nbytes);
asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src,
+ u8 *tweak, unsigned int nbytes,
+ const u32 *rkey2_enc);
+asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
+ u8 *tweak, unsigned int nbytes,
+ const u32 *rkey2_enc);
+asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest,
+ const u8 *src, unsigned int nblocks,
+ bool enc_before, bool enc_after);
+
+EXPORT_SYMBOL(sm4_ce_expand_key);
+EXPORT_SYMBOL(sm4_ce_crypt_block);
+EXPORT_SYMBOL(sm4_ce_cbc_enc);
+EXPORT_SYMBOL(sm4_ce_cfb_enc);
+
+struct sm4_xts_ctx {
+ struct sm4_ctx key1;
+ struct sm4_ctx key2;
+};
+
+struct sm4_mac_tfm_ctx {
+ struct sm4_ctx key;
+ u8 __aligned(8) consts[];
+};
+
+struct sm4_mac_desc_ctx {
+ unsigned int len;
+ u8 digest[SM4_BLOCK_SIZE];
+};
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
@@ -44,8 +81,33 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
+ kernel_neon_begin();
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
+ kernel_neon_end();
+ return 0;
+}
+
+static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int ret;
+
+ if (key_len != SM4_KEY_SIZE * 2)
+ return -EINVAL;
+
+ ret = xts_verify_key(tfm, key, key_len);
+ if (ret)
+ return ret;
+
+ kernel_neon_begin();
+ sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+ ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+ ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ kernel_neon_end();
+
return 0;
}
@@ -94,66 +156,128 @@ static int sm4_ecb_decrypt(struct skcipher_request *req)
return sm4_ecb_do_crypt(req, ctx->rkey_dec);
}
-static int sm4_cbc_encrypt(struct skcipher_request *req)
+static int sm4_cbc_crypt(struct skcipher_request *req,
+ struct sm4_ctx *ctx, bool encrypt)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ unsigned int nblocks;
- kernel_neon_begin();
+ nblocks = nbytes / SM4_BLOCK_SIZE;
+ if (nblocks) {
+ kernel_neon_begin();
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ if (encrypt)
+ sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
+ else
+ sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
- kernel_neon_end();
+ kernel_neon_end();
+ }
- err = skcipher_walk_done(&walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
}
+static int sm4_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_cbc_crypt(req, ctx, true);
+}
+
static int sm4_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return sm4_cbc_crypt(req, ctx, false);
+}
+
+static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct scatterlist *src = req->src;
+ struct scatterlist *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
struct skcipher_walk walk;
- unsigned int nbytes;
+ int cbc_blocks;
int err;
- err = skcipher_walk_virt(&walk, req, false);
+ if (req->cryptlen < SM4_BLOCK_SIZE)
+ return -EINVAL;
- while ((nbytes = walk.nbytes) > 0) {
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ if (req->cryptlen == SM4_BLOCK_SIZE)
+ return sm4_cbc_crypt(req, ctx, encrypt);
- kernel_neon_begin();
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ /* plain CBC over all but the last two blocks */
+ cbc_blocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
+ if (cbc_blocks) {
+ skcipher_request_set_crypt(&subreq, src, dst,
+ cbc_blocks * SM4_BLOCK_SIZE,
+ req->iv);
- kernel_neon_end();
+ err = sm4_cbc_crypt(&subreq, ctx, encrypt);
+ if (err)
+ return err;
- err = skcipher_walk_done(&walk, nbytes);
+ dst = src = scatterwalk_ffwd(sg_src, src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
}
- return err;
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&subreq, src, dst,
+ req->cryptlen - cbc_blocks * SM4_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+
+ if (encrypt)
+ sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ else
+ sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int sm4_cbc_cts_encrypt(struct skcipher_request *req)
+{
+ return sm4_cbc_cts_crypt(req, true);
+}
+
+static int sm4_cbc_cts_decrypt(struct skcipher_request *req)
+{
+ return sm4_cbc_cts_crypt(req, false);
}
static int sm4_cfb_encrypt(struct skcipher_request *req)
@@ -283,6 +407,111 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
return err;
}
+static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % SM4_BLOCK_SIZE;
+ const u32 *rkey2_enc = ctx->key2.rkey_enc;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ if (req->cryptlen < SM4_BLOCK_SIZE)
+ return -EINVAL;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int nblocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ nblocks * SM4_BLOCK_SIZE, req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+ } else {
+ tail = 0;
+ }
+
+ while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) {
+ if (nbytes < walk.total)
+ nbytes &= ~(SM4_BLOCK_SIZE - 1);
+
+ kernel_neon_begin();
+
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+
+ kernel_neon_end();
+
+ rkey2_enc = NULL;
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ return err;
+ }
+
+ if (likely(tail == 0))
+ return 0;
+
+ /* handle ciphertext stealing */
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen);
+
+ skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int sm4_xts_encrypt(struct skcipher_request *req)
+{
+ return sm4_xts_crypt(req, true);
+}
+
+static int sm4_xts_decrypt(struct skcipher_request *req)
+{
+ return sm4_xts_crypt(req, false);
+}
+
static struct skcipher_alg sm4_algs[] = {
{
.base = {
@@ -345,28 +574,312 @@ static struct skcipher_alg sm4_algs[] = {
.setkey = sm4_setkey,
.encrypt = sm4_ctr_crypt,
.decrypt = sm4_ctr_crypt,
+ }, {
+ .base = {
+ .cra_name = "cts(cbc(sm4))",
+ .cra_driver_name = "cts-cbc-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE,
+ .max_keysize = SM4_KEY_SIZE,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = SM4_BLOCK_SIZE * 2,
+ .setkey = sm4_setkey,
+ .encrypt = sm4_cbc_cts_encrypt,
+ .decrypt = sm4_cbc_cts_decrypt,
+ }, {
+ .base = {
+ .cra_name = "xts(sm4)",
+ .cra_driver_name = "xts-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_xts_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = SM4_KEY_SIZE * 2,
+ .max_keysize = SM4_KEY_SIZE * 2,
+ .ivsize = SM4_BLOCK_SIZE,
+ .walksize = SM4_BLOCK_SIZE * 2,
+ .setkey = sm4_xts_setkey,
+ .encrypt = sm4_xts_encrypt,
+ .decrypt = sm4_xts_decrypt,
+ }
+};
+
+static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ kernel_neon_begin();
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *consts = (be128 *)ctx->consts;
+ u64 a, b;
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ memset(consts, 0, SM4_BLOCK_SIZE);
+
+ kernel_neon_begin();
+
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+
+ /* encrypt the zero block */
+ sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+
+ kernel_neon_end();
+
+ /* gf(2^128) multiply zero-ciphertext with u and u^2 */
+ a = be64_to_cpu(consts[0].a);
+ b = be64_to_cpu(consts[0].b);
+ consts[0].a = cpu_to_be64((a << 1) | (b >> 63));
+ consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+ a = be64_to_cpu(consts[0].a);
+ b = be64_to_cpu(consts[0].b);
+ consts[1].a = cpu_to_be64((a << 1) | (b >> 63));
+ consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+ return 0;
+}
+
+static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ u8 __aligned(8) key2[SM4_BLOCK_SIZE];
+ static u8 const ks[3][SM4_BLOCK_SIZE] = {
+ { [0 ... SM4_BLOCK_SIZE - 1] = 0x1},
+ { [0 ... SM4_BLOCK_SIZE - 1] = 0x2},
+ { [0 ... SM4_BLOCK_SIZE - 1] = 0x3},
+ };
+
+ if (key_len != SM4_KEY_SIZE)
+ return -EINVAL;
+
+ kernel_neon_begin();
+
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+
+ sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+ sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+
+ sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sm4_mac_init(struct shash_desc *desc)
+{
+ struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ memset(ctx->digest, 0, SM4_BLOCK_SIZE);
+ ctx->len = 0;
+
+ return 0;
+}
+
+static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
+ unsigned int len)
+{
+ struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int l, nblocks;
+
+ if (len == 0)
+ return 0;
+
+ if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
+ l = min(len, SM4_BLOCK_SIZE - ctx->len);
+
+ crypto_xor(ctx->digest + ctx->len, p, l);
+ ctx->len += l;
+ len -= l;
+ p += l;
+ }
+
+ if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
+ kernel_neon_begin();
+
+ if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
+ sm4_ce_crypt_block(tctx->key.rkey_enc,
+ ctx->digest, ctx->digest);
+ ctx->len = 0;
+ } else {
+ nblocks = len / SM4_BLOCK_SIZE;
+ len %= SM4_BLOCK_SIZE;
+
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+ nblocks, (ctx->len == SM4_BLOCK_SIZE),
+ (len != 0));
+
+ p += nblocks * SM4_BLOCK_SIZE;
+
+ if (len == 0)
+ ctx->len = SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_end();
+
+ if (len) {
+ crypto_xor(ctx->digest, p, len);
+ ctx->len = len;
+ }
+ }
+
+ return 0;
+}
+
+static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ const u8 *consts = tctx->consts;
+
+ if (ctx->len != SM4_BLOCK_SIZE) {
+ ctx->digest[ctx->len] ^= 0x80;
+ consts += SM4_BLOCK_SIZE;
+ }
+
+ kernel_neon_begin();
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+ false, true);
+ kernel_neon_end();
+
+ memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ if (ctx->len) {
+ kernel_neon_begin();
+ sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+ ctx->digest);
+ kernel_neon_end();
+ }
+
+ memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg sm4_mac_algs[] = {
+ {
+ .base = {
+ .cra_name = "cmac(sm4)",
+ .cra_driver_name = "cmac-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ + SM4_BLOCK_SIZE * 2,
+ .cra_module = THIS_MODULE,
+ },
+ .digestsize = SM4_BLOCK_SIZE,
+ .init = sm4_mac_init,
+ .update = sm4_mac_update,
+ .final = sm4_cmac_final,
+ .setkey = sm4_cmac_setkey,
+ .descsize = sizeof(struct sm4_mac_desc_ctx),
+ }, {
+ .base = {
+ .cra_name = "xcbc(sm4)",
+ .cra_driver_name = "xcbc-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = SM4_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
+ + SM4_BLOCK_SIZE * 2,
+ .cra_module = THIS_MODULE,
+ },
+ .digestsize = SM4_BLOCK_SIZE,
+ .init = sm4_mac_init,
+ .update = sm4_mac_update,
+ .final = sm4_cmac_final,
+ .setkey = sm4_xcbc_setkey,
+ .descsize = sizeof(struct sm4_mac_desc_ctx),
+ }, {
+ .base = {
+ .cra_name = "cbcmac(sm4)",
+ .cra_driver_name = "cbcmac-sm4-ce",
+ .cra_priority = 400,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .digestsize = SM4_BLOCK_SIZE,
+ .init = sm4_mac_init,
+ .update = sm4_mac_update,
+ .final = sm4_cbcmac_final,
+ .setkey = sm4_cbcmac_setkey,
+ .descsize = sizeof(struct sm4_mac_desc_ctx),
}
};
+/*
+ * Module init: register the skcipher algorithms first, then the MAC
+ * shashes. If the shash registration fails, the skciphers registered a
+ * moment earlier are unregistered again so the module is never left
+ * half-registered. Returns 0 on success or the first registration error.
+ */
static int __init sm4_init(void)
{
- return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+ int err;
+
+ err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+ if (err)
+ return err;
+
+ err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
+ if (err)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ /* roll back the only step that succeeded */
+ crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+ return err;
}
+/* Module exit: unregister in the reverse order of sm4_init() (MACs, then skciphers). */
static void __exit sm4_exit(void)
{
+ crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
module_cpu_feature_match(SM4, sm4_init);
module_exit(sm4_exit);
-MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR/XTS using ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("ecb(sm4)");
MODULE_ALIAS_CRYPTO("cbc(sm4)");
MODULE_ALIAS_CRYPTO("cfb(sm4)");
MODULE_ALIAS_CRYPTO("ctr(sm4)");
+MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
+MODULE_ALIAS_CRYPTO("xts(sm4)");
+MODULE_ALIAS_CRYPTO("cmac(sm4)");
+MODULE_ALIAS_CRYPTO("xcbc(sm4)");
+MODULE_ALIAS_CRYPTO("cbcmac(sm4)");
MODULE_AUTHOR("Tianjia Zhang <[email protected]>");
MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/sm4-ce.h b/arch/arm64/crypto/sm4-ce.h
new file mode 100644
index 000000000000..109c21b37590
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 common functions for Crypto Extensions
+ * Copyright (C) 2022 Tianjia Zhang <[email protected]>
+ */
+
+/*
+ * NOTE(review): only the prototypes live here; the notes below are inferred
+ * from the parameter names and the visible call sites and should be
+ * confirmed against the assembly implementation.
+ */
+
+/* Presumably derives @rkey_enc and @rkey_dec from @key using the @fk/@ck tables. */
+void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
+ const u32 *fk, const u32 *ck);
+
+/*
+ * Processes a single block from @src into @dst with round keys @rkey.
+ * The cbcmac finalisation calls it with @dst == @src, i.e. in place.
+ */
+void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
+
+/* Presumably CBC-encrypts @nblocks blocks, chaining through @iv. */
+void sm4_ce_cbc_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblocks);
+
+/* Presumably CFB-encrypts @nblocks blocks, chaining through @iv. */
+void sm4_ce_cfb_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblocks);
diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S
index 3d5256b354d2..f295b4b7d70a 100644
--- a/arch/arm64/crypto/sm4-neon-core.S
+++ b/arch/arm64/crypto/sm4-neon-core.S
@@ -18,6 +18,11 @@
#define RTMP2 v10
#define RTMP3 v11
+/*
+ * Extra temporaries for the eight-register (_2x) shuffle helpers.
+ * v12-v14 are the very same registers as RX0, RX1 and RKEY below, so
+ * RTMP4-RTMP7 may only be used where those values are dead, i.e. outside
+ * the round loop.
+ * NOTE(review): v15 presumably doubles as RIV -- the CBC decrypt path
+ * deliberately avoids the _2x rotate "to avoid overwriting the RIV
+ * register"; confirm against the RIV definition.
+ */
+#define RTMP4 v12
+#define RTMP5 v13
+#define RTMP6 v14
+#define RTMP7 v15
+
#define RX0 v12
#define RX1 v13
#define RKEY v14
@@ -25,7 +30,7 @@
/* Helper macros. */
-#define PREPARE \
+#define SM4_PREPARE() \
adr_l x5, crypto_sm4_sbox; \
ld1 {v16.16b-v19.16b}, [x5], #64; \
ld1 {v20.16b-v23.16b}, [x5], #64; \
@@ -42,7 +47,25 @@
zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
zip2 s3.2d, RTMP2.2d, RTMP3.2d;
-#define rotate_clockwise_90(s0, s1, s2, s3) \
+/*
+ * Transpose two independent 4x4 matrices of 32-bit words at once:
+ * (s0..s3) through RTMP0-RTMP3 and (s4..s7) through RTMP4-RTMP7, using the
+ * same zip1/zip2 pattern as transpose_4x4. The two halves are interleaved
+ * instruction by instruction.
+ * Clobbers RTMP0-RTMP7 (v12-v15 are shared with RX0/RX1/RKEY).
+ */
+#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
+ zip1 RTMP0.4s, s0.4s, s1.4s; \
+ zip1 RTMP1.4s, s2.4s, s3.4s; \
+ zip2 RTMP2.4s, s0.4s, s1.4s; \
+ zip2 RTMP3.4s, s2.4s, s3.4s; \
+ zip1 RTMP4.4s, s4.4s, s5.4s; \
+ zip1 RTMP5.4s, s6.4s, s7.4s; \
+ zip2 RTMP6.4s, s4.4s, s5.4s; \
+ zip2 RTMP7.4s, s6.4s, s7.4s; \
+ zip1 s0.2d, RTMP0.2d, RTMP1.2d; \
+ zip2 s1.2d, RTMP0.2d, RTMP1.2d; \
+ zip1 s2.2d, RTMP2.2d, RTMP3.2d; \
+ zip2 s3.2d, RTMP2.2d, RTMP3.2d; \
+ zip1 s4.2d, RTMP4.2d, RTMP5.2d; \
+ zip2 s5.2d, RTMP4.2d, RTMP5.2d; \
+ zip1 s6.2d, RTMP6.2d, RTMP7.2d; \
+ zip2 s7.2d, RTMP6.2d, RTMP7.2d;
+
+#define rotate_clockwise_4x4(s0, s1, s2, s3) \
zip1 RTMP0.4s, s1.4s, s0.4s; \
zip2 RTMP1.4s, s1.4s, s0.4s; \
zip1 RTMP2.4s, s3.4s, s2.4s; \
@@ -52,6 +75,24 @@
zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
zip2 s3.2d, RTMP3.2d, RTMP1.2d;
+/*
+ * Eight-register form of rotate_clockwise_4x4: applies the same
+ * operand-swapped zip1/zip2 shuffle to (s0..s3) via RTMP0-RTMP3 and to
+ * (s4..s7) via RTMP4-RTMP7.
+ * Clobbers RTMP0-RTMP7, i.e. v12-v15 as well; callers that keep a live
+ * value there must use two rotate_clockwise_4x4 invocations instead, as
+ * the CBC decrypt path does.
+ */
+#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
+ zip1 RTMP0.4s, s1.4s, s0.4s; \
+ zip1 RTMP2.4s, s3.4s, s2.4s; \
+ zip2 RTMP1.4s, s1.4s, s0.4s; \
+ zip2 RTMP3.4s, s3.4s, s2.4s; \
+ zip1 RTMP4.4s, s5.4s, s4.4s; \
+ zip1 RTMP6.4s, s7.4s, s6.4s; \
+ zip2 RTMP5.4s, s5.4s, s4.4s; \
+ zip2 RTMP7.4s, s7.4s, s6.4s; \
+ zip1 s0.2d, RTMP2.2d, RTMP0.2d; \
+ zip2 s1.2d, RTMP2.2d, RTMP0.2d; \
+ zip1 s2.2d, RTMP3.2d, RTMP1.2d; \
+ zip2 s3.2d, RTMP3.2d, RTMP1.2d; \
+ zip1 s4.2d, RTMP6.2d, RTMP4.2d; \
+ zip2 s5.2d, RTMP6.2d, RTMP4.2d; \
+ zip1 s6.2d, RTMP7.2d, RTMP5.2d; \
+ zip2 s7.2d, RTMP7.2d, RTMP5.2d;
+
#define ROUND4(round, s0, s1, s2, s3) \
dup RX0.4s, RKEY.s[round]; \
/* rk ^ s1 ^ s2 ^ s3 */ \
@@ -87,14 +128,7 @@
/* s0 ^= RTMP3 */ \
eor s0.16b, s0.16b, RTMP3.16b;
-#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
- rev32 b0.16b, b0.16b; \
- rev32 b1.16b, b1.16b; \
- rev32 b2.16b, b2.16b; \
- rev32 b3.16b, b3.16b; \
- \
- transpose_4x4(b0, b1, b2, b3); \
- \
+#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \
mov x6, 8; \
4: \
ld1 {RKEY.4s}, [x0], #16; \
@@ -107,15 +141,23 @@
\
bne 4b; \
\
- rotate_clockwise_90(b0, b1, b2, b3); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
rev32 b3.16b, b3.16b; \
\
+ rotate_clockwise_4x4(b0, b1, b2, b3); \
+ \
/* repoint to rkey */ \
sub x0, x0, #128;
+/*
+ * Byte-swap every 32-bit word of b0..b3 (rev32) and then run
+ * SM4_CRYPT_BLK4_BE on them.
+ * Unlike the previous SM4_CRYPT_BLK4 this no longer transposes its inputs:
+ * callers must hand in already-transposed state, either by loading with
+ * ld4 or by invoking transpose_4x4 first.
+ */
+#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \
+ rev32 b0.16b, b0.16b; \
+ rev32 b1.16b, b1.16b; \
+ rev32 b2.16b, b2.16b; \
+ rev32 b3.16b, b3.16b; \
+ SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
+
#define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \
/* rk ^ s1 ^ s2 ^ s3 */ \
dup RX0.4s, RKEY.s[round]; \
@@ -175,7 +217,7 @@
eor s0.16b, s0.16b, RTMP0.16b; \
eor t0.16b, t0.16b, RTMP1.16b;
-#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
+#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
@@ -185,9 +227,6 @@
rev32 b6.16b, b6.16b; \
rev32 b7.16b, b7.16b; \
\
- transpose_4x4(b0, b1, b2, b3); \
- transpose_4x4(b4, b5, b6, b7); \
- \
mov x6, 8; \
8: \
ld1 {RKEY.4s}, [x0], #16; \
@@ -200,8 +239,6 @@
\
bne 8b; \
\
- rotate_clockwise_90(b0, b1, b2, b3); \
- rotate_clockwise_90(b4, b5, b6, b7); \
rev32 b0.16b, b0.16b; \
rev32 b1.16b, b1.16b; \
rev32 b2.16b, b2.16b; \
@@ -214,274 +251,429 @@
/* repoint to rkey */ \
sub x0, x0, #128;
+/*
+ * Full eight-block primitive: run the rounds without the final shuffle
+ * (SM4_CRYPT_BLK8_norotate) and then rotate both 4x4 groups back with
+ * rotate_clockwise_4x4_2x. Clobbers RTMP0-RTMP7; callers that keep a live
+ * value in v12-v15 must use SM4_CRYPT_BLK8_norotate plus two
+ * rotate_clockwise_4x4 invocations instead.
+ *
+ * The last line deliberately has no trailing backslash, so the macro ends
+ * here regardless of what follows it in the file.
+ */
+#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)			\
+	SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7);	\
+	rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7);
-.align 3
-SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4)
- /* input:
- * x0: round key array, CTX
- * x1: dst
- * x2: src
- * w3: num blocks (1..4)
- */
- PREPARE;
-
- ld1 {v0.16b}, [x2], #16;
- mov v1.16b, v0.16b;
- mov v2.16b, v0.16b;
- mov v3.16b, v0.16b;
- cmp w3, #2;
- blt .Lblk4_load_input_done;
- ld1 {v1.16b}, [x2], #16;
- beq .Lblk4_load_input_done;
- ld1 {v2.16b}, [x2], #16;
- cmp w3, #3;
- beq .Lblk4_load_input_done;
- ld1 {v3.16b}, [x2];
-
-.Lblk4_load_input_done:
- SM4_CRYPT_BLK4(v0, v1, v2, v3);
-
- st1 {v0.16b}, [x1], #16;
- cmp w3, #2;
- blt .Lblk4_store_output_done;
- st1 {v1.16b}, [x1], #16;
- beq .Lblk4_store_output_done;
- st1 {v2.16b}, [x1], #16;
- cmp w3, #3;
- beq .Lblk4_store_output_done;
- st1 {v3.16b}, [x1];
-
-.Lblk4_store_output_done:
- ret;
-SYM_FUNC_END(__sm4_neon_crypt_blk1_4)
.align 3
-SYM_FUNC_START(sm4_neon_crypt_blk1_8)
+SYM_FUNC_START(sm4_neon_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
- * w3: num blocks (1..8)
+ * w3: nblocks
*/
- cmp w3, #5;
- blt __sm4_neon_crypt_blk1_4;
-
- PREPARE;
-
- ld1 {v0.16b-v3.16b}, [x2], #64;
- ld1 {v4.16b}, [x2], #16;
- mov v5.16b, v4.16b;
- mov v6.16b, v4.16b;
- mov v7.16b, v4.16b;
- beq .Lblk8_load_input_done;
- ld1 {v5.16b}, [x2], #16;
- cmp w3, #7;
- blt .Lblk8_load_input_done;
- ld1 {v6.16b}, [x2], #16;
- beq .Lblk8_load_input_done;
- ld1 {v7.16b}, [x2];
-
-.Lblk8_load_input_done:
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
-
- cmp w3, #6;
- st1 {v0.16b-v3.16b}, [x1], #64;
- st1 {v4.16b}, [x1], #16;
- blt .Lblk8_store_output_done;
- st1 {v5.16b}, [x1], #16;
- beq .Lblk8_store_output_done;
- st1 {v6.16b}, [x1], #16;
- cmp w3, #7;
- beq .Lblk8_store_output_done;
- st1 {v7.16b}, [x1];
-
-.Lblk8_store_output_done:
- ret;
-SYM_FUNC_END(sm4_neon_crypt_blk1_8)
+ SM4_PREPARE()
-.align 3
-SYM_FUNC_START(sm4_neon_crypt_blk8)
- /* input:
- * x0: round key array, CTX
- * x1: dst
- * x2: src
- * w3: nblocks (multiples of 8)
- */
- PREPARE;
+.Lcrypt_loop_8x:
+ sub w3, w3, #8
+ tbnz w3, #31, .Lcrypt_4x
+
+ ld4 {v0.4s-v3.4s}, [x2], #64
+ ld4 {v4.4s-v7.4s}, [x2], #64
-.Lcrypt_loop_blk:
- subs w3, w3, #8;
- bmi .Lcrypt_end;
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
- ld1 {v0.16b-v3.16b}, [x2], #64;
- ld1 {v4.16b-v7.16b}, [x2], #64;
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ cbz w3, .Lcrypt_end
+ b .Lcrypt_loop_8x
- st1 {v0.16b-v3.16b}, [x1], #64;
- st1 {v4.16b-v7.16b}, [x1], #64;
+.Lcrypt_4x:
+ add w3, w3, #8
+ cmp w3, #4
+ blt .Lcrypt_tail
- b .Lcrypt_loop_blk;
+ sub w3, w3, #4
+
+ ld4 {v0.4s-v3.4s}, [x2], #64
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+
+ cbz w3, .Lcrypt_end
+
+.Lcrypt_tail:
+ cmp w3, #2
+ ld1 {v0.16b}, [x2], #16
+ blt .Lcrypt_tail_load_done
+ ld1 {v1.16b}, [x2], #16
+ beq .Lcrypt_tail_load_done
+ ld1 {v2.16b}, [x2], #16
+
+.Lcrypt_tail_load_done:
+ transpose_4x4(v0, v1, v2, v3)
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ cmp w3, #2
+ st1 {v0.16b}, [x1], #16
+ blt .Lcrypt_end
+ st1 {v1.16b}, [x1], #16
+ beq .Lcrypt_end
+ st1 {v2.16b}, [x1], #16
.Lcrypt_end:
- ret;
-SYM_FUNC_END(sm4_neon_crypt_blk8)
+ ret
+SYM_FUNC_END(sm4_neon_crypt)
.align 3
-SYM_FUNC_START(sm4_neon_cbc_dec_blk8)
+SYM_FUNC_START(sm4_neon_cbc_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
- * w4: nblocks (multiples of 8)
+ * w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE()
+
+ ld1 {RIV.16b}, [x3]
+
+.Lcbc_dec_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lcbc_dec_4x
+
+ ld4 {v0.4s-v3.4s}, [x2], #64
+ ld4 {v4.4s-v7.4s}, [x2]
+
+ SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ /* Avoid overwriting the RIV register */
+ rotate_clockwise_4x4(v0, v1, v2, v3)
+ rotate_clockwise_4x4(v4, v5, v6, v7)
+
+ sub x2, x2, #64
+
+ eor v0.16b, v0.16b, RIV.16b
- ld1 {RIV.16b}, [x3];
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
+ ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
-.Lcbc_loop_blk:
- subs w4, w4, #8;
- bmi .Lcbc_end;
+ eor v1.16b, v1.16b, RTMP0.16b
+ eor v2.16b, v2.16b, RTMP1.16b
+ eor v3.16b, v3.16b, RTMP2.16b
+ eor v4.16b, v4.16b, RTMP3.16b
+ eor v5.16b, v5.16b, RTMP4.16b
+ eor v6.16b, v6.16b, RTMP5.16b
+ eor v7.16b, v7.16b, RTMP6.16b
- ld1 {v0.16b-v3.16b}, [x2], #64;
- ld1 {v4.16b-v7.16b}, [x2];
+ mov RIV.16b, RTMP7.16b
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
- sub x2, x2, #64;
- eor v0.16b, v0.16b, RIV.16b;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v1.16b, v1.16b, RTMP0.16b;
- eor v2.16b, v2.16b, RTMP1.16b;
- eor v3.16b, v3.16b, RTMP2.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+ cbz w4, .Lcbc_dec_end
+ b .Lcbc_dec_loop_8x
- eor v4.16b, v4.16b, RTMP3.16b;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v5.16b, v5.16b, RTMP0.16b;
- eor v6.16b, v6.16b, RTMP1.16b;
- eor v7.16b, v7.16b, RTMP2.16b;
+.Lcbc_dec_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lcbc_dec_tail
- mov RIV.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
+ sub w4, w4, #4
- b .Lcbc_loop_blk;
+ ld1 {v0.16b-v3.16b}, [x2], #64
-.Lcbc_end:
+ rev32 v4.16b, v0.16b
+ rev32 v5.16b, v1.16b
+ rev32 v6.16b, v2.16b
+ rev32 v7.16b, v3.16b
+
+ transpose_4x4(v4, v5, v6, v7)
+
+ SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
+
+ eor v4.16b, v4.16b, RIV.16b
+ eor v5.16b, v5.16b, v0.16b
+ eor v6.16b, v6.16b, v1.16b
+ eor v7.16b, v7.16b, v2.16b
+
+ mov RIV.16b, v3.16b
+
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ cbz w4, .Lcbc_dec_end
+
+.Lcbc_dec_tail:
+ cmp w4, #2
+ ld1 {v0.16b}, [x2], #16
+ blt .Lcbc_dec_tail_load_done
+ ld1 {v1.16b}, [x2], #16
+ beq .Lcbc_dec_tail_load_done
+ ld1 {v2.16b}, [x2], #16
+
+.Lcbc_dec_tail_load_done:
+ rev32 v4.16b, v0.16b
+ rev32 v5.16b, v1.16b
+ rev32 v6.16b, v2.16b
+
+ transpose_4x4(v4, v5, v6, v7)
+
+ SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
+
+ cmp w4, #2
+ eor v4.16b, v4.16b, RIV.16b
+ mov RIV.16b, v0.16b
+ st1 {v4.16b}, [x1], #16
+ blt .Lcbc_dec_end
+
+ eor v5.16b, v5.16b, v0.16b
+ mov RIV.16b, v1.16b
+ st1 {v5.16b}, [x1], #16
+ beq .Lcbc_dec_end
+
+ eor v6.16b, v6.16b, v1.16b
+ mov RIV.16b, v2.16b
+ st1 {v6.16b}, [x1], #16
+
+.Lcbc_dec_end:
/* store new IV */
- st1 {RIV.16b}, [x3];
+ st1 {RIV.16b}, [x3]
- ret;
-SYM_FUNC_END(sm4_neon_cbc_dec_blk8)
+ ret
+SYM_FUNC_END(sm4_neon_cbc_dec)
.align 3
-SYM_FUNC_START(sm4_neon_cfb_dec_blk8)
+SYM_FUNC_START(sm4_neon_cfb_dec)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: iv (big endian, 128 bit)
- * w4: nblocks (multiples of 8)
+ * w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE()
+
+ ld1 {v0.16b}, [x3]
+
+.Lcfb_dec_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lcfb_dec_4x
+
+ ld1 {v1.16b-v3.16b}, [x2], #48
+ ld4 {v4.4s-v7.4s}, [x2]
+
+ transpose_4x4(v0, v1, v2, v3)
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ sub x2, x2, #48
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
+ ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
+
+ eor v0.16b, v0.16b, RTMP0.16b
+ eor v1.16b, v1.16b, RTMP1.16b
+ eor v2.16b, v2.16b, RTMP2.16b
+ eor v3.16b, v3.16b, RTMP3.16b
+ eor v4.16b, v4.16b, RTMP4.16b
+ eor v5.16b, v5.16b, RTMP5.16b
+ eor v6.16b, v6.16b, RTMP6.16b
+ eor v7.16b, v7.16b, RTMP7.16b
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ mov v0.16b, RTMP7.16b
+
+ cbz w4, .Lcfb_dec_end
+ b .Lcfb_dec_loop_8x
+
+.Lcfb_dec_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lcfb_dec_tail
+
+ sub w4, w4, #4
+
+ ld1 {v4.16b-v7.16b}, [x2], #64
+
+ rev32 v0.16b, v0.16b /* v0 is IV register */
+ rev32 v1.16b, v4.16b
+ rev32 v2.16b, v5.16b
+ rev32 v3.16b, v6.16b
+
+ transpose_4x4(v0, v1, v2, v3)
+
+ SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
- ld1 {v0.16b}, [x3];
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ eor v3.16b, v3.16b, v7.16b
-.Lcfb_loop_blk:
- subs w4, w4, #8;
- bmi .Lcfb_end;
+ st1 {v0.16b-v3.16b}, [x1], #64
- ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48;
- ld1 {v4.16b-v7.16b}, [x2];
+ mov v0.16b, v7.16b
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+ cbz w4, .Lcfb_dec_end
- sub x2, x2, #48;
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
+.Lcfb_dec_tail:
+ cmp w4, #2
+ ld1 {v4.16b}, [x2], #16
+ blt .Lcfb_dec_tail_load_done
+ ld1 {v5.16b}, [x2], #16
+ beq .Lcfb_dec_tail_load_done
+ ld1 {v6.16b}, [x2], #16
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v4.16b, v4.16b, RTMP0.16b;
- eor v5.16b, v5.16b, RTMP1.16b;
- eor v6.16b, v6.16b, RTMP2.16b;
- eor v7.16b, v7.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
+.Lcfb_dec_tail_load_done:
+ rev32 v0.16b, v0.16b /* v0 is IV register */
+ rev32 v1.16b, v4.16b
+ rev32 v2.16b, v5.16b
- mov v0.16b, RTMP3.16b;
+ transpose_4x4(v0, v1, v2, v3)
- b .Lcfb_loop_blk;
+ SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
-.Lcfb_end:
+ cmp w4, #2
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x1], #16
+ mov v0.16b, v4.16b
+ blt .Lcfb_dec_end
+
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v1.16b}, [x1], #16
+ mov v0.16b, v5.16b
+ beq .Lcfb_dec_end
+
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v2.16b}, [x1], #16
+ mov v0.16b, v6.16b
+
+.Lcfb_dec_end:
/* store new IV */
- st1 {v0.16b}, [x3];
+ st1 {v0.16b}, [x3]
- ret;
-SYM_FUNC_END(sm4_neon_cfb_dec_blk8)
+ ret
+SYM_FUNC_END(sm4_neon_cfb_dec)
.align 3
-SYM_FUNC_START(sm4_neon_ctr_enc_blk8)
+SYM_FUNC_START(sm4_neon_ctr_crypt)
/* input:
* x0: round key array, CTX
* x1: dst
* x2: src
* x3: ctr (big endian, 128 bit)
- * w4: nblocks (multiples of 8)
+ * w4: nblocks
*/
- PREPARE;
+ SM4_PREPARE()
- ldp x7, x8, [x3];
- rev x7, x7;
- rev x8, x8;
+ ldp x7, x8, [x3]
+ rev x7, x7
+ rev x8, x8
-.Lctr_loop_blk:
- subs w4, w4, #8;
- bmi .Lctr_end;
+.Lctr_crypt_loop_8x:
+ sub w4, w4, #8
+ tbnz w4, #31, .Lctr_crypt_4x
-#define inc_le128(vctr) \
- mov vctr.d[1], x8; \
- mov vctr.d[0], x7; \
- adds x8, x8, #1; \
- adc x7, x7, xzr; \
- rev64 vctr.16b, vctr.16b;
+#define inc_le128(vctr) \
+ mov vctr.d[1], x8; \
+ mov vctr.d[0], x7; \
+ adds x8, x8, #1; \
+ rev64 vctr.16b, vctr.16b; \
+ adc x7, x7, xzr;
/* construct CTRs */
- inc_le128(v0); /* +0 */
- inc_le128(v1); /* +1 */
- inc_le128(v2); /* +2 */
- inc_le128(v3); /* +3 */
- inc_le128(v4); /* +4 */
- inc_le128(v5); /* +5 */
- inc_le128(v6); /* +6 */
- inc_le128(v7); /* +7 */
-
- SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
-
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v0.16b, v0.16b, RTMP0.16b;
- eor v1.16b, v1.16b, RTMP1.16b;
- eor v2.16b, v2.16b, RTMP2.16b;
- eor v3.16b, v3.16b, RTMP3.16b;
- st1 {v0.16b-v3.16b}, [x1], #64;
-
- ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64;
- eor v4.16b, v4.16b, RTMP0.16b;
- eor v5.16b, v5.16b, RTMP1.16b;
- eor v6.16b, v6.16b, RTMP2.16b;
- eor v7.16b, v7.16b, RTMP3.16b;
- st1 {v4.16b-v7.16b}, [x1], #64;
-
- b .Lctr_loop_blk;
-
-.Lctr_end:
+ inc_le128(v0) /* +0 */
+ inc_le128(v1) /* +1 */
+ inc_le128(v2) /* +2 */
+ inc_le128(v3) /* +3 */
+ inc_le128(v4) /* +4 */
+ inc_le128(v5) /* +5 */
+ inc_le128(v6) /* +6 */
+ inc_le128(v7) /* +7 */
+
+ transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+ ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
+ ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
+
+ eor v0.16b, v0.16b, RTMP0.16b
+ eor v1.16b, v1.16b, RTMP1.16b
+ eor v2.16b, v2.16b, RTMP2.16b
+ eor v3.16b, v3.16b, RTMP3.16b
+ eor v4.16b, v4.16b, RTMP4.16b
+ eor v5.16b, v5.16b, RTMP5.16b
+ eor v6.16b, v6.16b, RTMP6.16b
+ eor v7.16b, v7.16b, RTMP7.16b
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+ st1 {v4.16b-v7.16b}, [x1], #64
+
+ cbz w4, .Lctr_crypt_end
+ b .Lctr_crypt_loop_8x
+
+.Lctr_crypt_4x:
+ add w4, w4, #8
+ cmp w4, #4
+ blt .Lctr_crypt_tail
+
+ sub w4, w4, #4
+
+ /* construct CTRs */
+ inc_le128(v0) /* +0 */
+ inc_le128(v1) /* +1 */
+ inc_le128(v2) /* +2 */
+ inc_le128(v3) /* +3 */
+
+ ld1 {v4.16b-v7.16b}, [x2], #64
+
+ transpose_4x4(v0, v1, v2, v3)
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ eor v3.16b, v3.16b, v7.16b
+
+ st1 {v0.16b-v3.16b}, [x1], #64
+
+ cbz w4, .Lctr_crypt_end
+
+.Lctr_crypt_tail:
+ /* inc_le128 will change the sign bit */
+ ld1 {v4.16b}, [x2], #16
+ inc_le128(v0)
+ cmp w4, #2
+ blt .Lctr_crypt_tail_load_done
+
+ ld1 {v5.16b}, [x2], #16
+ inc_le128(v1)
+ cmp w4, #2
+ beq .Lctr_crypt_tail_load_done
+
+ ld1 {v6.16b}, [x2], #16
+ inc_le128(v2)
+
+.Lctr_crypt_tail_load_done:
+ transpose_4x4(v0, v1, v2, v3)
+
+ SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+ cmp w4, #2
+
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x1], #16
+ blt .Lctr_crypt_end
+
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v1.16b}, [x1], #16
+ beq .Lctr_crypt_end
+
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v2.16b}, [x1], #16
+
+.Lctr_crypt_end:
/* store new CTR */
- rev x7, x7;
- rev x8, x8;
- stp x7, x8, [x3];
+ rev x7, x7
+ rev x8, x8
+ stp x7, x8, [x3]
- ret;
-SYM_FUNC_END(sm4_neon_ctr_enc_blk8)
+ ret
+SYM_FUNC_END(sm4_neon_ctr_crypt)
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index 03a6a6866a31..7b19accf5c03 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -18,19 +18,14 @@
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
-#define BYTES2BLKS(nbytes) ((nbytes) >> 4)
-#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1))
-
-asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src,
- unsigned int nblks);
-asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src,
- unsigned int nblks);
-asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
- u8 *iv, unsigned int nblks);
-asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
- u8 *iv, unsigned int nblks);
-asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src,
- u8 *iv, unsigned int nblks);
+/*
+ * Assembly entry points from sm4-neon-core.S. Each routine accepts an
+ * arbitrary block count and handles it as an 8-block loop, then one
+ * 4-block step, then a 1-3 block tail.
+ *
+ * @nblocks must be non-zero: with a count of zero the routines fall
+ * through to the tail path, which always loads and stores at least one
+ * block. All callers in this file guard the call with "if (nblocks)".
+ *
+ * sm4_neon_cbc_dec and sm4_neon_cfb_dec read the chaining value from @iv
+ * and store the updated one back; sm4_neon_ctr_crypt does the same with
+ * the 128-bit big-endian counter.
+ */
+asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst, const u8 *src,
+ unsigned int nblocks);
+asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_neon_cfb_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey_enc, u8 *dst, const u8 *src,
+ u8 *iv, unsigned int nblocks);
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
@@ -51,27 +46,18 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ unsigned int nblocks;
- kernel_neon_begin();
+ nblocks = nbytes / SM4_BLOCK_SIZE;
+ if (nblocks) {
+ kernel_neon_begin();
- nblks = BYTES2BLK8(nbytes);
- if (nblks) {
- sm4_neon_crypt_blk8(rkey, dst, src, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ sm4_neon_crypt(rkey, dst, src, nblocks);
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_neon_crypt_blk1_8(rkey, dst, src, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
+ kernel_neon_end();
}
- kernel_neon_end();
-
- err = skcipher_walk_done(&walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
@@ -138,48 +124,19 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ unsigned int nblocks;
- kernel_neon_begin();
+ nblocks = nbytes / SM4_BLOCK_SIZE;
+ if (nblocks) {
+ kernel_neon_begin();
- nblks = BYTES2BLK8(nbytes);
- if (nblks) {
- sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src,
- walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- u8 keystream[SM4_BLOCK_SIZE * 8];
- u8 iv[SM4_BLOCK_SIZE];
- int i;
-
- sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream,
- src, nblks);
-
- src += ((int)nblks - 2) * SM4_BLOCK_SIZE;
- dst += (nblks - 1) * SM4_BLOCK_SIZE;
- memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
-
- for (i = nblks - 1; i > 0; i--) {
- crypto_xor_cpy(dst, src,
- &keystream[i * SM4_BLOCK_SIZE],
- SM4_BLOCK_SIZE);
- src -= SM4_BLOCK_SIZE;
- dst -= SM4_BLOCK_SIZE;
- }
- crypto_xor_cpy(dst, walk.iv,
- keystream, SM4_BLOCK_SIZE);
- memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
- nbytes -= nblks * SM4_BLOCK_SIZE;
+ kernel_neon_end();
}
- kernel_neon_end();
-
- err = skcipher_walk_done(&walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
}
return err;
@@ -238,41 +195,21 @@ static int sm4_cfb_decrypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ unsigned int nblocks;
- kernel_neon_begin();
+ nblocks = nbytes / SM4_BLOCK_SIZE;
+ if (nblocks) {
+ kernel_neon_begin();
- nblks = BYTES2BLK8(nbytes);
- if (nblks) {
- sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src,
- walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ sm4_neon_cfb_dec(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- u8 keystream[SM4_BLOCK_SIZE * 8];
-
- memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
- if (nblks > 1)
- memcpy(&keystream[SM4_BLOCK_SIZE], src,
- (nblks - 1) * SM4_BLOCK_SIZE);
- memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE,
- SM4_BLOCK_SIZE);
-
- sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
- keystream, nblks);
-
- crypto_xor_cpy(dst, src, keystream,
- nblks * SM4_BLOCK_SIZE);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ kernel_neon_end();
- kernel_neon_end();
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
@@ -302,40 +239,21 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
while ((nbytes = walk.nbytes) > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- unsigned int nblks;
+ unsigned int nblocks;
- kernel_neon_begin();
+ nblocks = nbytes / SM4_BLOCK_SIZE;
+ if (nblocks) {
+ kernel_neon_begin();
- nblks = BYTES2BLK8(nbytes);
- if (nblks) {
- sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src,
- walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- u8 keystream[SM4_BLOCK_SIZE * 8];
- int i;
-
- for (i = 0; i < nblks; i++) {
- memcpy(&keystream[i * SM4_BLOCK_SIZE],
- walk.iv, SM4_BLOCK_SIZE);
- crypto_inc(walk.iv, SM4_BLOCK_SIZE);
- }
- sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
- keystream, nblks);
-
- crypto_xor_cpy(dst, src, keystream,
- nblks * SM4_BLOCK_SIZE);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
+ kernel_neon_end();
- kernel_neon_end();
+ dst += nblocks * SM4_BLOCK_SIZE;
+ src += nblocks * SM4_BLOCK_SIZE;
+ nbytes -= nblocks * SM4_BLOCK_SIZE;
+ }
/* tail */
if (walk.nbytes == walk.total && nbytes > 0) {
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 32b3341fe970..da985e0dc69a 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -82,7 +82,6 @@ static int __init rng_init (void)
sigio_broken(random_fd);
hwrng.name = RNG_MODULE_NAME;
hwrng.read = rng_dev_read;
- hwrng.quality = 1024;
err = hwrng_register(&hwrng);
if (err) {
diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c
index b7664d018851..8fa58b0f3cb3 100644
--- a/arch/x86/crypto/polyval-clmulni_glue.c
+++ b/arch/x86/crypto/polyval-clmulni_glue.c
@@ -27,13 +27,17 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#define POLYVAL_ALIGN 16
+#define POLYVAL_ALIGN_ATTR __aligned(POLYVAL_ALIGN)
+#define POLYVAL_ALIGN_EXTRA ((POLYVAL_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define POLYVAL_CTX_SIZE (sizeof(struct polyval_tfm_ctx) + POLYVAL_ALIGN_EXTRA)
#define NUM_KEY_POWERS 8
struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
*/
- u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
+ u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE] POLYVAL_ALIGN_ATTR;
};
struct polyval_desc_ctx {
@@ -45,6 +49,11 @@ asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
+/*
+ * Return the POLYVAL_ALIGN-aligned tfm context for @tfm.
+ *
+ * The raw context returned by crypto_shash_ctx() is rounded up to the next
+ * POLYVAL_ALIGN boundary; POLYVAL_CTX_SIZE reserves POLYVAL_ALIGN_EXTRA
+ * bytes of slack so the aligned structure still fits.
+ */
+static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
+{
+	void *raw_ctx = crypto_shash_ctx(tfm);
+
+	return PTR_ALIGN(raw_ctx, POLYVAL_ALIGN);
+}
+
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
@@ -72,7 +81,7 @@ static void internal_polyval_mul(u8 *op1, const u8 *op2)
static int polyval_x86_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+ struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(tfm);
int i;
if (keylen != POLYVAL_BLOCK_SIZE)
@@ -102,7 +111,7 @@ static int polyval_x86_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
u8 *pos;
unsigned int nblocks;
unsigned int n;
@@ -143,7 +152,7 @@ static int polyval_x86_update(struct shash_desc *desc,
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
- const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
if (dctx->bytes) {
internal_polyval_mul(dctx->buffer,
@@ -167,7 +176,7 @@ static struct shash_alg polyval_alg = {
.cra_driver_name = "polyval-clmulni",
.cra_priority = 200,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct polyval_tfm_ctx),
+ .cra_ctxsize = POLYVAL_CTX_SIZE,
.cra_module = THIS_MODULE,
},
};
diff --git a/crypto/Kconfig b/crypto/Kconfig
index d779667671b2..9c86f7045157 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -175,9 +175,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
-config CRYPTO_GF128MUL
- tristate
-
config CRYPTO_NULL
tristate "Null algorithms"
select CRYPTO_NULL2
@@ -714,9 +711,9 @@ config CRYPTO_KEYWRAP
config CRYPTO_LRW
tristate "LRW (Liskov Rivest Wagner)"
+ select CRYPTO_LIB_GF128MUL
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
- select CRYPTO_GF128MUL
select CRYPTO_ECB
help
LRW (Liskov Rivest Wagner) mode
@@ -926,8 +923,8 @@ config CRYPTO_CMAC
config CRYPTO_GHASH
tristate "GHASH"
- select CRYPTO_GF128MUL
select CRYPTO_HASH
+ select CRYPTO_LIB_GF128MUL
help
GCM GHASH function (NIST SP800-38D)
@@ -967,8 +964,8 @@ config CRYPTO_MICHAEL_MIC
config CRYPTO_POLYVAL
tristate
- select CRYPTO_GF128MUL
select CRYPTO_HASH
+ select CRYPTO_LIB_GF128MUL
help
POLYVAL hash function for HCTR2
diff --git a/crypto/Makefile b/crypto/Makefile
index 303b21c43df0..d0126c915834 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
-obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
obj-$(CONFIG_CRYPTO_CFB) += cfb.o
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index e893c0f6c879..0a4fa2a429e2 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -12,6 +12,8 @@
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/key-type.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/net.h>
@@ -19,6 +21,10 @@
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/security.h>
+#include <linux/string.h>
+#include <keys/user-type.h>
+#include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
struct alg_type_list {
const struct af_alg_type *type;
@@ -222,6 +228,129 @@ out:
return err;
}
+#ifdef CONFIG_KEYS
+
+/*
+ * Return a pointer to the payload bytes of a "user"/"logon" key and store
+ * the key's data length in @datalen.
+ *
+ * Returns ERR_PTR(-EKEYREVOKED) when no usable payload is attached.
+ * The only caller in this file invokes it with key->sem held for reading.
+ */
+static const u8 *key_data_ptr_user(const struct key *key,
+				   unsigned int *datalen)
+{
+	const struct user_key_payload *payload = user_key_payload_locked(key);
+
+	if (IS_ERR_OR_NULL(payload))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = key->datalen;
+	return payload->data;
+}
+
+/*
+ * Return a pointer to the decrypted data of an "encrypted" key and store
+ * its length (decrypted_datalen) in @datalen.
+ *
+ * Returns ERR_PTR(-EKEYREVOKED) when no usable payload is attached.
+ * The only caller in this file invokes it with key->sem held for reading.
+ */
+static const u8 *key_data_ptr_encrypted(const struct key *key,
+					unsigned int *datalen)
+{
+	const struct encrypted_key_payload *payload;
+
+	payload = dereference_key_locked(key);
+	if (IS_ERR_OR_NULL(payload))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = payload->decrypted_datalen;
+	return payload->decrypted_data;
+}
+
+/*
+ * Return a pointer to the key material of a "trusted" key and store its
+ * length (key_len) in @datalen.
+ *
+ * Returns ERR_PTR(-EKEYREVOKED) when no usable payload is attached.
+ * The only caller in this file invokes it with key->sem held for reading.
+ */
+static const u8 *key_data_ptr_trusted(const struct key *key,
+				      unsigned int *datalen)
+{
+	const struct trusted_key_payload *payload;
+
+	payload = dereference_key_locked(key);
+	if (IS_ERR_OR_NULL(payload))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = payload->key_len;
+	return payload->key;
+}
+
+/*
+ * Resolve @serial to a struct key, requiring KEY_NEED_SEARCH permission.
+ *
+ * Returns the key on success or an ERR_PTR() carrying the error from
+ * lookup_user_key().
+ * NOTE(review): lookup_user_key() is documented to return a referenced
+ * key, so the caller owns that reference and should drop it with
+ * key_put() -- confirm against the keys core documentation.
+ */
+static struct key *lookup_key(key_serial_t serial)
+{
+	key_ref_t ref = lookup_user_key(serial, 0, KEY_NEED_SEARCH);
+
+	return IS_ERR(ref) ? ERR_CAST(ref) : key_ref_to_ptr(ref);
+}
+
+/*
+ * ALG_SET_KEY_BY_KEY_SERIAL: set the algorithm key from a keyring key.
+ *
+ * @optval must hold exactly one key_serial_t. The key is looked up, its
+ * payload is copied into a socket-accounted buffer while key->sem is held
+ * for reading, and the copy is then passed to the type's ->setkey() with
+ * the semaphore already released. The temporary buffer is zeroised and
+ * freed before returning.
+ *
+ * Supported key types are "user", "logon" and, when reachable in the
+ * current configuration, "encrypted" and "trusted".
+ *
+ * Returns 0 on success, -EINVAL for a wrong @optlen, -EFAULT if the serial
+ * cannot be copied in, the lookup error if the key cannot be found,
+ * -ENOPROTOOPT for an unsupported key type, -EKEYREVOKED if the key has no
+ * usable payload, -ENOMEM on allocation failure, or the ->setkey() result.
+ */
+static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval,
+				    unsigned int optlen)
+{
+	const struct af_alg_type *type = ask->type;
+	u8 *key_data = NULL;
+	unsigned int key_datalen;
+	key_serial_t serial;
+	struct key *key;
+	const u8 *ret;
+	int err;
+
+	if (optlen != sizeof(serial))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&serial, optval, optlen))
+		return -EFAULT;
+
+	/* Takes a reference on the key; every path below must key_put() it. */
+	key = lookup_key(serial);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	down_read(&key->sem);
+
+	ret = ERR_PTR(-ENOPROTOOPT);
+	if (!strcmp(key->type->name, "user") ||
+	    !strcmp(key->type->name, "logon")) {
+		ret = key_data_ptr_user(key, &key_datalen);
+	} else if (IS_REACHABLE(CONFIG_ENCRYPTED_KEYS) &&
+		   !strcmp(key->type->name, "encrypted")) {
+		ret = key_data_ptr_encrypted(key, &key_datalen);
+	} else if (IS_REACHABLE(CONFIG_TRUSTED_KEYS) &&
+		   !strcmp(key->type->name, "trusted")) {
+		ret = key_data_ptr_trusted(key, &key_datalen);
+	}
+
+	if (IS_ERR(ret)) {
+		err = PTR_ERR(ret);
+		goto out_release_key;
+	}
+
+	key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL);
+	if (!key_data) {
+		err = -ENOMEM;
+		goto out_release_key;
+	}
+
+	/* Copy under the semaphore so the payload cannot change mid-copy. */
+	memcpy(key_data, ret, key_datalen);
+
+	/* The private copy is all we need from here on: let go of the key. */
+	up_read(&key->sem);
+	key_put(key);
+
+	err = type->setkey(ask->private, key_data, key_datalen);
+
+	sock_kzfree_s(&ask->sk, key_data, key_datalen);
+
+	return err;
+
+out_release_key:
+	up_read(&key->sem);
+	key_put(key);
+	return err;
+}
+
+#else
+
+/*
+ * Stub used when CONFIG_KEYS is disabled: there is no keyring to look the
+ * serial up in, so the option is reported as unsupported.
+ */
+static inline int alg_setkey_by_key_serial(struct alg_sock *ask,
+ sockptr_t optval,
+ unsigned int optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+#endif
+
static int alg_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -242,12 +371,16 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case ALG_SET_KEY:
+ case ALG_SET_KEY_BY_KEY_SERIAL:
if (sock->state == SS_CONNECTED)
goto unlock;
if (!type->setkey)
goto unlock;
- err = alg_setkey(sk, optval, optlen);
+ if (optname == ALG_SET_KEY_BY_KEY_SERIAL)
+ err = alg_setkey_by_key_serial(ask, optval, optlen);
+ else
+ err = alg_setkey(sk, optval, optlen);
break;
case ALG_SET_AEAD_AUTHSIZE:
if (sock->state == SS_CONNECTED)
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 6b815ece51c6..30dbae72728f 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -218,7 +218,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
cryptlen += ilen;
}
- ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
+ ahash_request_set_crypt(ahreq, plain, odata, cryptlen);
err = crypto_ahash_finup(ahreq);
out:
return err;
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 668095eca0fa..ca3a40fc7da9 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -68,11 +68,12 @@ struct aead_instance_ctx {
struct cryptd_skcipher_ctx {
refcount_t refcnt;
- struct crypto_sync_skcipher *child;
+ struct crypto_skcipher *child;
};
struct cryptd_skcipher_request_ctx {
crypto_completion_t complete;
+ struct skcipher_request req;
};
struct cryptd_hash_ctx {
@@ -227,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
const u8 *key, unsigned int keylen)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
- struct crypto_sync_skcipher *child = ctx->child;
+ struct crypto_skcipher *child = ctx->child;
- crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(child,
- crypto_skcipher_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- return crypto_sync_skcipher_setkey(child, key, keylen);
+ crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(child,
+ crypto_skcipher_get_flags(parent) &
+ CRYPTO_TFM_REQ_MASK);
+ return crypto_skcipher_setkey(child, key, keylen);
}
static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
@@ -258,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_sync_skcipher *child = ctx->child;
- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
+ struct skcipher_request *subreq = &rctx->req;
+ struct crypto_skcipher *child = ctx->child;
if (unlikely(err == -EINPROGRESS))
goto out;
- skcipher_request_set_sync_tfm(subreq, child);
+ skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -286,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_sync_skcipher *child = ctx->child;
- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
+ struct skcipher_request *subreq = &rctx->req;
+ struct crypto_skcipher *child = ctx->child;
if (unlikely(err == -EINPROGRESS))
goto out;
- skcipher_request_set_sync_tfm(subreq, child);
+ skcipher_request_set_tfm(subreq, child);
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
NULL, NULL);
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -343,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
if (IS_ERR(cipher))
return PTR_ERR(cipher);
- ctx->child = (struct crypto_sync_skcipher *)cipher;
+ ctx->child = cipher;
crypto_skcipher_set_reqsize(
- tfm, sizeof(struct cryptd_skcipher_request_ctx));
+ tfm, sizeof(struct cryptd_skcipher_request_ctx) +
+ crypto_skcipher_reqsize(cipher));
return 0;
}
@@ -353,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(ctx->child);
+ crypto_free_skcipher(ctx->child);
}
static void cryptd_skcipher_free(struct skcipher_instance *inst)
@@ -931,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
{
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
- return &ctx->child->base;
+ return ctx->child;
}
EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 418211180cee..0ecab31cfe79 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -763,7 +763,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
struct crypto_skcipher *tfm;
/* Only sync algorithms allowed. */
- mask |= CRYPTO_ALG_ASYNC;
+ mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE;
tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index a82679b576bb..0f101897e90f 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -324,7 +324,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
crypto_req_done, &data[i].wait);
}
- pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+ pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
get_driver_name(crypto_aead, tfm), e);
i = 0;
@@ -506,8 +506,8 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
out:
if (ret == 0)
- printk("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / 8, blen);
+ pr_cont("1 operation in %lu cycles (%d bytes)\n",
+ (cycles + 4) / 8, blen);
return ret;
}
@@ -575,8 +575,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
}
crypto_init_wait(&wait);
- printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
- get_driver_name(crypto_aead, tfm), e);
+ pr_info("testing speed of %s (%s) %s\n", algo,
+ get_driver_name(crypto_aead, tfm), e);
req = aead_request_alloc(tfm, GFP_KERNEL);
if (!req) {
@@ -624,8 +624,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
memset(iv, 0xff, iv_len);
crypto_aead_clear_flags(tfm, ~0);
- printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
- i, *keysize * 8, bs);
+ pr_info("test %u (%d bit key, %d byte blocks): ",
+ i, *keysize * 8, bs);
memset(tvmem[0], 0xff, PAGE_SIZE);
@@ -727,8 +727,8 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
return ret;
}
- printk("%6u opers/sec, %9lu bytes/sec\n",
- bcount / secs, ((long)bcount * blen) / secs);
+ pr_cont("%6u opers/sec, %9lu bytes/sec\n",
+ bcount / secs, ((long)bcount * blen) / secs);
return 0;
}
@@ -877,8 +877,8 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
return;
}
- printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo,
- get_driver_name(crypto_ahash, tfm));
+ pr_info("testing speed of async %s (%s)\n", algo,
+ get_driver_name(crypto_ahash, tfm));
if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) {
pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm),
@@ -1117,7 +1117,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
crypto_init_wait(&data[i].wait);
}
- pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+ pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
get_driver_name(crypto_skcipher, tfm), e);
i = 0;
@@ -1324,13 +1324,12 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
return;
}
- pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync",
+ pr_info("testing speed of %s %s (%s) %s\n", async ? "async" : "sync",
algo, get_driver_name(crypto_skcipher, tfm), e);
req = skcipher_request_alloc(tfm, GFP_KERNEL);
if (!req) {
- pr_err("tcrypt: skcipher: Failed to allocate request for %s\n",
- algo);
+ pr_err("skcipher: Failed to allocate request for %s\n", algo);
goto out;
}
@@ -1471,387 +1470,396 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
}
for (i = 1; i < 200; i++)
- ret += do_test(NULL, 0, 0, i, num_mb);
+ ret = min(ret, do_test(NULL, 0, 0, i, num_mb));
break;
case 1:
- ret += tcrypt_test("md5");
+ ret = min(ret, tcrypt_test("md5"));
break;
case 2:
- ret += tcrypt_test("sha1");
+ ret = min(ret, tcrypt_test("sha1"));
break;
case 3:
- ret += tcrypt_test("ecb(des)");
- ret += tcrypt_test("cbc(des)");
- ret += tcrypt_test("ctr(des)");
+ ret = min(ret, tcrypt_test("ecb(des)"));
+ ret = min(ret, tcrypt_test("cbc(des)"));
+ ret = min(ret, tcrypt_test("ctr(des)"));
break;
case 4:
- ret += tcrypt_test("ecb(des3_ede)");
- ret += tcrypt_test("cbc(des3_ede)");
- ret += tcrypt_test("ctr(des3_ede)");
+ ret = min(ret, tcrypt_test("ecb(des3_ede)"));
+ ret = min(ret, tcrypt_test("cbc(des3_ede)"));
+ ret = min(ret, tcrypt_test("ctr(des3_ede)"));
break;
case 5:
- ret += tcrypt_test("md4");
+ ret = min(ret, tcrypt_test("md4"));
break;
case 6:
- ret += tcrypt_test("sha256");
+ ret = min(ret, tcrypt_test("sha256"));
break;
case 7:
- ret += tcrypt_test("ecb(blowfish)");
- ret += tcrypt_test("cbc(blowfish)");
- ret += tcrypt_test("ctr(blowfish)");
+ ret = min(ret, tcrypt_test("ecb(blowfish)"));
+ ret = min(ret, tcrypt_test("cbc(blowfish)"));
+ ret = min(ret, tcrypt_test("ctr(blowfish)"));
break;
case 8:
- ret += tcrypt_test("ecb(twofish)");
- ret += tcrypt_test("cbc(twofish)");
- ret += tcrypt_test("ctr(twofish)");
- ret += tcrypt_test("lrw(twofish)");
- ret += tcrypt_test("xts(twofish)");
+ ret = min(ret, tcrypt_test("ecb(twofish)"));
+ ret = min(ret, tcrypt_test("cbc(twofish)"));
+ ret = min(ret, tcrypt_test("ctr(twofish)"));
+ ret = min(ret, tcrypt_test("lrw(twofish)"));
+ ret = min(ret, tcrypt_test("xts(twofish)"));
break;
case 9:
- ret += tcrypt_test("ecb(serpent)");
- ret += tcrypt_test("cbc(serpent)");
- ret += tcrypt_test("ctr(serpent)");
- ret += tcrypt_test("lrw(serpent)");
- ret += tcrypt_test("xts(serpent)");
+ ret = min(ret, tcrypt_test("ecb(serpent)"));
+ ret = min(ret, tcrypt_test("cbc(serpent)"));
+ ret = min(ret, tcrypt_test("ctr(serpent)"));
+ ret = min(ret, tcrypt_test("lrw(serpent)"));
+ ret = min(ret, tcrypt_test("xts(serpent)"));
break;
case 10:
- ret += tcrypt_test("ecb(aes)");
- ret += tcrypt_test("cbc(aes)");
- ret += tcrypt_test("lrw(aes)");
- ret += tcrypt_test("xts(aes)");
- ret += tcrypt_test("ctr(aes)");
- ret += tcrypt_test("rfc3686(ctr(aes))");
- ret += tcrypt_test("ofb(aes)");
- ret += tcrypt_test("cfb(aes)");
- ret += tcrypt_test("xctr(aes)");
+ ret = min(ret, tcrypt_test("ecb(aes)"));
+ ret = min(ret, tcrypt_test("cbc(aes)"));
+ ret = min(ret, tcrypt_test("lrw(aes)"));
+ ret = min(ret, tcrypt_test("xts(aes)"));
+ ret = min(ret, tcrypt_test("ctr(aes)"));
+ ret = min(ret, tcrypt_test("rfc3686(ctr(aes))"));
+ ret = min(ret, tcrypt_test("ofb(aes)"));
+ ret = min(ret, tcrypt_test("cfb(aes)"));
+ ret = min(ret, tcrypt_test("xctr(aes)"));
break;
case 11:
- ret += tcrypt_test("sha384");
+ ret = min(ret, tcrypt_test("sha384"));
break;
case 12:
- ret += tcrypt_test("sha512");
+ ret = min(ret, tcrypt_test("sha512"));
break;
case 13:
- ret += tcrypt_test("deflate");
+ ret = min(ret, tcrypt_test("deflate"));
break;
case 14:
- ret += tcrypt_test("ecb(cast5)");
- ret += tcrypt_test("cbc(cast5)");
- ret += tcrypt_test("ctr(cast5)");
+ ret = min(ret, tcrypt_test("ecb(cast5)"));
+ ret = min(ret, tcrypt_test("cbc(cast5)"));
+ ret = min(ret, tcrypt_test("ctr(cast5)"));
break;
case 15:
- ret += tcrypt_test("ecb(cast6)");
- ret += tcrypt_test("cbc(cast6)");
- ret += tcrypt_test("ctr(cast6)");
- ret += tcrypt_test("lrw(cast6)");
- ret += tcrypt_test("xts(cast6)");
+ ret = min(ret, tcrypt_test("ecb(cast6)"));
+ ret = min(ret, tcrypt_test("cbc(cast6)"));
+ ret = min(ret, tcrypt_test("ctr(cast6)"));
+ ret = min(ret, tcrypt_test("lrw(cast6)"));
+ ret = min(ret, tcrypt_test("xts(cast6)"));
break;
case 16:
- ret += tcrypt_test("ecb(arc4)");
+ ret = min(ret, tcrypt_test("ecb(arc4)"));
break;
case 17:
- ret += tcrypt_test("michael_mic");
+ ret = min(ret, tcrypt_test("michael_mic"));
break;
case 18:
- ret += tcrypt_test("crc32c");
+ ret = min(ret, tcrypt_test("crc32c"));
break;
case 19:
- ret += tcrypt_test("ecb(tea)");
+ ret = min(ret, tcrypt_test("ecb(tea)"));
break;
case 20:
- ret += tcrypt_test("ecb(xtea)");
+ ret = min(ret, tcrypt_test("ecb(xtea)"));
break;
case 21:
- ret += tcrypt_test("ecb(khazad)");
+ ret = min(ret, tcrypt_test("ecb(khazad)"));
break;
case 22:
- ret += tcrypt_test("wp512");
+ ret = min(ret, tcrypt_test("wp512"));
break;
case 23:
- ret += tcrypt_test("wp384");
+ ret = min(ret, tcrypt_test("wp384"));
break;
case 24:
- ret += tcrypt_test("wp256");
+ ret = min(ret, tcrypt_test("wp256"));
break;
case 26:
- ret += tcrypt_test("ecb(anubis)");
- ret += tcrypt_test("cbc(anubis)");
+ ret = min(ret, tcrypt_test("ecb(anubis)"));
+ ret = min(ret, tcrypt_test("cbc(anubis)"));
break;
case 30:
- ret += tcrypt_test("ecb(xeta)");
+ ret = min(ret, tcrypt_test("ecb(xeta)"));
break;
case 31:
- ret += tcrypt_test("pcbc(fcrypt)");
+ ret = min(ret, tcrypt_test("pcbc(fcrypt)"));
break;
case 32:
- ret += tcrypt_test("ecb(camellia)");
- ret += tcrypt_test("cbc(camellia)");
- ret += tcrypt_test("ctr(camellia)");
- ret += tcrypt_test("lrw(camellia)");
- ret += tcrypt_test("xts(camellia)");
+ ret = min(ret, tcrypt_test("ecb(camellia)"));
+ ret = min(ret, tcrypt_test("cbc(camellia)"));
+ ret = min(ret, tcrypt_test("ctr(camellia)"));
+ ret = min(ret, tcrypt_test("lrw(camellia)"));
+ ret = min(ret, tcrypt_test("xts(camellia)"));
break;
case 33:
- ret += tcrypt_test("sha224");
+ ret = min(ret, tcrypt_test("sha224"));
break;
case 35:
- ret += tcrypt_test("gcm(aes)");
+ ret = min(ret, tcrypt_test("gcm(aes)"));
break;
case 36:
- ret += tcrypt_test("lzo");
+ ret = min(ret, tcrypt_test("lzo"));
break;
case 37:
- ret += tcrypt_test("ccm(aes)");
+ ret = min(ret, tcrypt_test("ccm(aes)"));
break;
case 38:
- ret += tcrypt_test("cts(cbc(aes))");
+ ret = min(ret, tcrypt_test("cts(cbc(aes))"));
break;
case 39:
- ret += tcrypt_test("xxhash64");
+ ret = min(ret, tcrypt_test("xxhash64"));
break;
case 40:
- ret += tcrypt_test("rmd160");
+ ret = min(ret, tcrypt_test("rmd160"));
break;
case 42:
- ret += tcrypt_test("blake2b-512");
+ ret = min(ret, tcrypt_test("blake2b-512"));
break;
case 43:
- ret += tcrypt_test("ecb(seed)");
+ ret = min(ret, tcrypt_test("ecb(seed)"));
break;
case 45:
- ret += tcrypt_test("rfc4309(ccm(aes))");
+ ret = min(ret, tcrypt_test("rfc4309(ccm(aes))"));
break;
case 46:
- ret += tcrypt_test("ghash");
+ ret = min(ret, tcrypt_test("ghash"));
break;
case 47:
- ret += tcrypt_test("crct10dif");
+ ret = min(ret, tcrypt_test("crct10dif"));
break;
case 48:
- ret += tcrypt_test("sha3-224");
+ ret = min(ret, tcrypt_test("sha3-224"));
break;
case 49:
- ret += tcrypt_test("sha3-256");
+ ret = min(ret, tcrypt_test("sha3-256"));
break;
case 50:
- ret += tcrypt_test("sha3-384");
+ ret = min(ret, tcrypt_test("sha3-384"));
break;
case 51:
- ret += tcrypt_test("sha3-512");
+ ret = min(ret, tcrypt_test("sha3-512"));
break;
case 52:
- ret += tcrypt_test("sm3");
+ ret = min(ret, tcrypt_test("sm3"));
break;
case 53:
- ret += tcrypt_test("streebog256");
+ ret = min(ret, tcrypt_test("streebog256"));
break;
case 54:
- ret += tcrypt_test("streebog512");
+ ret = min(ret, tcrypt_test("streebog512"));
break;
case 55:
- ret += tcrypt_test("gcm(sm4)");
+ ret = min(ret, tcrypt_test("gcm(sm4)"));
break;
case 56:
- ret += tcrypt_test("ccm(sm4)");
+ ret = min(ret, tcrypt_test("ccm(sm4)"));
break;
case 57:
- ret += tcrypt_test("polyval");
+ ret = min(ret, tcrypt_test("polyval"));
break;
case 58:
- ret += tcrypt_test("gcm(aria)");
+ ret = min(ret, tcrypt_test("gcm(aria)"));
+ break;
+
+ case 59:
+ ret = min(ret, tcrypt_test("cts(cbc(sm4))"));
break;
case 100:
- ret += tcrypt_test("hmac(md5)");
+ ret = min(ret, tcrypt_test("hmac(md5)"));
break;
case 101:
- ret += tcrypt_test("hmac(sha1)");
+ ret = min(ret, tcrypt_test("hmac(sha1)"));
break;
case 102:
- ret += tcrypt_test("hmac(sha256)");
+ ret = min(ret, tcrypt_test("hmac(sha256)"));
break;
case 103:
- ret += tcrypt_test("hmac(sha384)");
+ ret = min(ret, tcrypt_test("hmac(sha384)"));
break;
case 104:
- ret += tcrypt_test("hmac(sha512)");
+ ret = min(ret, tcrypt_test("hmac(sha512)"));
break;
case 105:
- ret += tcrypt_test("hmac(sha224)");
+ ret = min(ret, tcrypt_test("hmac(sha224)"));
break;
case 106:
- ret += tcrypt_test("xcbc(aes)");
+ ret = min(ret, tcrypt_test("xcbc(aes)"));
break;
case 108:
- ret += tcrypt_test("hmac(rmd160)");
+ ret = min(ret, tcrypt_test("hmac(rmd160)"));
break;
case 109:
- ret += tcrypt_test("vmac64(aes)");
+ ret = min(ret, tcrypt_test("vmac64(aes)"));
break;
case 111:
- ret += tcrypt_test("hmac(sha3-224)");
+ ret = min(ret, tcrypt_test("hmac(sha3-224)"));
break;
case 112:
- ret += tcrypt_test("hmac(sha3-256)");
+ ret = min(ret, tcrypt_test("hmac(sha3-256)"));
break;
case 113:
- ret += tcrypt_test("hmac(sha3-384)");
+ ret = min(ret, tcrypt_test("hmac(sha3-384)"));
break;
case 114:
- ret += tcrypt_test("hmac(sha3-512)");
+ ret = min(ret, tcrypt_test("hmac(sha3-512)"));
break;
case 115:
- ret += tcrypt_test("hmac(streebog256)");
+ ret = min(ret, tcrypt_test("hmac(streebog256)"));
break;
case 116:
- ret += tcrypt_test("hmac(streebog512)");
+ ret = min(ret, tcrypt_test("hmac(streebog512)"));
break;
case 150:
- ret += tcrypt_test("ansi_cprng");
+ ret = min(ret, tcrypt_test("ansi_cprng"));
break;
case 151:
- ret += tcrypt_test("rfc4106(gcm(aes))");
+ ret = min(ret, tcrypt_test("rfc4106(gcm(aes))"));
break;
case 152:
- ret += tcrypt_test("rfc4543(gcm(aes))");
+ ret = min(ret, tcrypt_test("rfc4543(gcm(aes))"));
break;
case 153:
- ret += tcrypt_test("cmac(aes)");
+ ret = min(ret, tcrypt_test("cmac(aes)"));
break;
case 154:
- ret += tcrypt_test("cmac(des3_ede)");
+ ret = min(ret, tcrypt_test("cmac(des3_ede)"));
break;
case 155:
- ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(aes))"));
break;
case 156:
- ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))");
+ ret = min(ret, tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"));
break;
case 157:
- ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"));
break;
case 158:
- ret += tcrypt_test("cbcmac(sm4)");
+ ret = min(ret, tcrypt_test("cbcmac(sm4)"));
break;
case 159:
- ret += tcrypt_test("cmac(sm4)");
+ ret = min(ret, tcrypt_test("cmac(sm4)"));
+ break;
+
+ case 160:
+ ret = min(ret, tcrypt_test("xcbc(sm4)"));
break;
case 181:
- ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))"));
break;
case 182:
- ret += tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))"));
break;
case 183:
- ret += tcrypt_test("authenc(hmac(sha224),cbc(des))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des))"));
break;
case 184:
- ret += tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))"));
break;
case 185:
- ret += tcrypt_test("authenc(hmac(sha256),cbc(des))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des))"));
break;
case 186:
- ret += tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))"));
break;
case 187:
- ret += tcrypt_test("authenc(hmac(sha384),cbc(des))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des))"));
break;
case 188:
- ret += tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))"));
break;
case 189:
- ret += tcrypt_test("authenc(hmac(sha512),cbc(des))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des))"));
break;
case 190:
- ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
+ ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))"));
break;
case 191:
- ret += tcrypt_test("ecb(sm4)");
- ret += tcrypt_test("cbc(sm4)");
- ret += tcrypt_test("cfb(sm4)");
- ret += tcrypt_test("ctr(sm4)");
+ ret = min(ret, tcrypt_test("ecb(sm4)"));
+ ret = min(ret, tcrypt_test("cbc(sm4)"));
+ ret = min(ret, tcrypt_test("cfb(sm4)"));
+ ret = min(ret, tcrypt_test("ctr(sm4)"));
+ ret = min(ret, tcrypt_test("xts(sm4)"));
break;
case 192:
- ret += tcrypt_test("ecb(aria)");
- ret += tcrypt_test("cbc(aria)");
- ret += tcrypt_test("cfb(aria)");
- ret += tcrypt_test("ctr(aria)");
+ ret = min(ret, tcrypt_test("ecb(aria)"));
+ ret = min(ret, tcrypt_test("cbc(aria)"));
+ ret = min(ret, tcrypt_test("cfb(aria)"));
+ ret = min(ret, tcrypt_test("ctr(aria)"));
break;
case 200:
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
@@ -2109,6 +2117,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
+ test_cipher_speed("cts(cbc(sm4))", ENCRYPT, sec, NULL, 0,
+ speed_template_16);
+ test_cipher_speed("cts(cbc(sm4))", DECRYPT, sec, NULL, 0,
+ speed_template_16);
test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
speed_template_16);
test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
@@ -2117,6 +2129,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
+ test_cipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_32);
+ test_cipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_32);
break;
case 219:
@@ -2630,6 +2646,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16);
test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
+ test_acipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
+ speed_template_32);
+ test_acipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
+ speed_template_32);
break;
case 519:
@@ -2885,7 +2905,7 @@ static int __init tcrypt_mod_init(void)
err = do_test(alg, type, mask, mode, num_mb);
if (err) {
- printk(KERN_ERR "tcrypt: one or more tests failed!\n");
+ pr_err("one or more tests failed!\n");
goto err_free_tv;
} else {
pr_debug("all tests passed\n");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index bcd059caa1c8..e2806ef044fd 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4713,6 +4713,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.test = alg_test_null,
.fips_allowed = 1,
}, {
+ .alg = "cts(cbc(sm4))",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(sm4_cts_tv_template)
+ }
+ }, {
.alg = "curve25519",
.test = alg_test_kpp,
.suite = {
@@ -5587,6 +5593,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(aes_xcbc128_tv_template)
}
}, {
+ .alg = "xcbc(sm4)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(sm4_xcbc128_tv_template)
+ }
+ }, {
.alg = "xchacha12",
.test = alg_test_skcipher,
.suite = {
@@ -5641,6 +5653,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.cipher = __VECS(serpent_xts_tv_template)
}
}, {
+ .alg = "xts(sm4)",
+ .generic_driver = "xts(ecb(sm4-generic))",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(sm4_xts_tv_template)
+ }
+ }, {
.alg = "xts(twofish)",
.generic_driver = "xts(ecb(twofish-generic))",
.test = alg_test_skcipher,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index d6088e26f326..f10bfb9d9973 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -14882,6 +14882,353 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
}
};
+static const struct cipher_testvec sm4_cts_tv_template[] = {
+ /* Generated from AES-CTS test vectors */
+ {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20",
+ .len = 17,
+ .ctext = "\x05\xfe\x23\xee\x17\xa2\x89\x98"
+ "\xbc\x97\x0a\x0b\x54\x67\xca\xd7"
+ "\xd6",
+ }, {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+ "\x20\x47\x61\x75\x27\x73\x20",
+ .len = 31,
+ .ctext = "\x15\x46\xe4\x95\xa4\xec\xf0\xb8"
+ "\x49\xd6\x6a\x9d\x89\xc7\xfd\x70"
+ "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+ "\x93\xf7\x70\xbb\xa8\x3f\xa3",
+ }, {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+ "\x20\x47\x61\x75\x27\x73\x20\x43",
+ .len = 32,
+ .ctext = "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+ "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3"
+ "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+ "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf",
+ }, {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+ "\x20\x47\x61\x75\x27\x73\x20\x43"
+ "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+ "\x70\x6c\x65\x61\x73\x65\x2c",
+ .len = 47,
+ .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+ "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+ "\xd3\xe1\xdc\xeb\xfa\x04\x11\x99"
+ "\xde\xcf\x6f\x4d\x7b\x09\x92\x7f"
+ "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+ "\x01\x6a\xbf\xd4\x3f\x79\x02",
+ }, {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+ "\x20\x47\x61\x75\x27\x73\x20\x43"
+ "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+ "\x70\x6c\x65\x61\x73\x65\x2c\x20",
+ .len = 48,
+ .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+ "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+ "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f"
+ "\xbd\x99\x21\x0c\x5e\x4d\xed\x20"
+ "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+ "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3",
+ }, {
+ .klen = 16,
+ .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+ "\x74\x65\x72\x69\x79\x61\x6b\x69",
+ .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+ "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+ "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+ "\x20\x47\x61\x75\x27\x73\x20\x43"
+ "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+ "\x70\x6c\x65\x61\x73\x65\x2c\x20"
+ "\x61\x6e\x64\x20\x77\x6f\x6e\x74"
+ "\x6f\x6e\x20\x73\x6f\x75\x70\x2e",
+ .len = 64,
+ .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+ "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+ "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+ "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3"
+ "\x58\x19\xa4\x8f\xa9\x68\x5e\x6b"
+ "\x2c\x0f\x81\x60\x15\x98\x27\x4f"
+ "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f"
+ "\xbd\x99\x21\x0c\x5e\x4d\xed\x20",
+ }
+};
+
+static const struct cipher_testvec sm4_xts_tv_template[] = {
+ /* Generated from AES-XTS test vectors */
+ {
+ .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ctext = "\xd9\xb4\x21\xf7\x31\xc8\x94\xfd"
+ "\xc3\x5b\x77\x29\x1f\xe4\xe3\xb0"
+ "\x2a\x1f\xb7\x66\x98\xd5\x9f\x0e"
+ "\x51\x37\x6c\x4a\xda\x5b\xc7\x5d",
+ .len = 32,
+ }, {
+ .key = "\x11\x11\x11\x11\x11\x11\x11\x11"
+ "\x11\x11\x11\x11\x11\x11\x11\x11"
+ "\x22\x22\x22\x22\x22\x22\x22\x22"
+ "\x22\x22\x22\x22\x22\x22\x22\x22",
+ .klen = 32,
+ .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44",
+ .ctext = "\xa7\x4d\x72\x6c\x11\x19\x6a\x32"
+ "\xbe\x04\xe0\x01\xff\x29\xd0\xc7"
+ "\x93\x2f\x9f\x3e\xc2\x9b\xfc\xb6"
+ "\x4d\xd1\x7f\x63\xcb\xd3\xea\x31",
+ .len = 32,
+ }, {
+ .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+ "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+ "\x22\x22\x22\x22\x22\x22\x22\x22"
+ "\x22\x22\x22\x22\x22\x22\x22\x22",
+ .klen = 32,
+ .iv = "\x33\x33\x33\x33\x33\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44"
+ "\x44\x44\x44\x44\x44\x44\x44\x44",
+ .ctext = "\x7f\x76\x08\x8e\xff\xad\xf7\x0c"
+ "\x02\xea\x9f\x95\xda\x06\x28\xd3"
+ "\x51\xbf\xcb\x9e\xac\x05\x63\xbc"
+ "\xf1\x7b\x71\x0d\xab\x0a\x98\x26",
+ .len = 32,
+ }, {
+ .key = "\x27\x18\x28\x18\x28\x45\x90\x45"
+ "\x23\x53\x60\x28\x74\x71\x35\x26"
+ "\x31\x41\x59\x26\x53\x58\x97\x93"
+ "\x23\x84\x62\x64\x33\x83\x27\x95",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x21\x22\x23\x24\x25\x26\x27"
+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+ "\x40\x41\x42\x43\x44\x45\x46\x47"
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+ "\x50\x51\x52\x53\x54\x55\x56\x57"
+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+ "\x60\x61\x62\x63\x64\x65\x66\x67"
+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+ "\x70\x71\x72\x73\x74\x75\x76\x77"
+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+ "\x80\x81\x82\x83\x84\x85\x86\x87"
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+ "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+ "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x21\x22\x23\x24\x25\x26\x27"
+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+ "\x40\x41\x42\x43\x44\x45\x46\x47"
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+ "\x50\x51\x52\x53\x54\x55\x56\x57"
+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+ "\x60\x61\x62\x63\x64\x65\x66\x67"
+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+ "\x70\x71\x72\x73\x74\x75\x76\x77"
+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+ "\x80\x81\x82\x83\x84\x85\x86\x87"
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+ "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .ctext = "\x54\xdd\x65\xb6\x32\x6f\xae\xa8"
+ "\xfa\xd1\xa8\x3c\x63\x61\x4a\xf3"
+ "\x9f\x72\x1d\x8d\xfe\x17\x7a\x30"
+ "\xb6\x6a\xbf\x6a\x44\x99\x80\xe1"
+ "\xcd\xbe\x06\xaf\xb7\x33\x36\xf3"
+ "\x7a\x4d\x39\xde\x96\x4a\x30\xd7"
+ "\xd0\x4a\x37\x99\x16\x9c\x60\x25"
+ "\x8f\x6b\x74\x8a\x61\x86\x1a\xa5"
+ "\xec\x92\xa2\xc1\x5b\x2b\x7c\x61"
+ "\x5a\x42\xab\xa4\x99\xbb\xd6\xb7"
+ "\x1d\xb9\xc7\x89\xb2\x18\x20\x89"
+ "\xa2\x5d\xd3\xdf\x80\x0e\xd1\x86"
+ "\x4d\x19\xf7\xed\x45\xfd\x17\xa9"
+ "\x48\x0b\x0f\xb8\x2d\x9b\x7f\xc3"
+ "\xed\x57\xe9\xa1\x14\x0e\xaa\x77"
+ "\x8d\xd2\xdd\x67\x9e\x3e\xdc\x3d"
+ "\xc4\xd5\x5c\x95\x0e\xbc\x53\x1d"
+ "\x95\x92\xf7\xc4\x63\x82\x56\xd5"
+ "\x65\x18\x29\x2a\x20\xaf\x98\xfd"
+ "\xd3\xa6\x36\x00\x35\x0a\x70\xab"
+ "\x5a\x40\xf4\xc2\x85\x03\x7c\xa0"
+ "\x1f\x25\x1f\x19\xec\xae\x03\x29"
+ "\xff\x77\xad\x88\xcd\x5a\x4c\xde"
+ "\xa2\xae\xab\xc2\x21\x48\xff\xbd"
+ "\x23\x9b\xd1\x05\x15\xbd\xe1\x13"
+ "\x1d\xec\x84\x04\xe4\x43\xdc\x76"
+ "\x31\x40\xd5\xf2\x2b\xf3\x3e\x0c"
+ "\x68\x72\xd6\xb8\x1d\x63\x0f\x6f"
+ "\x00\xcd\xd0\x58\xfe\x80\xf9\xcb"
+ "\xfb\x77\x70\x7f\x93\xce\xe2\xca"
+ "\x92\xb9\x15\xb8\x30\x40\x27\xc1"
+ "\x90\xa8\x4e\x2d\x65\xe0\x18\xcc"
+ "\x6a\x38\x7d\x37\x66\xac\xdb\x28"
+ "\x25\x32\x84\xe8\xdb\x9a\xcf\x8f"
+ "\x52\x28\x0d\xdc\x6d\x00\x33\xd2"
+ "\xcc\xaa\xa4\xf9\xae\xff\x12\x36"
+ "\x69\xbc\x02\x4f\xd6\x76\x8e\xdf"
+ "\x8b\xc1\xf8\xd6\x22\xc1\x9c\x60"
+ "\x9e\xf9\x7f\x60\x91\x90\xcd\x11"
+ "\x02\x41\xe7\xfb\x08\x4e\xd8\x94"
+ "\x2d\xa1\xf9\xb9\xcf\x1b\x51\x4b"
+ "\x61\xa3\x88\xb3\x0e\xa6\x1a\x4a"
+ "\x74\x5b\x38\x1e\xe7\xad\x6c\x4d"
+ "\xb1\x27\x54\x53\xb8\x41\x3f\x98"
+ "\xdf\x6e\x4a\x40\x98\x6e\xe4\xb5"
+ "\x9a\xf5\xdf\xae\xcd\x30\x12\x65"
+ "\x17\x90\x67\xa0\x0d\x7c\xa3\x5a"
+ "\xb9\x5a\xbd\x61\x7a\xde\xa2\x8e"
+ "\xc1\xc2\x6a\x97\xde\x28\xb8\xbf"
+ "\xe3\x01\x20\xd6\xae\xfb\xd2\x58"
+ "\xc5\x9e\x42\xd1\x61\xe8\x06\x5a"
+ "\x78\x10\x6b\xdc\xa5\xcd\x90\xfb"
+ "\x3a\xac\x4e\x93\x86\x6c\x8a\x7f"
+ "\x96\x76\x86\x0a\x79\x14\x5b\xd9"
+ "\x2e\x02\xe8\x19\xa9\x0b\xe0\xb9"
+ "\x7c\xc5\x22\xb3\x21\x06\x85\x6f"
+ "\xdf\x0e\x54\xd8\x8e\x46\x24\x15"
+ "\x5a\x2f\x1c\x14\xea\xea\xa1\x63"
+ "\xf8\x58\xe9\x9a\x80\x6e\x79\x1a"
+ "\xcd\x82\xf1\xb0\xe2\x9f\x00\x28"
+ "\xa4\xc3\x8e\x97\x6f\x57\x1a\x93"
+ "\xf4\xfd\x57\xd7\x87\xc2\x4d\xb0"
+ "\xe0\x1c\xa3\x04\xe5\xa5\xc4\xdd"
+ "\x50\xcf\x8b\xdb\xf4\x91\xe5\x7c",
+ .len = 512,
+ }, {
+ .key = "\x62\x49\x77\x57\x24\x70\x93\x69"
+ "\x99\x59\x57\x49\x66\x96\x76\x27"
+ "\x02\x88\x41\x97\x16\x93\x99\x37"
+ "\x51\x05\x82\x09\x74\x94\x45\x92",
+ .klen = 32,
+ .iv = "\xff\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x21\x22\x23\x24\x25\x26\x27"
+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+ "\x30\x31\x32\x33\x34\x35\x36\x37"
+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+ "\x40\x41\x42\x43\x44\x45\x46\x47"
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+ "\x50\x51\x52\x53\x54\x55\x56\x57"
+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+ "\x60\x61\x62\x63\x64\x65\x66\x67"
+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+ "\x70\x71\x72\x73\x74\x75\x76\x77"
+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+ "\x80\x81\x82\x83\x84\x85\x86\x87"
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+ "\xf8\xf9\xfa\xfb\xfc",
+ .ctext = "\xa2\x9f\x9e\x4e\x71\xdb\x28\x3c"
+ "\x80\x0e\xf6\xb7\x8e\x57\x1c\xba"
+ "\x90\xda\x3b\x6c\x22\x00\x68\x30"
+ "\x1d\x63\x0d\x9e\x6a\xad\x37\x55"
+ "\xbc\x77\x1e\xc9\xad\x83\x30\xd5"
+ "\x27\xb2\x66\x77\x18\x3c\xa6\x39"
+ "\x9c\x0a\xaa\x1f\x02\xe1\xd5\x65"
+ "\x9b\x8d\xc5\x97\x3d\xc5\x04\x53"
+ "\x78\x00\xe3\xb0\x1a\x43\x4e\xb7"
+ "\xc4\x9f\x38\xc5\x7b\xa4\x70\x64"
+ "\x78\xe6\x32\xd9\x65\x44\xc5\x64"
+ "\xb8\x42\x35\x99\xff\x66\x75\xb0"
+ "\x22\xd3\x9b\x6e\x8d\xcf\x6a\x24"
+ "\xfd\x92\xb7\x1b\x04\x28\x2a\x61"
+ "\xdc\x96\x2a\x20\x7a\x2c\xf1\xf9"
+ "\x12\x15\xf0\x4d\xcf\x2b\xde\x33"
+ "\x41\xbc\xe7\x85\x87\x22\xb7\x16"
+ "\x02\x1c\xd8\xa2\x0f\x1f\xa3\xe9"
+ "\xd8\x45\x48\xe7\xbe\x08\x4e\x4e"
+ "\x23\x79\x84\xdb\x40\x76\xf5\x13"
+ "\x78\x92\x4a\x2f\xf9\x1b\xf2\x80"
+ "\x25\x74\x51\x45\x9a\x77\x78\x97"
+ "\xd3\xe0\xc7\xc4\x35\x67\x2a\xe6"
+ "\xb3\x0d\x62\x9f\x8b",
+ .len = 189,
+ },
+};
+
static const struct aead_testvec sm4_gcm_tv_template[] = {
{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
.key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
@@ -14913,6 +15260,298 @@ static const struct aead_testvec sm4_gcm_tv_template[] = {
"\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
"\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
.clen = 80,
+ }, { /* Generated from AES-GCM test vectors */
+ .key = zeroed_string,
+ .klen = 16,
+ .ctext = "\x23\x2f\x0c\xfe\x30\x8b\x49\xea"
+ "\x6f\xc8\x82\x29\xb5\xdc\x85\x8d",
+ .clen = 16,
+ }, {
+ .key = zeroed_string,
+ .klen = 16,
+ .ptext = zeroed_string,
+ .plen = 16,
+ .ctext = "\x7d\xe2\xaa\x7f\x11\x10\x18\x82"
+ "\x18\x06\x3b\xe1\xbf\xeb\x6d\x89"
+ "\xb8\x51\xb5\xf3\x94\x93\x75\x2b"
+ "\xe5\x08\xf1\xbb\x44\x82\xc5\x57",
+ .clen = 32,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 16,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39\x1a\xaf\xd2\x55",
+ .plen = 64,
+ .ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6"
+ "\x76\x21\x6a\x33\x83\x10\x41\xeb"
+ "\x09\x58\x00\x11\x7b\xdc\x3f\x75"
+ "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb"
+ "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07"
+ "\x1a\xe5\x48\x3f\xed\xde\x98\x5d"
+ "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf"
+ "\xe3\x63\x36\x83\x23\xf7\x5b\x80"
+ "\x7d\xfe\x77\xef\x71\xb1\x5e\xc9"
+ "\x52\x6b\x09\xab\x84\x28\x4b\x8a",
+ .clen = 80,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 16,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39",
+ .plen = 60,
+ .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xab\xad\xda\xd2",
+ .alen = 20,
+ .ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6"
+ "\x76\x21\x6a\x33\x83\x10\x41\xeb"
+ "\x09\x58\x00\x11\x7b\xdc\x3f\x75"
+ "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb"
+ "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07"
+ "\x1a\xe5\x48\x3f\xed\xde\x98\x5d"
+ "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf"
+ "\xe3\x63\x36\x83"
+ "\x89\xf6\xba\x35\xb8\x18\xd3\xcc"
+ "\x38\x6c\x05\xb3\x8a\xcb\xc9\xde",
+ .clen = 76,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+ .klen = 16,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39",
+ .plen = 60,
+ .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xab\xad\xda\xd2",
+ .alen = 20,
+ .ctext = "\xc1\x11\x44\x51\xd9\x25\x87\x5b"
+ "\x0f\xd9\x06\xf3\x33\x44\xbb\x87"
+ "\x8b\xa3\x77\xd2\x0c\x60\xfa\xcc"
+ "\x85\x50\x6f\x96\x0c\x54\x54\xc1"
+ "\x58\x04\x88\x6e\xf4\x26\x35\x7e"
+ "\x94\x80\x48\x6c\xf2\xf4\x88\x1f"
+ "\x19\x63\xea\xae\xba\x81\x1a\x5d"
+ "\x0e\x6f\x59\x08"
+ "\x33\xac\x5b\xa8\x19\x60\xdb\x1d"
+ "\xdd\x2e\x22\x2e\xe0\x87\x51\x5d",
+ .clen = 76,
+ }, {
+ .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+ "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+ .klen = 16,
+ .iv = "\x00\xff\xff\xff\xff\x00\x00\xff"
+ "\xff\xff\x00\xff",
+ .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+ "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+ "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+ "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+ "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+ "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+ "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+ "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+ "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+ "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+ "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+ "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+ "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+ "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+ "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+ "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+ "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+ "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+ "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+ "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+ "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+ "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+ "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+ "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+ "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+ "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+ "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+ "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+ "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+ "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+ "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+ "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+ "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+ "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+ "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+ "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+ "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+ "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+ "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+ "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+ "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+ "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+ "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+ "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+ "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+ "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+ "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+ "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+ "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+ "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+ "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+ "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+ "\x87\x79\x60\x38\x46\xb4\x25\x57"
+ "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+ "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+ "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+ "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+ "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+ "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+ "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+ "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+ "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+ "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+ "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+ "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+ "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+ "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+ "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+ "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+ "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+ "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+ "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+ "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+ "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+ "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+ "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+ "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+ "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+ "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+ "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+ "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+ "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+ "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+ "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+ "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+ "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+ "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+ "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+ "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+ "\xa4\x78\xdb\x74\x3d\x8b\xb5",
+ .plen = 719,
+ .ctext = "\xdc\xb1\x0f\x2a\xe8\x2d\x1c\x57"
+ "\xc4\x82\xfa\xd6\x87\xe6\x2f\x50"
+ "\xbd\x9e\x0a\x42\x31\xf2\xc7\xbb"
+ "\x21\x63\xa7\x05\x43\x33\xef\x33"
+ "\x5c\xd3\x47\x55\xce\x5c\xe4\xd4"
+ "\xe5\x07\x62\x22\xac\x01\xa8\x35"
+ "\x9c\x59\x34\x30\x8e\xff\x9f\xb4"
+ "\xd2\x4e\x74\x90\x64\xf2\x78\x5e"
+ "\x63\xb7\xc5\x08\x1b\x37\xa5\x9e"
+ "\xc0\xde\xff\xa9\x7f\x0b\xd3\x02"
+ "\x83\x6e\x33\xfa\x43\x11\xd3\xda"
+ "\x02\xcf\xcd\x4a\xc0\x78\x1f\x39"
+ "\x62\xcb\xa3\x95\x7e\x13\x92\x28"
+ "\xb2\xc4\x7a\xba\xd1\xc6\xf6\x1f"
+ "\xda\x0b\xf1\xd1\x99\x54\xd8\x3b"
+ "\x16\xf8\xe6\x97\x1e\xa7\xcf\x49"
+ "\x69\x84\x01\x4c\xdc\x7a\x34\xff"
+ "\x01\x08\xa3\x0b\x39\xac\x21\x37"
+ "\xd8\xb4\x04\x19\x8b\x7a\x7d\x17"
+ "\x44\xd1\x18\xaf\x1f\xa9\x29\xfe"
+ "\xfa\x77\xe0\x40\x42\x0c\x79\xb7"
+ "\xc3\x15\x1b\xd9\x0c\x82\xfc\x16"
+ "\x70\xd6\x2a\xe9\x94\x72\xc5\xa5"
+ "\x8a\x58\xbc\xfa\xe0\x88\x39\x4a"
+ "\x80\xe8\xec\xaf\x60\xac\xe7\xf8"
+ "\x9c\xf0\xfc\x61\x39\x07\x98\x6b"
+ "\x88\xe3\x98\x22\x28\x18\x4a\x2d"
+ "\x25\xef\x10\xe3\x83\x66\x3f\xfd"
+ "\xc7\x0b\xa3\xfd\x97\xa9\xf4\xbd"
+ "\xd8\x2a\xee\x4a\x50\xad\xcc\xb5"
+ "\xc7\xab\xb8\x79\x9c\xd1\xf1\x27"
+ "\x08\xf5\xf5\xe8\x1b\x66\xce\x41"
+ "\x56\x60\x94\x86\xf0\x78\xc2\xfa"
+ "\x5b\x63\x40\xb1\xd1\x1a\x38\x69"
+ "\x0b\x8c\xb2\xf5\xa2\xbe\x90\x9d"
+ "\x46\x23\x79\x8b\x3b\x4a\xf4\xbb"
+ "\x55\xf7\x58\x9d\xaf\x59\xff\x74"
+ "\xf3\xb9\xc4\x26\xb1\xf8\xe1\x28"
+ "\x8b\x5e\x8f\x6d\x64\xe7\xe8\x63"
+ "\xd2\x9e\xcb\xee\xae\x19\x04\x1d"
+ "\x05\xf0\x9d\x99\x7b\x33\x33\xae"
+ "\x6e\xe5\x09\xdd\x67\x51\xc4\xc8"
+ "\x6a\xc7\x36\x35\xc9\x93\x76\xa1"
+ "\xa8\x1c\xfa\x75\x92\x34\x0e\x7d"
+ "\x3d\x1d\xef\x00\xfd\xa5\x25\x12"
+ "\x7c\x91\x21\x41\xcc\x50\x47\xa9"
+ "\x22\x50\x24\x96\x34\x79\x3d\xe8"
+ "\x3f\xa0\x56\xaf\x98\x53\x55\xc3"
+ "\x46\x1b\x17\x54\xb8\xb0\xb7\xe0"
+ "\xe0\xab\x47\x6f\x06\xda\xcc\x75"
+ "\xa7\x96\xb7\x92\xf3\xa0\x5f\xe6"
+ "\xba\x97\xe3\x2f\x97\x05\xb2\x99"
+ "\xa0\x09\x10\x98\x9c\xd3\x2e\xd1"
+ "\x7e\x2a\x30\x54\x3c\xb9\x33\xe3"
+ "\xf2\xaf\xd3\xa5\xee\xd0\x0b\x8a"
+ "\x19\x54\x0f\x02\x51\x1f\x91\xdf"
+ "\x71\x9c\xad\x77\x35\x28\x55\x6d"
+ "\xcd\x7a\xd9\xa3\x41\x98\x6b\x37"
+ "\x19\x0f\xbe\xae\x69\xb2\x25\x01"
+ "\xee\x0e\x51\x4b\x53\xea\x0f\x5f"
+ "\x85\x74\x79\x36\x32\x0a\x2a\x40"
+ "\xad\x6b\x78\x41\x54\x99\xe9\xc1"
+ "\x2b\x6c\x9b\x42\x21\xef\xe2\x50"
+ "\x56\x8d\x78\xdf\x58\xbe\x0a\x0f"
+ "\xfc\xfc\x0d\x2e\xd0\xcb\xa6\x0a"
+ "\xa8\xd9\x1e\xa9\xd4\x7c\x99\x88"
+ "\xcf\x11\xad\x1c\xd3\x04\x63\x55"
+ "\xef\x85\x0b\x69\xa1\x40\xf1\x75"
+ "\x24\xf4\xe5\x2c\xd4\x7a\x24\x50"
+ "\x8f\xa2\x71\xc9\x92\x20\xcd\xcf"
+ "\xda\x40\xbe\xf6\xfe\x1a\xca\xc7"
+ "\x4a\x80\x45\x55\xcb\xdd\xb7\x01"
+ "\xb0\x8d\xcb\xd2\xae\xbd\xa4\xd0"
+ "\x5c\x10\x05\x66\x7b\xd4\xff\xd9"
+ "\xc4\x23\x9d\x8d\x6b\x24\xf8\x3f"
+ "\x73\x4d\x5c\x2b\x33\x4c\x5e\x63"
+ "\x74\x6d\x03\xa1\x7a\x35\x65\x17"
+ "\x38\x7f\x3b\xc1\x69\xcf\x61\x34"
+ "\x30\x21\xaf\x97\x47\x12\x3f\xa1"
+ "\xa7\x50\xc5\x87\xfb\x3f\x70\x32"
+ "\x86\x17\x5f\x25\xe4\x74\xc6\xd0"
+ "\x9b\x39\xe6\xe1\x5a\xec\x8f\x40"
+ "\xce\xcc\x37\x3b\xd8\x72\x1c\x31"
+ "\x75\xa4\xa6\x89\x8c\xdd\xd6\xd2"
+ "\x32\x3d\xe8\xc3\x54\xab\x1f\x35"
+ "\x52\xb4\x94\x81\xb0\x37\x3a\x03"
+ "\xbb\xb1\x99\x30\xa5\xf8\x21\xcd"
+ "\x93\x5d\xa7\x13\xed\xc7\x49\x09"
+ "\x70\xda\x08\x39\xaa\x15\x9e\x45"
+ "\x35\x2b\x0f\x5c\x8c\x8b\xc9"
+ "\xa8\xb8\x9f\xfd\x37\x36\x31\x7e"
+ "\x34\x4f\xc1\xc0\xca\x8a\x22\xfd",
+ .clen = 735,
}
};
@@ -14947,6 +15586,282 @@ static const struct aead_testvec sm4_ccm_tv_template[] = {
"\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
"\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
.clen = 80,
+ }, { /* Generated from AES-CCM test vectors */
+ .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf",
+ .klen = 16,
+ .iv = "\x01\x00\x00\x00\x03\x02\x01\x00"
+ "\xa0\xa1\xa2\xa3\xa4\xa5\x00\x00",
+ .assoc = "\x00\x01\x02\x03\x04\x05\x06\x07",
+ .alen = 8,
+ .ptext = "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e",
+ .plen = 23,
+ .ctext = "\x7b\xff\x4a\x15\xf5\x73\xce\x82"
+ "\x6e\xc2\x31\x1d\xe2\x53\x02\xac"
+ "\xa4\x48\xf9\xe4\xf5\x1f\x81\x70"
+ "\x18\xbc\xb6\x84\x01\xb8\xae",
+ .clen = 31,
+ }, {
+ .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
+ "\x53\x14\x73\x66\x8d\x88\xf6\x80",
+ .klen = 16,
+ .iv = "\x03\xa0\x20\x35\x26\xf2\x21\x8d"
+ "\x50\x20\xda\xe2\x00\x00\x00\x00",
+ .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
+ "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
+ "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
+ "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
+ .alen = 32,
+ .ctext = "\x23\x58\xce\xdc\x40\xb1\xcd\x92"
+ "\x47\x96\x59\xfc\x8a\x26\x4f\xcf",
+ .clen = 16,
+ }, {
+ .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+ "\xff\x80\x2e\x48\x7d\x82\xf8\xb9",
+ .klen = 16,
+ .iv = "\x03\xaf\x94\x87\x78\x35\x82\x81"
+ "\x7f\x88\x94\x68\x00\x00\x00\x00",
+ .alen = 0,
+ .ptext = "\x00",
+ .plen = 0,
+ .ctext = "\x72\x7e\xf5\xd6\x39\x7a\x2b\x43",
+ .clen = 8,
+ }, {
+ .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+ "\xa4\x48\x93\x39\x26\x71\x4a\xc6",
+ .klen = 16,
+ .iv = "\x03\xee\x49\x83\xe9\xa9\xff\xe9"
+ "\x57\xba\xfd\x9e\x00\x00\x00\x00",
+ .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+ "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+ "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+ "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+ .alen = 32,
+ .ptext = "\x00",
+ .plen = 0,
+ .ctext = "\xb0\x9d\xc6\xfb\x7d\xb5\xa1\x0e",
+ .clen = 8,
+ }, {
+ .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+ "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b",
+ .klen = 16,
+ .iv = "\x03\xcf\x76\x3f\xd9\x95\x75\x8f"
+ "\x44\x89\x40\x7b\x00\x00\x00\x00",
+ .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+ "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+ "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+ "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
+ .alen = 32,
+ .ptext = "\xc2\x54\xc8\xde\x78\x87\x77\x40"
+ "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
+ "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
+ "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+ .plen = 32,
+ .ctext = "\xc9\xae\xef\x1d\xf3\x2c\xd3\x38"
+ "\xc9\x7f\x7e\x28\xe8\xaa\xb3\x60"
+ "\x49\xdc\x66\xca\x7b\x3d\xe0\x3c"
+ "\xcb\x45\x9c\x1b\xb2\xbe\x07\x90"
+ "\x87\xa6\x6b\x89\x0d\x0f\x90\xaa"
+ "\x7d\xf6\x5a\x9a\x68\x2b\x81\x92",
+ .clen = 48,
+ }, {
+ .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+ "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+ .klen = 16,
+ .iv = "\x02\xff\xff\xff\xff\x00\x00\xff"
+ "\xff\xff\x00\xff\xff\x00\x00\x00",
+ .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+ "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+ "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+ "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe"
+ "\xc8\xf3\x5c\x52\x10\x63",
+ .alen = 38,
+ .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+ "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+ "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+ "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+ "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+ "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+ "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+ "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+ "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+ "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+ "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+ "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+ "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+ "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+ "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+ "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+ "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+ "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+ "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+ "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+ "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+ "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+ "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+ "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+ "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+ "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+ "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+ "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+ "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+ "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+ "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+ "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+ "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+ "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+ "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+ "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+ "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+ "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+ "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+ "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+ "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+ "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+ "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+ "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+ "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+ "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+ "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+ "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+ "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+ "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+ "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+ "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+ "\x87\x79\x60\x38\x46\xb4\x25\x57"
+ "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+ "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+ "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+ "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+ "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+ "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+ "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+ "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+ "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+ "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+ "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+ "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+ "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+ "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+ "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+ "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+ "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+ "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+ "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+ "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+ "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+ "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+ "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+ "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+ "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+ "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+ "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+ "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+ "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+ "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+ "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+ "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+ "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+ "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+ "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+ "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+ "\xa4\x78\xdb\x74\x3d\x8b\xb5",
+ .plen = 719,
+ .ctext = "\xc5\x50\x85\x02\x72\xa8\xb3\x62"
+ "\xf9\xcd\x77\x7b\x43\xa5\x04\x70"
+ "\x68\x40\x57\x21\x1c\xfe\xef\x05"
+ "\x4d\xb8\x44\xba\x59\xea\x62\x32"
+ "\xcb\x6b\x6a\x39\x9b\xf3\xe5\xa4"
+ "\x36\x38\xde\x7d\xcf\xb6\xcd\xe3"
+ "\x89\xbf\x37\xc9\x96\x3c\x70\x10"
+ "\x92\x47\xcc\xac\x6f\xf8\x55\x9a"
+ "\x26\x43\x34\xb4\x92\x7d\x68\xfc"
+ "\x60\x37\x74\x2a\x55\xba\xc7\xd7"
+ "\x98\x69\xb7\xcf\x42\xfd\xb2\x10"
+ "\xa0\x59\xe1\x2c\x73\x66\x12\x97"
+ "\x85\x8b\x28\xcc\x29\x02\x15\x89"
+ "\x23\xd3\x32\x92\x87\x57\x09\x13"
+ "\x04\x7e\x8b\x6c\x3a\xc1\x4e\x6c"
+ "\xe1\x9f\xc8\xcc\x47\x9c\xd8\x10"
+ "\xf4\xb7\x5c\x30\x7a\x8b\x0f\x01"
+ "\x52\x38\x02\x92\x99\xac\x03\x90"
+ "\x18\x32\x2d\x21\x6a\x0a\x2a\xe7"
+ "\xc2\xcc\x15\x84\x4e\x2b\x0b\x3a"
+ "\x4c\xdc\xb0\x6b\x10\xd1\x27\x10"
+ "\xf0\x4a\x5c\x43\xa0\x34\x34\x59"
+ "\x47\x43\x48\xcb\x69\xa7\xff\x52"
+ "\xb8\xca\x23\x09\x07\xd7\xc5\xe4"
+ "\x2a\x4f\x99\xd5\x83\x36\x2a\x2d"
+ "\x59\xd0\xca\xb0\xfa\x40\x8c\xab"
+ "\xdf\x69\x08\xd9\x79\x1d\xde\xa8"
+ "\x0b\x34\x74\x4d\xf5\xa0\x4c\x81"
+ "\x7f\x93\x06\x40\x24\xfe\x7d\xcd"
+ "\xe4\xfe\xf8\xf8\x30\xce\xd0\x5d"
+ "\x70\xfd\x0d\x5a\x78\x85\x74\x2d"
+ "\xe4\xb5\x40\x18\x99\x11\xe4\x6a"
+ "\xdf\xfa\x4f\x25\x2c\xde\x15\xb7"
+ "\x12\xd8\xc6\x90\x0d\x0f\xc9\xfb"
+ "\x21\xf1\xed\xfe\x98\xe1\x03\xe2"
+ "\x5c\xef\xb6\xc7\x87\x77\x0e\xcd"
+ "\xff\x78\x94\xc9\xbe\xd3\x47\xf7"
+ "\x8d\x37\x48\x01\x42\xe2\x17\x96"
+ "\xfc\xc0\xcb\x7b\x7b\x57\xaf\x3b"
+ "\xc9\xd0\x94\xce\x5e\x1b\xa9\x47"
+ "\x02\x4d\x74\xcc\x45\x1d\xd3\x2d"
+ "\x5f\x4f\x7f\xf2\x4b\xf9\x59\xee"
+ "\x9e\x9e\xb9\x95\x29\x19\xd1\x5f"
+ "\x72\xab\x8d\xf1\x28\xd1\x1c\xae"
+ "\xc2\xba\xf7\x22\x84\x2c\x83\x51"
+ "\x03\xad\xa3\xef\x81\xa7\xdc\xf1"
+ "\x44\x51\x50\x96\x70\xd1\xe5\x47"
+ "\x57\xf9\x30\x90\xe4\xbf\xfc\x75"
+ "\x14\xaa\x4d\xb7\xb1\xe7\x79\x33"
+ "\x43\xc2\x5c\xc1\xbc\x09\x92\x0f"
+ "\xa7\xaf\x68\x51\x51\xec\x0b\xc3"
+ "\x3d\x2b\x94\x30\x45\x29\x1b\x9e"
+ "\x70\x56\xf8\xd6\x67\x2d\x39\x3b"
+ "\x3c\xd2\xd0\xd3\xdc\x7d\x84\xe9"
+ "\x06\x31\x98\xa6\x5c\xbf\x10\x58"
+ "\xce\xbb\xa7\xe1\x65\x7e\x51\x87"
+ "\x70\x46\xb4\x7f\xf9\xec\x92\x1c"
+ "\x9b\x24\x49\xc1\x04\xbe\x1c\x5f"
+ "\xcc\xb3\x33\x8c\xad\xe7\xdc\x32"
+ "\x54\xa2\x0d\x83\x0f\x3c\x12\x5d"
+ "\x71\xe3\x9c\xae\x71\xa3\x2a\x10"
+ "\xc5\x91\xb4\x73\x96\x60\xdb\x5d"
+ "\x1f\xd5\x9a\xd2\x69\xc3\xd7\x4b"
+ "\xa2\x66\x81\x96\x4a\xaa\x02\xd6"
+ "\xd5\x44\x9b\x42\x3a\x15\x5f\xe7"
+ "\x4d\x7c\xf6\x71\x4a\xea\xe8\x43"
+ "\xd7\x68\xe4\xbc\x05\x87\x49\x05"
+ "\x3b\x47\xb2\x6d\x5f\xd1\x11\xa6"
+ "\x58\xd4\xa2\x45\xec\xb5\x54\x55"
+ "\xd3\xd6\xd2\x6a\x8b\x21\x9e\x2c"
+ "\xf1\x27\x4b\x5b\xe3\xff\xe0\xfd"
+ "\x4b\xf1\xe7\xe2\x84\xf2\x17\x37"
+ "\x11\x68\xc4\x92\x4b\x6b\xef\x8e"
+ "\x75\xf5\xc2\x7d\x5c\xe9\x7c\xfc"
+ "\x2b\x00\x33\x0e\x7d\x69\xd8\xd4"
+ "\x9b\xa8\x38\x54\x7e\x6d\x23\x51"
+ "\x2c\xd6\xc4\x58\x23\x1c\x22\x2a"
+ "\x59\xc5\x9b\xec\x9d\xbf\x03\x0f"
+ "\xb3\xdd\xba\x02\x22\xa0\x34\x37"
+ "\x19\x56\xc2\x5b\x32\x1d\x1e\x66"
+ "\x68\xf4\x47\x05\x04\x18\xa7\x28"
+ "\x80\xf2\xc7\x99\xed\x1e\x72\x48"
+ "\x8f\x97\x5d\xb3\x74\x42\xfd\x0c"
+ "\x0f\x5f\x29\x0c\xf1\x35\x22\x90"
+ "\xd6\x7c\xb8\xa3\x2a\x89\x38\x71"
+ "\xe9\x7a\x55\x3c\x3b\xf2\x6e\x1a"
+ "\x22\x8f\x07\x81\xc1\xe1\xf1\x76"
+ "\x2a\x75\xab\x86\xc4\xcc\x52\x59"
+ "\x83\x19\x5e\xb3\x53\xe2\x81\xdf"
+ "\xe6\x15\xb3\xba\x0c\x0e\xba"
+ "\xa9\x2c\xed\x51\xd5\x06\xc8\xc6"
+ "\x4b\x9f\x5d\x1b\x61\x31\xad\xf4",
+ .clen = 735,
}
};
@@ -15030,6 +15945,68 @@ static const struct hash_testvec sm4_cmac128_tv_template[] = {
}
};
+static const struct hash_testvec sm4_xcbc128_tv_template[] = {
+ { /* Generated from AES-XCBC128 test vectors */
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = zeroed_string,
+ .digest = "\xa9\x9a\x5c\x44\xe2\x34\xee\x2c"
+ "\x9b\xe4\x9d\xca\x64\xb0\xa5\xc4",
+ .psize = 0,
+ .ksize = 16,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = "\x00\x01\x02",
+ .digest = "\x17\x27\x62\xf3\x8b\x88\x1d\xc0"
+ "\x97\x35\x9c\x3e\x9f\x27\xb7\x83",
+ .psize = 3,
+ .ksize = 16,
+ } , {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .digest = "\xda\x45\xd1\xac\xec\x4d\xab\x46"
+ "\xdd\x59\xe0\x44\xff\x59\xd5\xfc",
+ .psize = 16,
+ .ksize = 16,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13",
+ .digest = "\xbe\x24\x5d\x81\x8c\x8a\x10\xa4"
+ "\x8e\xc2\x16\xfa\xa4\x83\xc9\x2a",
+ .psize = 20,
+ .ksize = 16,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+ .digest = "\x91\x82\x31\x56\xd5\x77\xa4\xc5"
+ "\x88\x2d\xce\x3a\x87\x5e\xbd\xba",
+ .psize = 32,
+ .ksize = 16,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+ "\x20\x21",
+ .digest = "\x2a\xae\xa5\x24\x0c\x12\x9f\x5f"
+ "\x55\xfb\xae\x35\x13\x0d\x22\x2d",
+ .psize = 34,
+ .ksize = 16,
+ }
+};
+
/* Cast6 test vectors from RFC 2612 */
static const struct cipher_testvec cast6_tv_template[] = {
{
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 7c55f4cf4a8b..c99c54cd99c6 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -225,7 +225,6 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev,
return -ENOMEM;
rng->ops.read = cavium_rng_read;
- rng->ops.quality = 1000;
pci_set_drvdata(pdev, rng);
diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c
index a01e9307737c..c1193f85982c 100644
--- a/drivers/char/hw_random/cn10k-rng.c
+++ b/drivers/char/hw_random/cn10k-rng.c
@@ -145,7 +145,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
rng->ops.read = cn10k_rng_read;
- rng->ops.quality = 1000;
rng->ops.priv = (unsigned long)rng;
reset_rng_health_state(rng);
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index cc002b0c2f0c..afde685f5e0a 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -41,14 +41,14 @@ static DEFINE_MUTEX(reading_mutex);
static int data_avail;
static u8 *rng_buffer, *rng_fillbuf;
static unsigned short current_quality;
-static unsigned short default_quality; /* = 0; default to "off" */
+static unsigned short default_quality = 1024; /* default to maximum */
module_param(current_quality, ushort, 0644);
MODULE_PARM_DESC(current_quality,
"current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
module_param(default_quality, ushort, 0644);
MODULE_PARM_DESC(default_quality,
- "default entropy content of hwrng per 1024 bits of input");
+ "default maximum entropy content of hwrng per 1024 bits of input");
static void drop_current_rng(void);
static int hwrng_init(struct hwrng *rng);
@@ -170,10 +170,7 @@ static int hwrng_init(struct hwrng *rng)
reinit_completion(&rng->cleanup_done);
skip_init:
- if (!rng->quality)
- rng->quality = default_quality;
- if (rng->quality > 1024)
- rng->quality = 1024;
+ rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024);
current_quality = rng->quality; /* obsolete */
return 0;
diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c
index 5813da617a48..c6972734ae62 100644
--- a/drivers/char/hw_random/mpfs-rng.c
+++ b/drivers/char/hw_random/mpfs-rng.c
@@ -78,7 +78,6 @@ static int mpfs_rng_probe(struct platform_device *pdev)
rng_priv->rng.read = mpfs_rng_read;
rng_priv->rng.name = pdev->name;
- rng_priv->rng.quality = 1024;
platform_set_drvdata(pdev, rng_priv);
diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index 6c00ea008555..aa993753ab12 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -22,7 +22,7 @@
#define RNG_AUTOSUSPEND_TIMEOUT 100
#define USEC_POLL 2
-#define TIMEOUT_POLL 20
+#define TIMEOUT_POLL 60
#define RNG_CTRL 0x00
#define RNG_EN BIT(0)
@@ -77,7 +77,7 @@ static bool mtk_rng_wait_ready(struct hwrng *rng, bool wait)
readl_poll_timeout_atomic(priv->base + RNG_CTRL, ready,
ready & RNG_READY, USEC_POLL,
TIMEOUT_POLL);
- return !!ready;
+ return !!(ready & RNG_READY);
}
static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -179,6 +179,7 @@ static const struct dev_pm_ops mtk_rng_pm_ops = {
#endif /* CONFIG_PM */
static const struct of_device_id mtk_rng_match[] = {
+ { .compatible = "mediatek,mt7986-rng" },
{ .compatible = "mediatek,mt7623-rng" },
{},
};
diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
index 1ec5f267a656..9903d0357e06 100644
--- a/drivers/char/hw_random/npcm-rng.c
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -13,11 +13,13 @@
#include <linux/delay.h>
#include <linux/of_irq.h>
#include <linux/pm_runtime.h>
+#include <linux/of_device.h>
#define NPCM_RNGCS_REG 0x00 /* Control and status register */
#define NPCM_RNGD_REG 0x04 /* Data register */
#define NPCM_RNGMODE_REG 0x08 /* Mode register */
+#define NPCM_RNG_CLK_SET_62_5MHZ BIT(2) /* 60-80 MHz */
#define NPCM_RNG_CLK_SET_25MHZ GENMASK(4, 3) /* 20-25 MHz */
#define NPCM_RNG_DATA_VALID BIT(1)
#define NPCM_RNG_ENABLE BIT(0)
@@ -31,14 +33,14 @@
struct npcm_rng {
void __iomem *base;
struct hwrng rng;
+ u32 clkp;
};
static int npcm_rng_init(struct hwrng *rng)
{
struct npcm_rng *priv = to_npcm_rng(rng);
- writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE,
- priv->base + NPCM_RNGCS_REG);
+ writel(priv->clkp | NPCM_RNG_ENABLE, priv->base + NPCM_RNGCS_REG);
return 0;
}
@@ -47,7 +49,7 @@ static void npcm_rng_cleanup(struct hwrng *rng)
{
struct npcm_rng *priv = to_npcm_rng(rng);
- writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG);
+ writel(priv->clkp, priv->base + NPCM_RNGCS_REG);
}
static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -109,7 +111,7 @@ static int npcm_rng_probe(struct platform_device *pdev)
priv->rng.name = pdev->name;
priv->rng.read = npcm_rng_read;
priv->rng.priv = (unsigned long)&pdev->dev;
- priv->rng.quality = 1000;
+ priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev);
writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
@@ -162,7 +164,10 @@ static const struct dev_pm_ops npcm_rng_pm_ops = {
};
static const struct of_device_id rng_dt_id[] __maybe_unused = {
- { .compatible = "nuvoton,npcm750-rng", },
+ { .compatible = "nuvoton,npcm750-rng",
+ .data = (void *)NPCM_RNG_CLK_SET_25MHZ },
+ { .compatible = "nuvoton,npcm845-rng",
+ .data = (void *)NPCM_RNG_CLK_SET_62_5MHZ },
{},
};
MODULE_DEVICE_TABLE(of, rng_dt_id);
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
index 795853dfc46b..cffa326ddc8d 100644
--- a/drivers/char/hw_random/s390-trng.c
+++ b/drivers/char/hw_random/s390-trng.c
@@ -191,7 +191,6 @@ static struct hwrng trng_hwrng_dev = {
.name = "s390-trng",
.data_read = trng_hwrng_data_read,
.read = trng_hwrng_read,
- .quality = 1024,
};
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index bc22178f83e8..a6731cf0627a 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -44,16 +44,18 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
pm_runtime_get_sync((struct device *) priv->rng.priv);
- while (max > sizeof(u32)) {
+ while (max >= sizeof(u32)) {
sr = readl_relaxed(priv->base + RNG_SR);
/* Manage timeout which is based on timer and take */
/* care of initial delay time when enabling rng */
if (!sr && wait) {
- retval = readl_relaxed_poll_timeout_atomic(priv->base
+ int err;
+
+ err = readl_relaxed_poll_timeout_atomic(priv->base
+ RNG_SR,
sr, sr,
10, 50000);
- if (retval)
+ if (err)
dev_err((struct device *)priv->rng.priv,
"%s: timeout %x!\n", __func__, sr);
}
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 8ea1fc831eb7..26f322d19a88 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -145,8 +145,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
if (!of_property_read_u32(pdev->dev.of_node,
"quality", &i))
priv->rng_ops.quality = i;
- else
- priv->rng_ops.quality = 0;
} else {
period = pdata->period;
priv->rng_ops.quality = pdata->quality;
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index a6f3a8a2aca6..f7690e0f92ed 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -148,7 +148,6 @@ static int probe_common(struct virtio_device *vdev)
.cleanup = virtio_cleanup,
.priv = (unsigned long)vi,
.name = vi->name,
- .quality = 1000,
};
vdev->priv = vi;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 55e75fbb658e..2947888d3b82 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -669,7 +669,12 @@ config CRYPTO_DEV_IMGTEC_HASH
config CRYPTO_DEV_ROCKCHIP
tristate "Rockchip's Cryptographic Engine driver"
depends on OF && ARCH_ROCKCHIP
+ depends on PM
+ select CRYPTO_ECB
+ select CRYPTO_CBC
+ select CRYPTO_DES
select CRYPTO_AES
+ select CRYPTO_ENGINE
select CRYPTO_LIB_DES
select CRYPTO_MD5
select CRYPTO_SHA1
@@ -681,6 +686,16 @@ config CRYPTO_DEV_ROCKCHIP
This driver interfaces with the hardware crypto accelerator.
Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
+config CRYPTO_DEV_ROCKCHIP_DEBUG
+ bool "Enable Rockchip crypto stats"
+ depends on CRYPTO_DEV_ROCKCHIP
+ depends on DEBUG_FS
+ help
+ Say y to enable Rockchip crypto debug stats.
+ This will create /sys/kernel/debug/rk3288_crypto/stats for displaying
+ the number of requests per algorithm and other internal stats.
+
+
config CRYPTO_DEV_ZYNQMP_AES
tristate "Support for Xilinx ZynqMP AES hw accelerator"
depends on ZYNQMP_FIRMWARE || COMPILE_TEST
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
index c4b0a8b58842..e2b9b9104694 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
@@ -108,7 +108,6 @@ int sun8i_ce_hwrng_register(struct sun8i_ce_dev *ce)
}
ce->trng.name = "sun8i Crypto Engine TRNG";
ce->trng.read = sun8i_ce_trng_read;
- ce->trng.quality = 1000;
ret = hwrng_register(&ce->trng);
if (ret)
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 910d6751644c..902f6be057ec 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -124,7 +124,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
unsigned int ivsize = crypto_skcipher_ivsize(tfm);
struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
int i = 0;
- u32 a;
+ dma_addr_t a;
int err;
rctx->ivlen = ivsize;
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index a84b657598c6..c0103e7fc2e7 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -107,7 +107,6 @@ static int atmel_sha204a_probe(struct i2c_client *client,
i2c_priv->hwrng.name = dev_name(&client->dev);
i2c_priv->hwrng.read = atmel_sha204a_rng_read;
- i2c_priv->hwrng.quality = 1024;
ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
if (ret)
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 77d048dfe5d0..1f0e82050976 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -246,7 +246,6 @@ int caam_rng_init(struct device *ctrldev)
ctx->rng.cleanup = caam_cleanup;
ctx->rng.read = caam_read;
ctx->rng.priv = (unsigned long)ctx;
- ctx->rng.quality = 1024;
dev_info(ctrldev, "registering rng-caam\n");
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 9e7308e39b30..d4e06999af9b 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -195,6 +195,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev)
ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0);
if (!ndev->iov.pf2vf_wq) {
kfree(ndev->iov.vfdev);
+ ndev->iov.vfdev = NULL;
return -ENOMEM;
}
/* enable pf2vf mailbox interrupts */
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 5976530c00a8..dd86d2650bea 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -78,13 +78,6 @@ struct ccp_crypto_cmd {
int ret;
};
-struct ccp_crypto_cpu {
- struct work_struct work;
- struct completion completion;
- struct ccp_crypto_cmd *crypto_cmd;
- int err;
-};
-
static inline bool ccp_crypto_success(int err)
{
if (err && (err != -EINPROGRESS) && (err != -EBUSY))
@@ -400,7 +393,7 @@ static void ccp_unregister_algs(void)
}
}
-static int ccp_crypto_init(void)
+static int __init ccp_crypto_init(void)
{
int ret;
@@ -421,7 +414,7 @@ static int ccp_crypto_init(void)
return ret;
}
-static void ccp_crypto_exit(void)
+static void __exit ccp_crypto_exit(void)
{
ccp_unregister_algs();
}
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 792d6da7f0c0..084d052fddcc 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -381,6 +381,15 @@ static const struct psp_vdata pspv3 = {
.inten_reg = 0x10690,
.intsts_reg = 0x10694,
};
+
+static const struct psp_vdata pspv4 = {
+ .sev = &sevv2,
+ .tee = &teev1,
+ .feature_reg = 0x109fc,
+ .inten_reg = 0x10690,
+ .intsts_reg = 0x10694,
+};
+
#endif
static const struct sp_dev_vdata dev_vdata[] = {
@@ -426,7 +435,7 @@ static const struct sp_dev_vdata dev_vdata[] = {
{ /* 5 */
.bar = 2,
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
- .psp_vdata = &pspv2,
+ .psp_vdata = &pspv4,
#endif
},
{ /* 6 */
diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
index 7083767602fc..8f008f024f8f 100644
--- a/drivers/crypto/ccree/cc_debugfs.c
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void)
cc_debugfs_dir = debugfs_create_dir("ccree", NULL);
}
-void __exit cc_debugfs_global_fini(void)
+void cc_debugfs_global_fini(void)
{
debugfs_remove(cc_debugfs_dir);
}
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index cadead18b59e..d489c6f80892 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -651,9 +651,17 @@ static struct platform_driver ccree_driver = {
static int __init ccree_init(void)
{
+ int rc;
+
cc_debugfs_global_init();
- return platform_driver_register(&ccree_driver);
+ rc = platform_driver_register(&ccree_driver);
+ if (rc) {
+ cc_debugfs_global_fini();
+ return rc;
+ }
+
+ return 0;
}
module_init(ccree_init);
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index f886401af13e..5dd3f6a4781a 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -3,11 +3,11 @@ config CRYPTO_DEV_CHELSIO
tristate "Chelsio Crypto Co-processor Driver"
depends on CHELSIO_T4
select CRYPTO_LIB_AES
+ select CRYPTO_LIB_GF128MUL
select CRYPTO_SHA1
select CRYPTO_SHA256
select CRYPTO_SHA512
select CRYPTO_AUTHENC
- select CRYPTO_GF128MUL
help
The Chelsio Crypto Co-processor driver for T6 adapters.
diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile
index 1e89269a2e4b..8595a5a5d228 100644
--- a/drivers/crypto/hisilicon/Makefile
+++ b/drivers/crypto/hisilicon/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hpre/
obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/
obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/
obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o
-hisi_qm-objs = qm.o sgl.o
+hisi_qm-objs = qm.o sgl.o debugfs.o
obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/
obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += trng/
diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
new file mode 100644
index 000000000000..2cc1591949db
--- /dev/null
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -0,0 +1,1147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 HiSilicon Limited. */
+#include <linux/hisi_acc_qm.h>
+#include "qm_common.h"
+
+#define QM_DFX_BASE 0x0100000
+#define QM_DFX_STATE1 0x0104000
+#define QM_DFX_STATE2 0x01040C8
+#define QM_DFX_COMMON 0x0000
+#define QM_DFX_BASE_LEN 0x5A
+#define QM_DFX_STATE1_LEN 0x2E
+#define QM_DFX_STATE2_LEN 0x11
+#define QM_DFX_COMMON_LEN 0xC3
+#define QM_DFX_REGS_LEN 4UL
+#define QM_DBG_TMP_BUF_LEN 22
+#define CURRENT_FUN_MASK GENMASK(5, 0)
+#define CURRENT_Q_MASK GENMASK(31, 16)
+#define QM_SQE_ADDR_MASK GENMASK(7, 0)
+
+#define QM_DFX_MB_CNT_VF 0x104010
+#define QM_DFX_DB_CNT_VF 0x104020
+#define QM_DFX_SQE_CNT_VF_SQN 0x104030
+#define QM_DFX_CQE_CNT_VF_CQN 0x104040
+#define QM_DFX_QN_SHIFT 16
+#define QM_DFX_CNT_CLR_CE 0x100118
+#define QM_DBG_WRITE_LEN 1024
+
+static const char * const qm_debug_file_name[] = {
+ [CURRENT_QM] = "current_qm",
+ [CURRENT_Q] = "current_q",
+ [CLEAR_ENABLE] = "clear_enable",
+};
+
+struct qm_dfx_item {
+ const char *name;
+ u32 offset;
+};
+
+struct qm_cmd_dump_item {
+ const char *cmd;
+ char *info_name;
+ int (*dump_fn)(struct hisi_qm *qm, char *cmd, char *info_name);
+};
+
+static struct qm_dfx_item qm_dfx_files[] = {
+ {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
+ {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
+ {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)},
+ {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)},
+ {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)},
+};
+
+#define CNT_CYC_REGS_NUM 10
+static const struct debugfs_reg32 qm_dfx_regs[] = {
+ /* XXX_CNT are read-clear registers */
+ {"QM_ECC_1BIT_CNT ", 0x104000ull},
+ {"QM_ECC_MBIT_CNT ", 0x104008ull},
+ {"QM_DFX_MB_CNT ", 0x104018ull},
+ {"QM_DFX_DB_CNT ", 0x104028ull},
+ {"QM_DFX_SQE_CNT ", 0x104038ull},
+ {"QM_DFX_CQE_CNT ", 0x104048ull},
+ {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull},
+ {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull},
+ {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull},
+ {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull},
+ {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
+ {"QM_ECC_1BIT_INF ", 0x104004ull},
+ {"QM_ECC_MBIT_INF ", 0x10400cull},
+ {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull},
+ {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull},
+ {"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull},
+ {"QM_DFX_FF_ST0 ", 0x1040c8ull},
+ {"QM_DFX_FF_ST1 ", 0x1040ccull},
+ {"QM_DFX_FF_ST2 ", 0x1040d0ull},
+ {"QM_DFX_FF_ST3 ", 0x1040d4ull},
+ {"QM_DFX_FF_ST4 ", 0x1040d8ull},
+ {"QM_DFX_FF_ST5 ", 0x1040dcull},
+ {"QM_DFX_FF_ST6 ", 0x1040e0ull},
+ {"QM_IN_IDLE_ST ", 0x1040e4ull},
+};
+
+static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
+ {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
+};
+
+/* define the QM's dfx regs region and region length */
+static struct dfx_diff_registers qm_diff_regs[] = {
+ {
+ .reg_offset = QM_DFX_BASE,
+ .reg_len = QM_DFX_BASE_LEN,
+ }, {
+ .reg_offset = QM_DFX_STATE1,
+ .reg_len = QM_DFX_STATE1_LEN,
+ }, {
+ .reg_offset = QM_DFX_STATE2,
+ .reg_len = QM_DFX_STATE2_LEN,
+ }, {
+ .reg_offset = QM_DFX_COMMON,
+ .reg_len = QM_DFX_COMMON_LEN,
+ },
+};
+
+static struct hisi_qm *file_to_qm(struct debugfs_file *file)
+{
+ struct qm_debug *debug = file->debug;
+
+ return container_of(debug, struct hisi_qm, debug);
+}
+
+static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ char buf[QM_DBG_READ_LEN];
+ int len;
+
+ len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n",
+ "Please echo help to cmd to get help information");
+
+ return simple_read_from_buffer(buffer, count, pos, buf, len);
+}
+
+/*
+ * Print a buffer to the kernel log as a sequence of 4-byte words.
+ *
+ * @qm: device whose struct device is used for the "<name> DUMP" header line.
+ * @info: start of the buffer to print.
+ * @info_size: number of bytes to print.
+ * @info_name: label printed in the header line.
+ *
+ * Each output line shows one word as four bytes, highest-addressed byte
+ * first. The loop advances in 4-byte steps and always reads four bytes, so
+ * if @info_size is not a multiple of 4 the last iteration reads past
+ * @info_size.
+ */
+static void dump_show(struct hisi_qm *qm, void *info,
+ unsigned int info_size, char *info_name)
+{
+ struct device *dev = &qm->pdev->dev;
+ u8 *info_curr = info;
+ u32 i;
+#define BYTE_PER_DW 4
+
+ dev_info(dev, "%s DUMP\n", info_name);
+ for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) {
+ /* word index, then bytes 3..0 of that word */
+ pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW,
+ *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr));
+ }
+}
+
+/*
+ * Dump the SQ context of one queue pair to the kernel log.
+ *
+ * @qm: device qm handle.
+ * @s: command argument string; must hold the queue pair number.
+ * @name: label passed to dump_show() for the mailbox-provided context.
+ *
+ * The context is requested through hisi_qm_mb() with QM_MB_CMD_SQC into a
+ * buffer obtained from hisi_qm_ctx_alloc(). If the mailbox call fails, the
+ * entry at qm->sqc + qp_id is dumped instead (labelled "SOFT SQC"), provided
+ * qm->sqc is set.
+ *
+ * Return: -EINVAL if @s is NULL or is not a number below qm->qp_num, the
+ * error from hisi_qm_ctx_alloc() if the allocation fails, otherwise 0. Note
+ * that 0 is also returned when the mailbox call fails.
+ */
+static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_sqc *sqc, *sqc_curr;
+ dma_addr_t sqc_dma;
+ u32 qp_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &qp_id);
+ if (ret || qp_id >= qm->qp_num) {
+ dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
+ return -EINVAL;
+ }
+
+ sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
+ if (IS_ERR(sqc))
+ return PTR_ERR(sqc);
+
+ /* NOTE(review): the trailing 1 is presumably the "read" operation flag - confirm */
+ ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1);
+ if (ret) {
+ /* mailbox failed: fall back to the copy reachable through qm->sqc */
+ down_read(&qm->qps_lock);
+ if (qm->sqc) {
+ sqc_curr = qm->sqc + qp_id;
+
+ dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC");
+ }
+ up_read(&qm->qps_lock);
+
+ goto free_ctx;
+ }
+
+ dump_show(qm, sqc, sizeof(*sqc), name);
+
+free_ctx:
+ hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
+ return 0;
+}
+
+/*
+ * Dump the CQ context of one queue pair to the kernel log.
+ *
+ * @qm: device qm handle.
+ * @s: command argument string; must hold the queue pair number.
+ * @name: label passed to dump_show() for the mailbox-provided context.
+ *
+ * The context is requested through hisi_qm_mb() with QM_MB_CMD_CQC into a
+ * buffer obtained from hisi_qm_ctx_alloc(). If the mailbox call fails, the
+ * entry at qm->cqc + qp_id is dumped instead (labelled "SOFT CQC"), provided
+ * qm->cqc is set.
+ *
+ * Return: -EINVAL if @s is NULL or is not a number below qm->qp_num, the
+ * error from hisi_qm_ctx_alloc() if the allocation fails, otherwise 0. Note
+ * that 0 is also returned when the mailbox call fails.
+ */
+static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_cqc *cqc, *cqc_curr;
+ dma_addr_t cqc_dma;
+ u32 qp_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &qp_id);
+ if (ret || qp_id >= qm->qp_num) {
+ dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
+ return -EINVAL;
+ }
+
+ cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
+ if (IS_ERR(cqc))
+ return PTR_ERR(cqc);
+
+ /* NOTE(review): the trailing 1 is presumably the "read" operation flag - confirm */
+ ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1);
+ if (ret) {
+ /* mailbox failed: fall back to the copy reachable through qm->cqc */
+ down_read(&qm->qps_lock);
+ if (qm->cqc) {
+ cqc_curr = qm->cqc + qp_id;
+
+ dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC");
+ }
+ up_read(&qm->qps_lock);
+
+ goto free_ctx;
+ }
+
+ dump_show(qm, cqc, sizeof(*cqc), name);
+
+free_ctx:
+ hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
+ return 0;
+}
+
+static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ dma_addr_t xeqc_dma;
+ size_t size;
+ void *xeqc;
+ int ret;
+ u8 cmd;
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ if (!strcmp(name, "EQC")) {
+ cmd = QM_MB_CMD_EQC;
+ size = sizeof(struct qm_eqc);
+ } else {
+ cmd = QM_MB_CMD_AEQC;
+ size = sizeof(struct qm_aeqc);
+ }
+
+ xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma);
+ if (IS_ERR(xeqc))
+ return PTR_ERR(xeqc);
+
+ ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
+ if (ret)
+ goto err_free_ctx;
+
+ dump_show(qm, xeqc, size, name);
+
+err_free_ctx:
+ hisi_qm_ctx_free(qm, size, xeqc, &xeqc_dma);
+ return ret;
+}
+
+static int q_dump_param_parse(struct hisi_qm *qm, char *s,
+ u32 *e_id, u32 *q_id, u16 q_depth)
+{
+ struct device *dev = &qm->pdev->dev;
+ unsigned int qp_num = qm->qp_num;
+ char *presult;
+ int ret;
+
+ presult = strsep(&s, " ");
+ if (!presult) {
+ dev_err(dev, "Please input qp number!\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(presult, 0, q_id);
+ if (ret || *q_id >= qp_num) {
+ dev_err(dev, "Please input qp num (0-%u)", qp_num - 1);
+ return -EINVAL;
+ }
+
+ presult = strsep(&s, " ");
+ if (!presult) {
+ dev_err(dev, "Please input sqe number!\n");
+ return -EINVAL;
+ }
+
+ ret = kstrtou32(presult, 0, e_id);
+ if (ret || *e_id >= q_depth) {
+ dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1);
+ return -EINVAL;
+ }
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Dump one submission queue element to the kernel log.
+ *
+ * @qm: device qm handle.
+ * @s: command argument string of the form "<qp id> <sqe id>".
+ * @name: label passed to dump_show().
+ *
+ * The element is copied into a private buffer first so that the region
+ * described by qm->debug.sqe_mask_offset / sqe_mask_len can be overwritten
+ * with QM_SQE_ADDR_MASK before printing, without touching the live queue.
+ *
+ * The bound for the element index is the SQ depth (not the CQ depth), and
+ * only the requested element is copied rather than the whole queue.
+ *
+ * Return: 0 on success, the error from q_dump_param_parse() on bad input,
+ * or -ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name)
+{
+	u16 sq_depth = qm->qp_array->sq_depth;
+	struct hisi_qp *qp;
+	u32 qp_id, sqe_id;
+	void *sqe;
+	int ret;
+
+	ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth);
+	if (ret)
+		return ret;
+
+	sqe = kzalloc(qm->sqe_size, GFP_KERNEL);
+	if (!sqe)
+		return -ENOMEM;
+
+	qp = &qm->qp_array[qp_id];
+	memcpy(sqe, (u8 *)qp->sqe + (size_t)sqe_id * qm->sqe_size, qm->sqe_size);
+	/* hide the masked region of the element before it reaches the log */
+	memset((u8 *)sqe + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
+	       qm->debug.sqe_mask_len);
+
+	dump_show(qm, sqe, qm->sqe_size, name);
+
+	kfree(sqe);
+
+	return 0;
+}
+
+static int qm_cq_dump(struct hisi_qm *qm, char *s, char *name)
+{
+ struct qm_cqe *cqe_curr;
+ struct hisi_qp *qp;
+ u32 qp_id, cqe_id;
+ int ret;
+
+ ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth);
+ if (ret)
+ return ret;
+
+ qp = &qm->qp_array[qp_id];
+ cqe_curr = qp->cqe + cqe_id;
+ dump_show(qm, cqe_curr, sizeof(struct qm_cqe), name);
+
+ return 0;
+}
+
+static int qm_eq_aeq_dump(struct hisi_qm *qm, char *s, char *name)
+{
+ struct device *dev = &qm->pdev->dev;
+ u16 xeq_depth;
+ size_t size;
+ void *xeqe;
+ u32 xeqe_id;
+ int ret;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtou32(s, 0, &xeqe_id);
+ if (ret)
+ return -EINVAL;
+
+ if (!strcmp(name, "EQE")) {
+ xeq_depth = qm->eq_depth;
+ size = sizeof(struct qm_eqe);
+ } else {
+ xeq_depth = qm->aeq_depth;
+ size = sizeof(struct qm_aeqe);
+ }
+
+ if (xeqe_id >= xeq_depth) {
+ dev_err(dev, "Please input eqe or aeqe num (0-%u)", xeq_depth - 1);
+ return -EINVAL;
+ }
+
+ down_read(&qm->qps_lock);
+
+ if (qm->eqe && !strcmp(name, "EQE")) {
+ xeqe = qm->eqe + xeqe_id;
+ } else if (qm->aeqe && !strcmp(name, "AEQE")) {
+ xeqe = qm->aeqe + xeqe_id;
+ } else {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ dump_show(qm, xeqe, size, name);
+
+err_unlock:
+ up_read(&qm->qps_lock);
+ return ret;
+}
+
+static int qm_dbg_help(struct hisi_qm *qm, char *s)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ if (strsep(&s, " ")) {
+ dev_err(dev, "Please do not input extra characters!\n");
+ return -EINVAL;
+ }
+
+ dev_info(dev, "available commands:\n");
+ dev_info(dev, "sqc <num>\n");
+ dev_info(dev, "cqc <num>\n");
+ dev_info(dev, "eqc\n");
+ dev_info(dev, "aeqc\n");
+ dev_info(dev, "sq <num> <e>\n");
+ dev_info(dev, "cq <num> <e>\n");
+ dev_info(dev, "eq <e>\n");
+ dev_info(dev, "aeq <e>\n");
+
+ return 0;
+}
+
+static const struct qm_cmd_dump_item qm_cmd_dump_table[] = {
+ {
+ .cmd = "sqc",
+ .info_name = "SQC",
+ .dump_fn = qm_sqc_dump,
+ }, {
+ .cmd = "cqc",
+ .info_name = "CQC",
+ .dump_fn = qm_cqc_dump,
+ }, {
+ .cmd = "eqc",
+ .info_name = "EQC",
+ .dump_fn = qm_eqc_aeqc_dump,
+ }, {
+ .cmd = "aeqc",
+ .info_name = "AEQC",
+ .dump_fn = qm_eqc_aeqc_dump,
+ }, {
+ .cmd = "sq",
+ .info_name = "SQE",
+ .dump_fn = qm_sq_dump,
+ }, {
+ .cmd = "cq",
+ .info_name = "CQE",
+ .dump_fn = qm_cq_dump,
+ }, {
+ .cmd = "eq",
+ .info_name = "EQE",
+ .dump_fn = qm_eq_aeq_dump,
+ }, {
+ .cmd = "aeq",
+ .info_name = "AEQE",
+ .dump_fn = qm_eq_aeq_dump,
+ },
+};
+
+static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
+{
+ struct device *dev = &qm->pdev->dev;
+ char *presult, *s, *s_tmp;
+ int table_size, i, ret;
+
+ s = kstrdup(cmd_buf, GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s_tmp = s;
+ presult = strsep(&s, " ");
+ if (!presult) {
+ ret = -EINVAL;
+ goto err_buffer_free;
+ }
+
+ if (!strcmp(presult, "help")) {
+ ret = qm_dbg_help(qm, s);
+ goto err_buffer_free;
+ }
+
+ table_size = ARRAY_SIZE(qm_cmd_dump_table);
+ for (i = 0; i < table_size; i++) {
+ if (!strcmp(presult, qm_cmd_dump_table[i].cmd)) {
+ ret = qm_cmd_dump_table[i].dump_fn(qm, s,
+ qm_cmd_dump_table[i].info_name);
+ break;
+ }
+ }
+
+ if (i == table_size) {
+ dev_info(dev, "Please echo help\n");
+ ret = -EINVAL;
+ }
+
+err_buffer_free:
+ kfree(s_tmp);
+
+ return ret;
+}
+
+/*
+ * debugfs "cmd" write handler: run one dump command written by userspace.
+ *
+ * Only writes at offset 0 are processed. The input is limited to
+ * QM_DBG_WRITE_LEN bytes and is cut at the first newline before being handed
+ * to qm_cmd_write_dump().
+ *
+ * Return: number of bytes consumed (up to and including the newline, if any)
+ * on success; 0 when *pos is non-zero or the qm status is QM_STOP; -ENOSPC
+ * for oversized input; otherwise the error from the failing helper.
+ */
+static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
+			    size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char *cmd, *newline;
+	int rc;
+
+	if (*pos)
+		return 0;
+
+	rc = hisi_qm_get_dfx_access(qm);
+	if (rc)
+		return rc;
+
+	/* Judge if the instance is being reset. */
+	if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) {
+		rc = 0;
+		goto out_put;
+	}
+
+	if (count > QM_DBG_WRITE_LEN) {
+		rc = -ENOSPC;
+		goto out_put;
+	}
+
+	cmd = memdup_user_nul(buffer, count);
+	if (IS_ERR(cmd)) {
+		rc = PTR_ERR(cmd);
+		goto out_put;
+	}
+
+	/* keep only the first line; report it (newline included) as consumed */
+	newline = strchr(cmd, '\n');
+	if (newline) {
+		*newline = '\0';
+		count = newline - cmd + 1;
+	}
+
+	rc = qm_cmd_write_dump(qm, cmd);
+	kfree(cmd);
+	if (!rc)
+		rc = count;
+
+out_put:
+	hisi_qm_put_dfx_access(qm);
+	return rc;
+}
+
+static const struct file_operations qm_cmd_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_cmd_read,
+ .write = qm_cmd_write,
+};
+
+/**
+ * hisi_qm_regs_dump() - Dump registers' values.
+ * @s: debugfs file handle.
+ * @regset: accelerator registers information.
+ *
+ * Dump accelerator registers.
+ */
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
+{
+ struct pci_dev *pdev = to_pci_dev(regset->dev);
+ struct hisi_qm *qm = pci_get_drvdata(pdev);
+ const struct debugfs_reg32 *regs = regset->regs;
+ int regs_len = regset->nregs;
+ int i, ret;
+ u32 val;
+
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return;
+
+ for (i = 0; i < regs_len; i++) {
+ val = readl(regset->base + regs[i].offset);
+ seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
+ }
+
+ hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
+
+/*
+ * seq_file show callback for the "regs" debugfs file.
+ *
+ * Selects the PF or VF register table according to qm->fun_type and prints
+ * it through hisi_qm_regs_dump(). Always returns 0.
+ */
+static int qm_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+	bool is_pf = qm->fun_type == QM_HW_PF;
+	struct debugfs_regset32 regset;
+
+	regset.base = qm->io_base;
+	regset.dev = &qm->pdev->dev;
+	regset.regs = is_pf ? qm_dfx_regs : qm_vf_dfx_regs;
+	regset.nregs = is_pf ? ARRAY_SIZE(qm_dfx_regs) :
+			       ARRAY_SIZE(qm_vf_dfx_regs);
+
+	hisi_qm_regs_dump(s, &regset);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qm_regs);
+
+static u32 current_q_read(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
+}
+
+static int current_q_write(struct hisi_qm *qm, u32 val)
+{
+ u32 tmp;
+
+ if (val >= qm->debug.curr_qm_qp_num)
+ return -EINVAL;
+
+ tmp = val << QM_DFX_QN_SHIFT |
+ (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK);
+ writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+ tmp = val << QM_DFX_QN_SHIFT |
+ (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK);
+ writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+ return 0;
+}
+
+static u32 clear_enable_read(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+
+/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
+static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
+{
+ if (rd_clr_ctrl > 1)
+ return -EINVAL;
+
+ writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+ return 0;
+}
+
+static u32 current_qm_read(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + QM_DFX_MB_CNT_VF);
+}
+
+static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num)
+{
+ u32 remain_q_num, vfq_num;
+ u32 num_vfs = qm->vfs_num;
+
+ vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs;
+ if (vfq_num >= qm->max_qp_num)
+ return qm->max_qp_num;
+
+ remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs;
+ if (vfq_num + remain_q_num <= qm->max_qp_num)
+ return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num;
+
+ /*
+ * if vfq_num + remain_q_num > max_qp_num, the last VFs,
+ * each with one more queue.
+ */
+ return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num;
+}
+
+static int current_qm_write(struct hisi_qm *qm, u32 val)
+{
+ u32 tmp;
+
+ if (val > qm->vfs_num)
+ return -EINVAL;
+
+ /* Calculate and store curr_qm_qp_num according to the PF or VF device ID */
+ if (!val)
+ qm->debug.curr_qm_qp_num = qm->qp_num;
+ else
+ qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val);
+
+ writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
+ writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
+
+ tmp = val |
+ (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
+ writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+ tmp = val |
+ (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
+ writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+ return 0;
+}
+
+static ssize_t qm_debug_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct debugfs_file *file = filp->private_data;
+ enum qm_debug_file index = file->index;
+ struct hisi_qm *qm = file_to_qm(file);
+ char tbuf[QM_DBG_TMP_BUF_LEN];
+ u32 val;
+ int ret;
+
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return ret;
+
+ mutex_lock(&file->lock);
+ switch (index) {
+ case CURRENT_QM:
+ val = current_qm_read(qm);
+ break;
+ case CURRENT_Q:
+ val = current_q_read(qm);
+ break;
+ case CLEAR_ENABLE:
+ val = clear_enable_read(qm);
+ break;
+ default:
+ goto err_input;
+ }
+ mutex_unlock(&file->lock);
+
+ hisi_qm_put_dfx_access(qm);
+ ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+ mutex_unlock(&file->lock);
+ hisi_qm_put_dfx_access(qm);
+ return -EINVAL;
+}
+
+/*
+ * debugfs write handler for the current_qm / current_q / clear_enable files.
+ *
+ * Only writes at offset 0 are processed. The input must fit in
+ * QM_DBG_TMP_BUF_LEN - 1 bytes and must parse as a 32-bit unsigned number;
+ * the value is then passed to the write helper selected by file->index while
+ * holding file->lock.
+ *
+ * The value is parsed directly as u32 because all write helpers take a u32:
+ * parsing into a wider type would let out-of-range input be silently
+ * truncated into a valid-looking value.
+ *
+ * Return: @count on success; 0 when *pos is non-zero; -ENOSPC for oversized
+ * input; -EFAULT when the input is not a valid u32; otherwise the error from
+ * the failing helper.
+ */
+static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
+			      size_t count, loff_t *pos)
+{
+	struct debugfs_file *file = filp->private_data;
+	enum qm_debug_file index = file->index;
+	struct hisi_qm *qm = file_to_qm(file);
+	char tbuf[QM_DBG_TMP_BUF_LEN];
+	int len, ret;
+	u32 val;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= QM_DBG_TMP_BUF_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf,
+				     count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	/* error code kept as -EFAULT so existing userspace sees no change */
+	if (kstrtou32(tbuf, 0, &val))
+		return -EFAULT;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return ret;
+
+	mutex_lock(&file->lock);
+	switch (index) {
+	case CURRENT_QM:
+		ret = current_qm_write(qm, val);
+		break;
+	case CURRENT_Q:
+		ret = current_q_write(qm, val);
+		break;
+	case CLEAR_ENABLE:
+		ret = clear_enable_write(qm, val);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	mutex_unlock(&file->lock);
+
+	hisi_qm_put_dfx_access(qm);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static const struct file_operations qm_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_debug_read,
+ .write = qm_debug_write,
+};
+
+/*
+ * Free a register snapshot array created by dfx_regs_init().
+ *
+ * @qm: device qm handle (unused here).
+ * @dregs: snapshot array; may be NULL or an ERR_PTR, in which case nothing
+ *         is done.
+ * @reg_len: number of entries in @dregs.
+ *
+ * The caller is responsible for clearing its own pointer to @dregs.
+ */
+static void dfx_regs_uninit(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len)
+{
+	int i;
+
+	/* tolerate a snapshot that was never (successfully) allocated */
+	if (IS_ERR_OR_NULL(dregs))
+		return;
+
+	/* Set each entry's pointer to NULL to prevent a double free */
+	for (i = 0; i < reg_len; i++) {
+		kfree(dregs[i].regs);
+		dregs[i].regs = NULL;
+	}
+	kfree(dregs);
+}
+
+/*
+ * Allocate a snapshot of the register regions described by @cregs.
+ *
+ * @qm: device qm handle; registers are read relative to qm->io_base.
+ * @cregs: table of regions (offset and number of registers) to snapshot.
+ * @reg_len: number of entries in @cregs.
+ *
+ * For every region with a non-zero reg_len, the offset and length are
+ * copied, a value array is allocated and each register is read with readl()
+ * at reg_offset + j * QM_DFX_REGS_LEN. Regions with a zero reg_len are left
+ * zeroed in the result.
+ *
+ * Return: the newly allocated array, or ERR_PTR(-ENOMEM) if any allocation
+ * fails (everything allocated so far is freed).
+ */
+static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
+ const struct dfx_diff_registers *cregs, u32 reg_len)
+{
+ struct dfx_diff_registers *diff_regs;
+ u32 j, base_offset;
+ int i;
+
+ diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
+ if (!diff_regs)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < reg_len; i++) {
+ if (!cregs[i].reg_len)
+ continue;
+
+ diff_regs[i].reg_offset = cregs[i].reg_offset;
+ diff_regs[i].reg_len = cregs[i].reg_len;
+ /* one QM_DFX_REGS_LEN-byte slot per register in the region */
+ diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
+ GFP_KERNEL);
+ if (!diff_regs[i].regs)
+ goto alloc_error;
+
+ for (j = 0; j < diff_regs[i].reg_len; j++) {
+ base_offset = diff_regs[i].reg_offset +
+ j * QM_DFX_REGS_LEN;
+ diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
+ }
+ }
+
+ return diff_regs;
+
+alloc_error:
+ /* free the value arrays of the entries before the one that failed */
+ while (i > 0) {
+ i--;
+ kfree(diff_regs[i].regs);
+ }
+ kfree(diff_regs);
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Take the initial snapshots used by the "diff_regs" debugfs files.
+ *
+ * @qm: device qm handle.
+ * @dregs: accelerator-specific register regions to snapshot.
+ * @reg_len: number of entries in @dregs.
+ *
+ * Stores the QM snapshot in qm->debug.qm_diff_regs and the accelerator
+ * snapshot in qm->debug.acc_diff_regs. On failure both pointers are left
+ * NULL, so that later users never see an ERR_PTR or a freed buffer.
+ *
+ * Return: 0 on success or the error reported by dfx_regs_init().
+ */
+static int qm_diff_regs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, u32 reg_len)
+{
+	int ret;
+
+	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+	if (IS_ERR(qm->debug.qm_diff_regs)) {
+		ret = PTR_ERR(qm->debug.qm_diff_regs);
+		qm->debug.qm_diff_regs = NULL;
+		return ret;
+	}
+
+	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
+	if (IS_ERR(qm->debug.acc_diff_regs)) {
+		ret = PTR_ERR(qm->debug.acc_diff_regs);
+		qm->debug.acc_diff_regs = NULL;
+		/* undo the first snapshot and drop the now-stale pointer */
+		dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+		qm->debug.qm_diff_regs = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qm_last_regs_uninit(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+
+ if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+ return;
+
+ kfree(debug->qm_last_words);
+ debug->qm_last_words = NULL;
+}
+
+static int qm_last_regs_init(struct hisi_qm *qm)
+{
+ int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
+ struct qm_debug *debug = &qm->debug;
+ int i;
+
+ if (qm->fun_type == QM_HW_VF)
+ return 0;
+
+ debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+ if (!debug->qm_last_words)
+ return -ENOMEM;
+
+ for (i = 0; i < dfx_regs_num; i++) {
+ debug->qm_last_words[i] = readl_relaxed(qm->io_base +
+ qm_dfx_regs[i].offset);
+ }
+
+ return 0;
+}
+
+/*
+ * Release both register snapshots taken by qm_diff_regs_init().
+ *
+ * @qm: device qm handle.
+ * @reg_len: number of entries in the accelerator snapshot.
+ *
+ * The pointers in qm->debug are cleared after freeing so that no dangling
+ * reference to the snapshots remains.
+ */
+static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len)
+{
+	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+	qm->debug.acc_diff_regs = NULL;
+	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+	qm->debug.qm_diff_regs = NULL;
+}
+
+/**
+ * hisi_qm_regs_debugfs_init() - Allocate memory for registers.
+ * @qm: device qm handle.
+ * @dregs: diff registers handle.
+ * @reg_len: diff registers region length.
+ */
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, u32 reg_len)
+{
+ int ret;
+
+ if (!qm || !dregs)
+ return -EINVAL;
+
+ if (qm->fun_type != QM_HW_PF)
+ return 0;
+
+ ret = qm_last_regs_init(qm);
+ if (ret) {
+ dev_info(&qm->pdev->dev, "failed to init qm words memory!\n");
+ return ret;
+ }
+
+ ret = qm_diff_regs_init(qm, dregs, reg_len);
+ if (ret) {
+ qm_last_regs_uninit(qm);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init);
+
+/**
+ * hisi_qm_regs_debugfs_uninit() - Free memory for registers.
+ * @qm: device qm handle.
+ * @reg_len: diff registers region length.
+ */
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len)
+{
+ if (!qm || qm->fun_type != QM_HW_PF)
+ return;
+
+ qm_diff_regs_uninit(qm, reg_len);
+ qm_last_regs_uninit(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit);
+
+/**
+ * hisi_qm_acc_diff_regs_dump() - Dump registers' values.
+ * @qm: device qm handle.
+ * @s: Debugfs file handle.
+ * @dregs: diff registers handle.
+ * @regs_len: diff registers region length.
+ */
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+ struct dfx_diff_registers *dregs, u32 regs_len)
+{
+ u32 j, val, base_offset;
+ int i, ret;
+
+ if (!qm || !s || !dregs)
+ return;
+
+ ret = hisi_qm_get_dfx_access(qm);
+ if (ret)
+ return;
+
+ down_read(&qm->qps_lock);
+ for (i = 0; i < regs_len; i++) {
+ if (!dregs[i].reg_len)
+ continue;
+
+ for (j = 0; j < dregs[i].reg_len; j++) {
+ base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
+ val = readl(qm->io_base + base_offset);
+ if (val != dregs[i].regs[j])
+ seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
+ base_offset, dregs[i].regs[j], val);
+ }
+ }
+ up_read(&qm->qps_lock);
+
+ hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
+
+void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm)
+{
+ struct qm_debug *debug = &qm->debug;
+ struct pci_dev *pdev = qm->pdev;
+ u32 val;
+ int i;
+
+ if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
+ val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
+ if (debug->qm_last_words[i] != val)
+ pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+ qm_dfx_regs[i].name, debug->qm_last_words[i], val);
+ }
+}
+
+static int qm_diff_regs_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+
+ hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
+ ARRAY_SIZE(qm_diff_regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+
+static ssize_t qm_status_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *pos)
+{
+ struct hisi_qm *qm = filp->private_data;
+ char buf[QM_DBG_READ_LEN];
+ int val, len;
+
+ val = atomic_read(&qm->status.flags);
+ len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
+
+ return simple_read_from_buffer(buffer, count, pos, buf, len);
+}
+
+static const struct file_operations qm_status_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_status_read,
+};
+
+/*
+ * Create one of the qm_debug_file_name[] debugfs files.
+ *
+ * @qm: device qm handle.
+ * @dir: debugfs directory in which to create the file.
+ * @index: which file to create; also selects the entry in qm->debug.files
+ *         that backs it.
+ *
+ * The backing entry (index, lock, back-pointer) is fully initialised before
+ * the file is created: once debugfs_create_file() returns, the handlers in
+ * qm_debug_fops may run and they take file->lock and read file->index and
+ * file->debug.
+ */
+static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
+				   enum qm_debug_file index)
+{
+	struct debugfs_file *file = qm->debug.files + index;
+
+	file->index = index;
+	mutex_init(&file->lock);
+	file->debug = &qm->debug;
+
+	debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
+			    &qm_debug_fops);
+}
+
+static int qm_debugfs_atomic64_set(void *data, u64 val)
+{
+ if (val)
+ return -EINVAL;
+
+ atomic64_set((atomic64_t *)data, 0);
+
+ return 0;
+}
+
+static int qm_debugfs_atomic64_get(void *data, u64 *val)
+{
+ *val = atomic64_read((atomic64_t *)data);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
+ qm_debugfs_atomic64_set, "%llu\n");
+
+/**
+ * hisi_qm_debug_init() - Initialize qm related debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Create qm related debugfs files.
+ */
+void hisi_qm_debug_init(struct hisi_qm *qm)
+{
+ struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
+ struct qm_dfx *dfx = &qm->debug.dfx;
+ struct dentry *qm_d;
+ void *data;
+ int i;
+
+ qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
+ qm->debug.qm_d = qm_d;
+
+ /* only show this in PF */
+ if (qm->fun_type == QM_HW_PF) {
+ qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
+ for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
+ qm_create_debugfs_file(qm, qm->debug.qm_d, i);
+ }
+
+ if (qm_regs)
+ debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
+ qm, &qm_diff_regs_fops);
+
+ debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
+
+ debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
+
+ debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
+ &qm_status_fops);
+ for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
+ data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
+ debugfs_create_file(qm_dfx_files[i].name,
+ 0644,
+ qm_d,
+ data,
+ &qm_atomic64_ops);
+ }
+
+ if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
+ hisi_qm_set_algqos_init(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
+
+/**
+ * hisi_qm_debug_regs_clear() - clear qm debug related registers.
+ * @qm: The qm for which we want to clear its debug registers.
+ */
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
+{
+ const struct debugfs_reg32 *regs;
+ int i;
+
+ /* clear current_qm */
+ writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+ writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+ /* clear current_q */
+ writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+ writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+ /*
+ * these registers are reading and clearing, so clear them after
+ * reading them.
+ */
+ writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+ regs = qm_dfx_regs;
+ for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
+ readl(qm->io_base + regs->offset);
+ regs++;
+ }
+
+ /* clear clear_enable */
+ writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 471e5ca720f5..923f9c279265 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -1101,8 +1101,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
- ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs,
- ARRAY_SIZE(hpre_diff_regs));
+ ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init HPRE diff regs!\n");
goto debugfs_remove;
@@ -1121,7 +1120,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
debugfs_remove:
debugfs_remove_recursive(qm->debug.debug_root);
return ret;
@@ -1129,7 +1128,7 @@ debugfs_remove:
static void hpre_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
}
@@ -1437,18 +1436,12 @@ err_with_qm_init:
static void hpre_remove(struct pci_dev *pdev)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
- int ret;
hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &hpre_devices);
hisi_qm_alg_unregister(qm, &hpre_devices);
- if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
- ret = hisi_qm_sriov_disable(pdev, true);
- if (ret) {
- pci_err(pdev, "Disable SRIOV fail!\n");
- return;
- }
- }
+ if (qm->fun_type == QM_HW_PF && qm->vfs_num)
+ hisi_qm_sriov_disable(pdev, true);
hpre_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 8b387de69d22..36d70b9f6117 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <uapi/misc/uacce/hisi_qm.h>
#include <linux/hisi_acc_qm.h>
+#include "qm_common.h"
/* eq/aeq irq enable */
#define QM_VF_AEQ_INT_SOURCE 0x0
@@ -119,8 +120,6 @@
#define QM_SQC_VFT_NUM_SHIFT_V2 45
#define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0)
-#define QM_DFX_CNT_CLR_CE 0x100118
-
#define QM_ABNORMAL_INT_SOURCE 0x100000
#define QM_ABNORMAL_INT_MASK 0x100004
#define QM_ABNORMAL_INT_MASK_VALUE 0x7fff
@@ -187,14 +186,6 @@
#define QM_VF_RESET_WAIT_TIMEOUT_US \
(QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT)
-#define QM_DFX_MB_CNT_VF 0x104010
-#define QM_DFX_DB_CNT_VF 0x104020
-#define QM_DFX_SQE_CNT_VF_SQN 0x104030
-#define QM_DFX_CQE_CNT_VF_CQN 0x104040
-#define QM_DFX_QN_SHIFT 16
-#define CURRENT_FUN_MASK GENMASK(5, 0)
-#define CURRENT_Q_MASK GENMASK(31, 16)
-
#define POLL_PERIOD 10
#define POLL_TIMEOUT 1000
#define WAIT_PERIOD_US_MAX 200
@@ -211,19 +202,15 @@
#define QMC_ALIGN(sz) ALIGN(sz, 32)
#define QM_DBG_READ_LEN 256
-#define QM_DBG_WRITE_LEN 1024
-#define QM_DBG_TMP_BUF_LEN 22
#define QM_PCI_COMMAND_INVALID ~0
#define QM_RESET_STOP_TX_OFFSET 1
#define QM_RESET_STOP_RX_OFFSET 2
#define WAIT_PERIOD 20
#define REMOVE_WAIT_DELAY 10
-#define QM_SQE_ADDR_MASK GENMASK(7, 0)
#define QM_DRIVER_REMOVING 0
#define QM_RST_SCHED 1
-#define QM_RESETTING 2
#define QM_QOS_PARAM_NUM 2
#define QM_QOS_VAL_NUM 1
#define QM_QOS_BDF_PARAM_NUM 4
@@ -250,16 +237,6 @@
#define QM_QOS_MIN_CIR_B 100
#define QM_QOS_MAX_CIR_U 6
#define QM_QOS_MAX_CIR_S 11
-#define QM_QOS_VAL_MAX_LEN 32
-#define QM_DFX_BASE 0x0100000
-#define QM_DFX_STATE1 0x0104000
-#define QM_DFX_STATE2 0x01040C8
-#define QM_DFX_COMMON 0x0000
-#define QM_DFX_BASE_LEN 0x5A
-#define QM_DFX_STATE1_LEN 0x2E
-#define QM_DFX_STATE2_LEN 0x11
-#define QM_DFX_COMMON_LEN 0xC3
-#define QM_DFX_REGS_LEN 4UL
#define QM_AUTOSUSPEND_DELAY 3000
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
@@ -369,73 +346,6 @@ static const struct hisi_qm_cap_info qm_basic_info[] = {
{QM_VF_IRQ_NUM_CAP, 0x311c, 0, GENMASK(15, 0), 0x1, 0x2, 0x3},
};
-struct qm_cqe {
- __le32 rsvd0;
- __le16 cmd_id;
- __le16 rsvd1;
- __le16 sq_head;
- __le16 sq_num;
- __le16 rsvd2;
- __le16 w7;
-};
-
-struct qm_eqe {
- __le32 dw0;
-};
-
-struct qm_aeqe {
- __le32 dw0;
-};
-
-struct qm_sqc {
- __le16 head;
- __le16 tail;
- __le32 base_l;
- __le32 base_h;
- __le32 dw3;
- __le16 w8;
- __le16 rsvd0;
- __le16 pasid;
- __le16 w11;
- __le16 cq_num;
- __le16 w13;
- __le32 rsvd1;
-};
-
-struct qm_cqc {
- __le16 head;
- __le16 tail;
- __le32 base_l;
- __le32 base_h;
- __le32 dw3;
- __le16 w8;
- __le16 rsvd0;
- __le16 pasid;
- __le16 w11;
- __le32 dw6;
- __le32 rsvd1;
-};
-
-struct qm_eqc {
- __le16 head;
- __le16 tail;
- __le32 base_l;
- __le32 base_h;
- __le32 dw3;
- __le32 rsvd[2];
- __le32 dw6;
-};
-
-struct qm_aeqc {
- __le16 head;
- __le16 tail;
- __le32 base_l;
- __le32 base_h;
- __le32 dw3;
- __le32 rsvd[2];
- __le32 dw6;
-};
-
struct qm_mailbox {
__le16 w0;
__le16 queue_num;
@@ -468,25 +378,6 @@ struct hisi_qm_hw_ops {
int (*set_msi)(struct hisi_qm *qm, bool set);
};
-struct qm_dfx_item {
- const char *name;
- u32 offset;
-};
-
-static struct qm_dfx_item qm_dfx_files[] = {
- {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
- {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
- {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)},
- {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)},
- {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)},
-};
-
-static const char * const qm_debug_file_name[] = {
- [CURRENT_QM] = "current_qm",
- [CURRENT_Q] = "current_q",
- [CLEAR_ENABLE] = "clear_enable",
-};
-
struct hisi_qm_hw_error {
u32 int_msk;
const char *msg;
@@ -511,23 +402,6 @@ static const struct hisi_qm_hw_error qm_hw_error[] = {
{ /* sentinel */ }
};
-/* define the QM's dfx regs region and region length */
-static struct dfx_diff_registers qm_diff_regs[] = {
- {
- .reg_offset = QM_DFX_BASE,
- .reg_len = QM_DFX_BASE_LEN,
- }, {
- .reg_offset = QM_DFX_STATE1,
- .reg_len = QM_DFX_STATE1_LEN,
- }, {
- .reg_offset = QM_DFX_STATE2,
- .reg_len = QM_DFX_STATE2_LEN,
- }, {
- .reg_offset = QM_DFX_COMMON,
- .reg_len = QM_DFX_COMMON_LEN,
- },
-};
-
static const char * const qm_db_timeout[] = {
"sq", "cq", "eq", "aeq",
};
@@ -536,10 +410,6 @@ static const char * const qm_fifo_overflow[] = {
"cq", "eq", "aeq",
};
-static const char * const qm_s[] = {
- "init", "start", "close", "stop",
-};
-
static const char * const qp_s[] = {
"none", "init", "start", "stop", "close",
};
@@ -909,8 +779,8 @@ static void qm_get_xqc_depth(struct hisi_qm *qm, u16 *low_bits,
u32 depth;
depth = hisi_qm_get_hw_info(qm, qm_basic_info, type, qm->cap_ver);
- *high_bits = depth & QM_XQ_DEPTH_MASK;
- *low_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK;
+ *low_bits = depth & QM_XQ_DEPTH_MASK;
+ *high_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK;
}
static u32 qm_get_irq_num(struct hisi_qm *qm)
@@ -1440,452 +1310,7 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
return 0;
}
-static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num)
-{
- u32 remain_q_num, vfq_num;
- u32 num_vfs = qm->vfs_num;
-
- vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs;
- if (vfq_num >= qm->max_qp_num)
- return qm->max_qp_num;
-
- remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs;
- if (vfq_num + remain_q_num <= qm->max_qp_num)
- return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num;
-
- /*
- * if vfq_num + remain_q_num > max_qp_num, the last VFs,
- * each with one more queue.
- */
- return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num;
-}
-
-static struct hisi_qm *file_to_qm(struct debugfs_file *file)
-{
- struct qm_debug *debug = file->debug;
-
- return container_of(debug, struct hisi_qm, debug);
-}
-
-static u32 current_q_read(struct hisi_qm *qm)
-{
- return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
-}
-
-static int current_q_write(struct hisi_qm *qm, u32 val)
-{
- u32 tmp;
-
- if (val >= qm->debug.curr_qm_qp_num)
- return -EINVAL;
-
- tmp = val << QM_DFX_QN_SHIFT |
- (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK);
- writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-
- tmp = val << QM_DFX_QN_SHIFT |
- (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK);
- writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
- return 0;
-}
-
-static u32 clear_enable_read(struct hisi_qm *qm)
-{
- return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-
-/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
-static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
-{
- if (rd_clr_ctrl > 1)
- return -EINVAL;
-
- writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE);
-
- return 0;
-}
-
-static u32 current_qm_read(struct hisi_qm *qm)
-{
- return readl(qm->io_base + QM_DFX_MB_CNT_VF);
-}
-
-static int current_qm_write(struct hisi_qm *qm, u32 val)
-{
- u32 tmp;
-
- if (val > qm->vfs_num)
- return -EINVAL;
-
- /* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
- if (!val)
- qm->debug.curr_qm_qp_num = qm->qp_num;
- else
- qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val);
-
- writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
- writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
-
- tmp = val |
- (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
- writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-
- tmp = val |
- (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
- writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
- return 0;
-}
-
-static ssize_t qm_debug_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
-{
- struct debugfs_file *file = filp->private_data;
- enum qm_debug_file index = file->index;
- struct hisi_qm *qm = file_to_qm(file);
- char tbuf[QM_DBG_TMP_BUF_LEN];
- u32 val;
- int ret;
-
- ret = hisi_qm_get_dfx_access(qm);
- if (ret)
- return ret;
-
- mutex_lock(&file->lock);
- switch (index) {
- case CURRENT_QM:
- val = current_qm_read(qm);
- break;
- case CURRENT_Q:
- val = current_q_read(qm);
- break;
- case CLEAR_ENABLE:
- val = clear_enable_read(qm);
- break;
- default:
- goto err_input;
- }
- mutex_unlock(&file->lock);
-
- hisi_qm_put_dfx_access(qm);
- ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
- return simple_read_from_buffer(buf, count, pos, tbuf, ret);
-
-err_input:
- mutex_unlock(&file->lock);
- hisi_qm_put_dfx_access(qm);
- return -EINVAL;
-}
-
-static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
- size_t count, loff_t *pos)
-{
- struct debugfs_file *file = filp->private_data;
- enum qm_debug_file index = file->index;
- struct hisi_qm *qm = file_to_qm(file);
- unsigned long val;
- char tbuf[QM_DBG_TMP_BUF_LEN];
- int len, ret;
-
- if (*pos != 0)
- return 0;
-
- if (count >= QM_DBG_TMP_BUF_LEN)
- return -ENOSPC;
-
- len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf,
- count);
- if (len < 0)
- return len;
-
- tbuf[len] = '\0';
- if (kstrtoul(tbuf, 0, &val))
- return -EFAULT;
-
- ret = hisi_qm_get_dfx_access(qm);
- if (ret)
- return ret;
-
- mutex_lock(&file->lock);
- switch (index) {
- case CURRENT_QM:
- ret = current_qm_write(qm, val);
- break;
- case CURRENT_Q:
- ret = current_q_write(qm, val);
- break;
- case CLEAR_ENABLE:
- ret = clear_enable_write(qm, val);
- break;
- default:
- ret = -EINVAL;
- }
- mutex_unlock(&file->lock);
-
- hisi_qm_put_dfx_access(qm);
-
- if (ret)
- return ret;
-
- return count;
-}
-
-static const struct file_operations qm_debug_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = qm_debug_read,
- .write = qm_debug_write,
-};
-
-#define CNT_CYC_REGS_NUM 10
-static const struct debugfs_reg32 qm_dfx_regs[] = {
- /* XXX_CNT are reading clear register */
- {"QM_ECC_1BIT_CNT ", 0x104000ull},
- {"QM_ECC_MBIT_CNT ", 0x104008ull},
- {"QM_DFX_MB_CNT ", 0x104018ull},
- {"QM_DFX_DB_CNT ", 0x104028ull},
- {"QM_DFX_SQE_CNT ", 0x104038ull},
- {"QM_DFX_CQE_CNT ", 0x104048ull},
- {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull},
- {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull},
- {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull},
- {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull},
- {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
- {"QM_ECC_1BIT_INF ", 0x104004ull},
- {"QM_ECC_MBIT_INF ", 0x10400cull},
- {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull},
- {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull},
- {"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull},
- {"QM_DFX_FF_ST0 ", 0x1040c8ull},
- {"QM_DFX_FF_ST1 ", 0x1040ccull},
- {"QM_DFX_FF_ST2 ", 0x1040d0ull},
- {"QM_DFX_FF_ST3 ", 0x1040d4ull},
- {"QM_DFX_FF_ST4 ", 0x1040d8ull},
- {"QM_DFX_FF_ST5 ", 0x1040dcull},
- {"QM_DFX_FF_ST6 ", 0x1040e0ull},
- {"QM_IN_IDLE_ST ", 0x1040e4ull},
-};
-
-static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
- {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull},
-};
-
-/**
- * hisi_qm_regs_dump() - Dump registers's value.
- * @s: debugfs file handle.
- * @regset: accelerator registers information.
- *
- * Dump accelerator registers.
- */
-void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
-{
- struct pci_dev *pdev = to_pci_dev(regset->dev);
- struct hisi_qm *qm = pci_get_drvdata(pdev);
- const struct debugfs_reg32 *regs = regset->regs;
- int regs_len = regset->nregs;
- int i, ret;
- u32 val;
-
- ret = hisi_qm_get_dfx_access(qm);
- if (ret)
- return;
-
- for (i = 0; i < regs_len; i++) {
- val = readl(regset->base + regs[i].offset);
- seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
- }
-
- hisi_qm_put_dfx_access(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
-
-static int qm_regs_show(struct seq_file *s, void *unused)
-{
- struct hisi_qm *qm = s->private;
- struct debugfs_regset32 regset;
-
- if (qm->fun_type == QM_HW_PF) {
- regset.regs = qm_dfx_regs;
- regset.nregs = ARRAY_SIZE(qm_dfx_regs);
- } else {
- regset.regs = qm_vf_dfx_regs;
- regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
- }
-
- regset.base = qm->io_base;
- regset.dev = &qm->pdev->dev;
-
- hisi_qm_regs_dump(s, &regset);
-
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(qm_regs);
-
-static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
- const struct dfx_diff_registers *cregs, int reg_len)
-{
- struct dfx_diff_registers *diff_regs;
- u32 j, base_offset;
- int i;
-
- diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
- if (!diff_regs)
- return ERR_PTR(-ENOMEM);
-
- for (i = 0; i < reg_len; i++) {
- if (!cregs[i].reg_len)
- continue;
-
- diff_regs[i].reg_offset = cregs[i].reg_offset;
- diff_regs[i].reg_len = cregs[i].reg_len;
- diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
- GFP_KERNEL);
- if (!diff_regs[i].regs)
- goto alloc_error;
-
- for (j = 0; j < diff_regs[i].reg_len; j++) {
- base_offset = diff_regs[i].reg_offset +
- j * QM_DFX_REGS_LEN;
- diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
- }
- }
-
- return diff_regs;
-
-alloc_error:
- while (i > 0) {
- i--;
- kfree(diff_regs[i].regs);
- }
- kfree(diff_regs);
- return ERR_PTR(-ENOMEM);
-}
-
-static void dfx_regs_uninit(struct hisi_qm *qm,
- struct dfx_diff_registers *dregs, int reg_len)
-{
- int i;
-
- /* Setting the pointer is NULL to prevent double free */
- for (i = 0; i < reg_len; i++) {
- kfree(dregs[i].regs);
- dregs[i].regs = NULL;
- }
- kfree(dregs);
- dregs = NULL;
-}
-
-/**
- * hisi_qm_diff_regs_init() - Allocate memory for registers.
- * @qm: device qm handle.
- * @dregs: diff registers handle.
- * @reg_len: diff registers region length.
- */
-int hisi_qm_diff_regs_init(struct hisi_qm *qm,
- struct dfx_diff_registers *dregs, int reg_len)
-{
- if (!qm || !dregs || reg_len <= 0)
- return -EINVAL;
-
- if (qm->fun_type != QM_HW_PF)
- return 0;
-
- qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs,
- ARRAY_SIZE(qm_diff_regs));
- if (IS_ERR(qm->debug.qm_diff_regs))
- return PTR_ERR(qm->debug.qm_diff_regs);
-
- qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
- if (IS_ERR(qm->debug.acc_diff_regs)) {
- dfx_regs_uninit(qm, qm->debug.qm_diff_regs,
- ARRAY_SIZE(qm_diff_regs));
- return PTR_ERR(qm->debug.acc_diff_regs);
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init);
-
-/**
- * hisi_qm_diff_regs_uninit() - Free memory for registers.
- * @qm: device qm handle.
- * @reg_len: diff registers region length.
- */
-void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len)
-{
- if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF)
- return;
-
- dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
- dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
-}
-EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
-
-/**
- * hisi_qm_acc_diff_regs_dump() - Dump registers's value.
- * @qm: device qm handle.
- * @s: Debugfs file handle.
- * @dregs: diff registers handle.
- * @regs_len: diff registers region length.
- */
-void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
- struct dfx_diff_registers *dregs, int regs_len)
-{
- u32 j, val, base_offset;
- int i, ret;
-
- if (!qm || !s || !dregs || regs_len <= 0)
- return;
-
- ret = hisi_qm_get_dfx_access(qm);
- if (ret)
- return;
-
- down_read(&qm->qps_lock);
- for (i = 0; i < regs_len; i++) {
- if (!dregs[i].reg_len)
- continue;
-
- for (j = 0; j < dregs[i].reg_len; j++) {
- base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
- val = readl(qm->io_base + base_offset);
- if (val != dregs[i].regs[j])
- seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
- base_offset, dregs[i].regs[j], val);
- }
- }
- up_read(&qm->qps_lock);
-
- hisi_qm_put_dfx_access(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
-
-static int qm_diff_regs_show(struct seq_file *s, void *unused)
-{
- struct hisi_qm *qm = s->private;
-
- hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
- ARRAY_SIZE(qm_diff_regs));
-
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
-
-static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *pos)
-{
- char buf[QM_DBG_READ_LEN];
- int len;
-
- len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n",
- "Please echo help to cmd to get help information");
-
- return simple_read_from_buffer(buffer, count, pos, buf, len);
-}
-
-static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
+void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
dma_addr_t *dma_addr)
{
struct device *dev = &qm->pdev->dev;
@@ -1905,7 +1330,7 @@ static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
return ctx_addr;
}
-static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
+void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
const void *ctx_addr, dma_addr_t *dma_addr)
{
struct device *dev = &qm->pdev->dev;
@@ -1914,21 +1339,6 @@ static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
kfree(ctx_addr);
}
-static void dump_show(struct hisi_qm *qm, void *info,
- unsigned int info_size, char *info_name)
-{
- struct device *dev = &qm->pdev->dev;
- u8 *info_curr = info;
- u32 i;
-#define BYTE_PER_DW 4
-
- dev_info(dev, "%s DUMP\n", info_name);
- for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) {
- pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW,
- *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr));
- }
-}
-
static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
{
return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
@@ -1939,387 +1349,6 @@ static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
}
-static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
-{
- struct device *dev = &qm->pdev->dev;
- struct qm_sqc *sqc, *sqc_curr;
- dma_addr_t sqc_dma;
- u32 qp_id;
- int ret;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtou32(s, 0, &qp_id);
- if (ret || qp_id >= qm->qp_num) {
- dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
- return -EINVAL;
- }
-
- sqc = qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
- if (IS_ERR(sqc))
- return PTR_ERR(sqc);
-
- ret = qm_dump_sqc_raw(qm, sqc_dma, qp_id);
- if (ret) {
- down_read(&qm->qps_lock);
- if (qm->sqc) {
- sqc_curr = qm->sqc + qp_id;
-
- dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC");
- }
- up_read(&qm->qps_lock);
-
- goto free_ctx;
- }
-
- dump_show(qm, sqc, sizeof(*sqc), "SQC");
-
-free_ctx:
- qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
- return 0;
-}
-
-static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
-{
- struct device *dev = &qm->pdev->dev;
- struct qm_cqc *cqc, *cqc_curr;
- dma_addr_t cqc_dma;
- u32 qp_id;
- int ret;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtou32(s, 0, &qp_id);
- if (ret || qp_id >= qm->qp_num) {
- dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
- return -EINVAL;
- }
-
- cqc = qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
- if (IS_ERR(cqc))
- return PTR_ERR(cqc);
-
- ret = qm_dump_cqc_raw(qm, cqc_dma, qp_id);
- if (ret) {
- down_read(&qm->qps_lock);
- if (qm->cqc) {
- cqc_curr = qm->cqc + qp_id;
-
- dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC");
- }
- up_read(&qm->qps_lock);
-
- goto free_ctx;
- }
-
- dump_show(qm, cqc, sizeof(*cqc), "CQC");
-
-free_ctx:
- qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
- return 0;
-}
-
-static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
- int cmd, char *name)
-{
- struct device *dev = &qm->pdev->dev;
- dma_addr_t xeqc_dma;
- void *xeqc;
- int ret;
-
- if (strsep(&s, " ")) {
- dev_err(dev, "Please do not input extra characters!\n");
- return -EINVAL;
- }
-
- xeqc = qm_ctx_alloc(qm, size, &xeqc_dma);
- if (IS_ERR(xeqc))
- return PTR_ERR(xeqc);
-
- ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
- if (ret)
- goto err_free_ctx;
-
- dump_show(qm, xeqc, size, name);
-
-err_free_ctx:
- qm_ctx_free(qm, size, xeqc, &xeqc_dma);
- return ret;
-}
-
-static int q_dump_param_parse(struct hisi_qm *qm, char *s,
- u32 *e_id, u32 *q_id, u16 q_depth)
-{
- struct device *dev = &qm->pdev->dev;
- unsigned int qp_num = qm->qp_num;
- char *presult;
- int ret;
-
- presult = strsep(&s, " ");
- if (!presult) {
- dev_err(dev, "Please input qp number!\n");
- return -EINVAL;
- }
-
- ret = kstrtou32(presult, 0, q_id);
- if (ret || *q_id >= qp_num) {
- dev_err(dev, "Please input qp num (0-%u)", qp_num - 1);
- return -EINVAL;
- }
-
- presult = strsep(&s, " ");
- if (!presult) {
- dev_err(dev, "Please input sqe number!\n");
- return -EINVAL;
- }
-
- ret = kstrtou32(presult, 0, e_id);
- if (ret || *e_id >= q_depth) {
- dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1);
- return -EINVAL;
- }
-
- if (strsep(&s, " ")) {
- dev_err(dev, "Please do not input extra characters!\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int qm_sq_dump(struct hisi_qm *qm, char *s)
-{
- u16 sq_depth = qm->qp_array->cq_depth;
- void *sqe, *sqe_curr;
- struct hisi_qp *qp;
- u32 qp_id, sqe_id;
- int ret;
-
- ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth);
- if (ret)
- return ret;
-
- sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL);
- if (!sqe)
- return -ENOMEM;
-
- qp = &qm->qp_array[qp_id];
- memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth);
- sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size);
- memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
- qm->debug.sqe_mask_len);
-
- dump_show(qm, sqe_curr, qm->sqe_size, "SQE");
-
- kfree(sqe);
-
- return 0;
-}
-
-static int qm_cq_dump(struct hisi_qm *qm, char *s)
-{
- struct qm_cqe *cqe_curr;
- struct hisi_qp *qp;
- u32 qp_id, cqe_id;
- int ret;
-
- ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth);
- if (ret)
- return ret;
-
- qp = &qm->qp_array[qp_id];
- cqe_curr = qp->cqe + cqe_id;
- dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE");
-
- return 0;
-}
-
-static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
- size_t size, char *name)
-{
- struct device *dev = &qm->pdev->dev;
- void *xeqe;
- u32 xeqe_id;
- int ret;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtou32(s, 0, &xeqe_id);
- if (ret)
- return -EINVAL;
-
- if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) {
- dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1);
- return -EINVAL;
- } else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) {
- dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1);
- return -EINVAL;
- }
-
- down_read(&qm->qps_lock);
-
- if (qm->eqe && !strcmp(name, "EQE")) {
- xeqe = qm->eqe + xeqe_id;
- } else if (qm->aeqe && !strcmp(name, "AEQE")) {
- xeqe = qm->aeqe + xeqe_id;
- } else {
- ret = -EINVAL;
- goto err_unlock;
- }
-
- dump_show(qm, xeqe, size, name);
-
-err_unlock:
- up_read(&qm->qps_lock);
- return ret;
-}
-
-static int qm_dbg_help(struct hisi_qm *qm, char *s)
-{
- struct device *dev = &qm->pdev->dev;
-
- if (strsep(&s, " ")) {
- dev_err(dev, "Please do not input extra characters!\n");
- return -EINVAL;
- }
-
- dev_info(dev, "available commands:\n");
- dev_info(dev, "sqc <num>\n");
- dev_info(dev, "cqc <num>\n");
- dev_info(dev, "eqc\n");
- dev_info(dev, "aeqc\n");
- dev_info(dev, "sq <num> <e>\n");
- dev_info(dev, "cq <num> <e>\n");
- dev_info(dev, "eq <e>\n");
- dev_info(dev, "aeq <e>\n");
-
- return 0;
-}
-
-static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
-{
- struct device *dev = &qm->pdev->dev;
- char *presult, *s, *s_tmp;
- int ret;
-
- s = kstrdup(cmd_buf, GFP_KERNEL);
- if (!s)
- return -ENOMEM;
-
- s_tmp = s;
- presult = strsep(&s, " ");
- if (!presult) {
- ret = -EINVAL;
- goto err_buffer_free;
- }
-
- if (!strcmp(presult, "sqc"))
- ret = qm_sqc_dump(qm, s);
- else if (!strcmp(presult, "cqc"))
- ret = qm_cqc_dump(qm, s);
- else if (!strcmp(presult, "eqc"))
- ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc),
- QM_MB_CMD_EQC, "EQC");
- else if (!strcmp(presult, "aeqc"))
- ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc),
- QM_MB_CMD_AEQC, "AEQC");
- else if (!strcmp(presult, "sq"))
- ret = qm_sq_dump(qm, s);
- else if (!strcmp(presult, "cq"))
- ret = qm_cq_dump(qm, s);
- else if (!strcmp(presult, "eq"))
- ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE");
- else if (!strcmp(presult, "aeq"))
- ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE");
- else if (!strcmp(presult, "help"))
- ret = qm_dbg_help(qm, s);
- else
- ret = -EINVAL;
-
- if (ret)
- dev_info(dev, "Please echo help\n");
-
-err_buffer_free:
- kfree(s_tmp);
-
- return ret;
-}
-
-static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
- size_t count, loff_t *pos)
-{
- struct hisi_qm *qm = filp->private_data;
- char *cmd_buf, *cmd_buf_tmp;
- int ret;
-
- if (*pos)
- return 0;
-
- ret = hisi_qm_get_dfx_access(qm);
- if (ret)
- return ret;
-
- /* Judge if the instance is being reset. */
- if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) {
- ret = 0;
- goto put_dfx_access;
- }
-
- if (count > QM_DBG_WRITE_LEN) {
- ret = -ENOSPC;
- goto put_dfx_access;
- }
-
- cmd_buf = memdup_user_nul(buffer, count);
- if (IS_ERR(cmd_buf)) {
- ret = PTR_ERR(cmd_buf);
- goto put_dfx_access;
- }
-
- cmd_buf_tmp = strchr(cmd_buf, '\n');
- if (cmd_buf_tmp) {
- *cmd_buf_tmp = '\0';
- count = cmd_buf_tmp - cmd_buf + 1;
- }
-
- ret = qm_cmd_write_dump(qm, cmd_buf);
- if (ret) {
- kfree(cmd_buf);
- goto put_dfx_access;
- }
-
- kfree(cmd_buf);
-
- ret = count;
-
-put_dfx_access:
- hisi_qm_put_dfx_access(qm);
- return ret;
-}
-
-static const struct file_operations qm_cmd_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = qm_cmd_read,
- .write = qm_cmd_write,
-};
-
-static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
- enum qm_debug_file index)
-{
- struct debugfs_file *file = qm->debug.files + index;
-
- debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
- &qm_debug_fops);
-
- file->index = index;
- mutex_init(&file->lock);
- file->debug = &qm->debug;
-}
-
static void qm_hw_error_init_v1(struct hisi_qm *qm)
{
writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
@@ -3101,7 +2130,7 @@ static int qm_drain_qp(struct hisi_qp *qp)
return ret;
}
- addr = qm_ctx_alloc(qm, size, &dma_addr);
+ addr = hisi_qm_ctx_alloc(qm, size, &dma_addr);
if (IS_ERR(addr)) {
dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n");
return -ENOMEM;
@@ -3136,7 +2165,7 @@ static int qm_drain_qp(struct hisi_qp *qp)
usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
}
- qm_ctx_free(qm, size, addr, &dma_addr);
+ hisi_qm_ctx_free(qm, size, addr, &dma_addr);
return ret;
}
@@ -3721,17 +2750,6 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
writel(state, qm->io_base + QM_VF_STATE);
}
-static void qm_last_regs_uninit(struct hisi_qm *qm)
-{
- struct qm_debug *debug = &qm->debug;
-
- if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
- return;
-
- kfree(debug->qm_last_words);
- debug->qm_last_words = NULL;
-}
-
static void hisi_qm_unint_work(struct hisi_qm *qm)
{
destroy_workqueue(qm->wq);
@@ -3762,8 +2780,6 @@ static void hisi_qm_memory_uninit(struct hisi_qm *qm)
*/
void hisi_qm_uninit(struct hisi_qm *qm)
{
- qm_last_regs_uninit(qm);
-
qm_cmd_uninit(qm);
hisi_qm_unint_work(qm);
down_write(&qm->qps_lock);
@@ -4132,45 +3148,6 @@ err_unlock:
}
EXPORT_SYMBOL_GPL(hisi_qm_stop);
-static ssize_t qm_status_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *pos)
-{
- struct hisi_qm *qm = filp->private_data;
- char buf[QM_DBG_READ_LEN];
- int val, len;
-
- val = atomic_read(&qm->status.flags);
- len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
-
- return simple_read_from_buffer(buffer, count, pos, buf, len);
-}
-
-static const struct file_operations qm_status_fops = {
- .owner = THIS_MODULE,
- .open = simple_open,
- .read = qm_status_read,
-};
-
-static int qm_debugfs_atomic64_set(void *data, u64 val)
-{
- if (val)
- return -EINVAL;
-
- atomic64_set((atomic64_t *)data, 0);
-
- return 0;
-}
-
-static int qm_debugfs_atomic64_get(void *data, u64 *val)
-{
- *val = atomic64_read((atomic64_t *)data);
-
- return 0;
-}
-
-DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
- qm_debugfs_atomic64_set, "%llu\n");
-
static void qm_hw_error_init(struct hisi_qm *qm)
{
if (!qm->ops->hw_error_init) {
@@ -4277,16 +3254,14 @@ static int hisi_qm_sort_devices(int node, struct list_head *head,
struct hisi_qm *qm;
struct list_head *n;
struct device *dev;
- int dev_node = 0;
+ int dev_node;
list_for_each_entry(qm, &qm_list->list, list) {
dev = &qm->pdev->dev;
- if (IS_ENABLED(CONFIG_NUMA)) {
- dev_node = dev_to_node(dev);
- if (dev_node < 0)
- dev_node = 0;
- }
+ dev_node = dev_to_node(dev);
+ if (dev_node < 0)
+ dev_node = 0;
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
@@ -4592,49 +3567,36 @@ err_put_dfx_access:
return ret;
}
-static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
-{
- int buflen = strlen(buf);
- int ret, i;
-
- for (i = 0; i < buflen; i++) {
- if (!isdigit(buf[i]))
- return -EINVAL;
- }
-
- ret = sscanf(buf, "%lu", val);
- if (ret != QM_QOS_VAL_NUM)
- return -EINVAL;
-
- return 0;
-}
-
static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
unsigned long *val,
unsigned int *fun_index)
{
+ struct bus_type *bus_type = qm->pdev->dev.bus;
char tbuf_bdf[QM_DBG_READ_LEN] = {0};
- char val_buf[QM_QOS_VAL_MAX_LEN] = {0};
- u32 tmp1, device, function;
- int ret, bus;
+ char val_buf[QM_DBG_READ_LEN] = {0};
+ struct pci_dev *pdev;
+ struct device *dev;
+ int ret;
ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf);
if (ret != QM_QOS_PARAM_NUM)
return -EINVAL;
- ret = qm_qos_value_init(val_buf, val);
+ ret = kstrtoul(val_buf, 10, val);
if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) {
pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
return -EINVAL;
}
- ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function);
- if (ret != QM_QOS_BDF_PARAM_NUM) {
- pci_err(qm->pdev, "input pci bdf value is error!\n");
- return -EINVAL;
+ dev = bus_find_device_by_name(bus_type, NULL, tbuf_bdf);
+ if (!dev) {
+ pci_err(qm->pdev, "input pci bdf number is error!\n");
+ return -ENODEV;
}
- *fun_index = PCI_DEVFN(device, function);
+ pdev = container_of(dev, struct pci_dev, dev);
+
+ *fun_index = pdev->devfn;
return 0;
}
@@ -4648,9 +3610,6 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
unsigned long val;
int len, ret;
- if (qm->fun_type == QM_HW_VF)
- return -EINVAL;
-
if (*pos != 0)
return 0;
@@ -4709,7 +3668,7 @@ static const struct file_operations qm_algqos_fops = {
*
* Create function qos debugfs files, VF ping PF to get function qos.
*/
-static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
+void hisi_qm_set_algqos_init(struct hisi_qm *qm)
{
if (qm->fun_type == QM_HW_PF)
debugfs_create_file("alg_qos", 0644, qm->debug.debug_root,
@@ -4719,88 +3678,6 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
qm, &qm_algqos_fops);
}
-/**
- * hisi_qm_debug_init() - Initialize qm related debugfs files.
- * @qm: The qm for which we want to add debugfs files.
- *
- * Create qm related debugfs files.
- */
-void hisi_qm_debug_init(struct hisi_qm *qm)
-{
- struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
- struct qm_dfx *dfx = &qm->debug.dfx;
- struct dentry *qm_d;
- void *data;
- int i;
-
- qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
- qm->debug.qm_d = qm_d;
-
- /* only show this in PF */
- if (qm->fun_type == QM_HW_PF) {
- qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
- for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
- qm_create_debugfs_file(qm, qm->debug.qm_d, i);
- }
-
- if (qm_regs)
- debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
- qm, &qm_diff_regs_fops);
-
- debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
-
- debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
-
- debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
- &qm_status_fops);
- for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
- data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
- debugfs_create_file(qm_dfx_files[i].name,
- 0644,
- qm_d,
- data,
- &qm_atomic64_ops);
- }
-
- if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
- hisi_qm_set_algqos_init(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
-
-/**
- * hisi_qm_debug_regs_clear() - clear qm debug related registers.
- * @qm: The qm for which we want to clear its debug registers.
- */
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
-{
- const struct debugfs_reg32 *regs;
- int i;
-
- /* clear current_qm */
- writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
- writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
-
- /* clear current_q */
- writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
- writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
- /*
- * these registers are reading and clearing, so clear them after
- * reading them.
- */
- writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
-
- regs = qm_dfx_regs;
- for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
- readl(qm->io_base + regs->offset);
- regs++;
- }
-
- /* clear clear_enable */
- writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
-
static void hisi_qm_init_vf_qos(struct hisi_qm *qm, int total_func)
{
int i;
@@ -5439,24 +4316,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
return 0;
}
-static void qm_show_last_dfx_regs(struct hisi_qm *qm)
-{
- struct qm_debug *debug = &qm->debug;
- struct pci_dev *pdev = qm->pdev;
- u32 val;
- int i;
-
- if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
- return;
-
- for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
- val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
- if (debug->qm_last_words[i] != val)
- pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
- qm_dfx_regs[i].name, debug->qm_last_words[i], val);
- }
-}
-
static int qm_controller_reset(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -5472,7 +4331,7 @@ static int qm_controller_reset(struct hisi_qm *qm)
return ret;
}
- qm_show_last_dfx_regs(qm);
+ hisi_qm_show_last_dfx_regs(qm);
if (qm->err_ini->show_last_dfx_regs)
qm->err_ini->show_last_dfx_regs(qm);
@@ -5725,6 +4584,7 @@ static void qm_pf_reset_vf_done(struct hisi_qm *qm)
cmd = QM_VF_START_FAIL;
}
+ qm_cmd_init(qm);
ret = qm_ping_pf(qm, cmd);
if (ret)
dev_warn(&pdev->dev, "PF responds timeout in reset done!\n");
@@ -5786,7 +4646,6 @@ static void qm_pf_reset_vf_process(struct hisi_qm *qm,
goto err_get_status;
qm_pf_reset_vf_done(qm);
- qm_cmd_init(qm);
dev_info(dev, "device reset done.\n");
@@ -6359,26 +5218,6 @@ err_destroy_idr:
return ret;
}
-static void qm_last_regs_init(struct hisi_qm *qm)
-{
- int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
- struct qm_debug *debug = &qm->debug;
- int i;
-
- if (qm->fun_type == QM_HW_VF)
- return;
-
- debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int),
- GFP_KERNEL);
- if (!debug->qm_last_words)
- return;
-
- for (i = 0; i < dfx_regs_num; i++) {
- debug->qm_last_words[i] = readl_relaxed(qm->io_base +
- qm_dfx_regs[i].offset);
- }
-}
-
/**
* hisi_qm_init() - Initialize configures about qm.
* @qm: The qm needing init.
@@ -6427,8 +5266,6 @@ int hisi_qm_init(struct hisi_qm *qm)
qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);
- qm_last_regs_init(qm);
-
return 0;
err_free_qm_memory:
diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h
new file mode 100644
index 000000000000..1406a422d455
--- /dev/null
+++ b/drivers/crypto/hisilicon/qm_common.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 HiSilicon Limited. */
+#ifndef QM_COMMON_H
+#define QM_COMMON_H
+
+#define QM_DBG_READ_LEN 256
+#define QM_RESETTING 2
+
+struct qm_cqe {
+ __le32 rsvd0;
+ __le16 cmd_id;
+ __le16 rsvd1;
+ __le16 sq_head;
+ __le16 sq_num;
+ __le16 rsvd2;
+ __le16 w7;
+};
+
+struct qm_eqe {
+ __le32 dw0;
+};
+
+struct qm_aeqe {
+ __le32 dw0;
+};
+
+struct qm_sqc {
+ __le16 head;
+ __le16 tail;
+ __le32 base_l;
+ __le32 base_h;
+ __le32 dw3;
+ __le16 w8;
+ __le16 rsvd0;
+ __le16 pasid;
+ __le16 w11;
+ __le16 cq_num;
+ __le16 w13;
+ __le32 rsvd1;
+};
+
+struct qm_cqc {
+ __le16 head;
+ __le16 tail;
+ __le32 base_l;
+ __le32 base_h;
+ __le32 dw3;
+ __le16 w8;
+ __le16 rsvd0;
+ __le16 pasid;
+ __le16 w11;
+ __le32 dw6;
+ __le32 rsvd1;
+};
+
+struct qm_eqc {
+ __le16 head;
+ __le16 tail;
+ __le32 base_l;
+ __le32 base_h;
+ __le32 dw3;
+ __le32 rsvd[2];
+ __le32 dw6;
+};
+
+struct qm_aeqc {
+ __le16 head;
+ __le16 tail;
+ __le32 base_l;
+ __le32 base_h;
+ __le32 dw3;
+ __le32 rsvd[2];
+ __le32 dw6;
+};
+
+static const char * const qm_s[] = {
+ "init", "start", "close", "stop",
+};
+
+void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
+ dma_addr_t *dma_addr);
+void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
+ const void *ctx_addr, dma_addr_t *dma_addr);
+void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm);
+void hisi_qm_set_algqos_init(struct hisi_qm *qm);
+
+#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 84ae8ddd1a13..f5bfc9755a4a 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -283,7 +283,6 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
spin_lock_bh(&qp_ctx->req_lock);
ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
-
if (ctx->fake_req_limit <=
atomic_read(&qp_ctx->qp->qp_status.used) && !ret) {
list_add_tail(&req->backlog_head, &qp_ctx->backlog);
@@ -2009,7 +2008,7 @@ static int sec_aead_sha512_ctx_init(struct crypto_aead *tfm)
return sec_aead_ctx_init(tfm, "sha512");
}
-static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx,
+static int sec_skcipher_cryptlen_check(struct sec_ctx *ctx,
struct sec_req *sreq)
{
u32 cryptlen = sreq->c_req.sk_req->cryptlen;
@@ -2071,7 +2070,7 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
}
return 0;
} else if (c_alg == SEC_CALG_AES || c_alg == SEC_CALG_SM4) {
- return sec_skcipher_cryptlen_ckeck(ctx, sreq);
+ return sec_skcipher_cryptlen_check(ctx, sreq);
}
dev_err(dev, "skcipher algorithm error!\n");
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 3705412bac5f..93572c0d4faa 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -55,7 +55,7 @@
#define SEC_CONTROL_REG 0x301200
#define SEC_DYNAMIC_GATE_REG 0x30121c
#define SEC_CORE_AUTO_GATE 0x30212c
-#define SEC_DYNAMIC_GATE_EN 0x7bff
+#define SEC_DYNAMIC_GATE_EN 0x7fff
#define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0)
#define SEC_CLK_GATE_ENABLE BIT(3)
#define SEC_CLK_GATE_DISABLE (~BIT(3))
@@ -427,7 +427,6 @@ static void sec_set_endian(struct hisi_qm *qm)
if (!IS_ENABLED(CONFIG_64BIT))
reg |= BIT(1);
-
if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
reg |= BIT(0);
@@ -899,8 +898,7 @@ static int sec_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
- ret = hisi_qm_diff_regs_init(qm, sec_diff_regs,
- ARRAY_SIZE(sec_diff_regs));
+ ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init SEC diff regs!\n");
goto debugfs_remove;
@@ -915,7 +913,7 @@ static int sec_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
debugfs_remove:
debugfs_remove_recursive(sec_debugfs_root);
return ret;
@@ -923,7 +921,7 @@ debugfs_remove:
static void sec_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
}
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index c863435e8c75..1549bec3aea5 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -849,8 +849,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
qm->debug.debug_root = dev_d;
- ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs,
- ARRAY_SIZE(hzip_diff_regs));
+ ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init ZIP diff regs!\n");
goto debugfs_remove;
@@ -869,7 +868,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
return 0;
failed_to_create:
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
debugfs_remove:
debugfs_remove_recursive(hzip_debugfs_root);
return ret;
@@ -895,7 +894,7 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
debugfs_remove_recursive(qm->debug.debug_root);
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index ad0d8c4a71ac..ae6110376e21 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -316,14 +316,20 @@ static void eip197_init_firmware(struct safexcel_crypto_priv *priv)
static int eip197_write_firmware(struct safexcel_crypto_priv *priv,
const struct firmware *fw)
{
- const __be32 *data = (const __be32 *)fw->data;
+ u32 val;
int i;
/* Write the firmware */
- for (i = 0; i < fw->size / sizeof(u32); i++)
- writel(be32_to_cpu(data[i]),
+ for (i = 0; i < fw->size / sizeof(u32); i++) {
+ if (priv->data->fw_little_endian)
+ val = le32_to_cpu(((const __le32 *)fw->data)[i]);
+ else
+ val = be32_to_cpu(((const __be32 *)fw->data)[i]);
+
+ writel(val,
priv->base + EIP197_CLASSIFICATION_RAMS +
- i * sizeof(__be32));
+ i * sizeof(val));
+ }
/* Exclude final 2 NOPs from size */
return i - EIP197_FW_TERMINAL_NOPS;
@@ -410,11 +416,13 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
int i, j, ret = 0, pe;
int ipuesz, ifppsz, minifw = 0;
- if (priv->version == EIP197D_MRVL)
+ if (priv->data->version == EIP197D_MRVL)
dir = "eip197d";
- else if (priv->version == EIP197B_MRVL ||
- priv->version == EIP197_DEVBRD)
+ else if (priv->data->version == EIP197B_MRVL ||
+ priv->data->version == EIP197_DEVBRD)
dir = "eip197b";
+ else if (priv->data->version == EIP197C_MXL)
+ dir = "eip197c";
else
return -ENODEV;
@@ -423,7 +431,7 @@ retry_fw:
snprintf(fw_path, 37, "inside-secure/%s/%s", dir, fw_name[i]);
ret = firmware_request_nowarn(&fw[i], fw_path, priv->dev);
if (ret) {
- if (minifw || priv->version != EIP197B_MRVL)
+ if (minifw || priv->data->version != EIP197B_MRVL)
goto release_fw;
/* Fallback to the old firmware location for the
@@ -1597,7 +1605,7 @@ static int safexcel_probe_generic(void *pdev,
safexcel_configure(priv);
- if (IS_ENABLED(CONFIG_PCI) && priv->version == EIP197_DEVBRD) {
+ if (IS_ENABLED(CONFIG_PCI) && priv->data->version == EIP197_DEVBRD) {
/*
* Request MSI vectors for global + 1 per ring -
* or just 1 for older dev images
@@ -1731,7 +1739,7 @@ static int safexcel_probe(struct platform_device *pdev)
return -ENOMEM;
priv->dev = dev;
- priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev);
+ priv->data = (struct safexcel_priv_data *)of_device_get_match_data(dev);
platform_set_drvdata(pdev, priv);
@@ -1806,27 +1814,52 @@ static int safexcel_remove(struct platform_device *pdev)
return 0;
}
+static const struct safexcel_priv_data eip97ies_mrvl_data = {
+ .version = EIP97IES_MRVL,
+};
+
+static const struct safexcel_priv_data eip197b_mrvl_data = {
+ .version = EIP197B_MRVL,
+};
+
+static const struct safexcel_priv_data eip197d_mrvl_data = {
+ .version = EIP197D_MRVL,
+};
+
+static const struct safexcel_priv_data eip197_devbrd_data = {
+ .version = EIP197_DEVBRD,
+};
+
+static const struct safexcel_priv_data eip197c_mxl_data = {
+ .version = EIP197C_MXL,
+ .fw_little_endian = true,
+};
+
static const struct of_device_id safexcel_of_match_table[] = {
{
.compatible = "inside-secure,safexcel-eip97ies",
- .data = (void *)EIP97IES_MRVL,
+ .data = &eip97ies_mrvl_data,
},
{
.compatible = "inside-secure,safexcel-eip197b",
- .data = (void *)EIP197B_MRVL,
+ .data = &eip197b_mrvl_data,
},
{
.compatible = "inside-secure,safexcel-eip197d",
- .data = (void *)EIP197D_MRVL,
+ .data = &eip197d_mrvl_data,
+ },
+ {
+ .compatible = "inside-secure,safexcel-eip197c-mxl",
+ .data = &eip197c_mxl_data,
},
/* For backward compatibility and intended for generic use */
{
.compatible = "inside-secure,safexcel-eip97",
- .data = (void *)EIP97IES_MRVL,
+ .data = &eip97ies_mrvl_data,
},
{
.compatible = "inside-secure,safexcel-eip197",
- .data = (void *)EIP197B_MRVL,
+ .data = &eip197b_mrvl_data,
},
{},
};
@@ -1862,7 +1895,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev,
return -ENOMEM;
priv->dev = dev;
- priv->version = (enum safexcel_eip_version)ent->driver_data;
+ priv->data = (struct safexcel_priv_data *)ent->driver_data;
pci_set_drvdata(pdev, priv);
@@ -1881,7 +1914,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev,
}
priv->base = pcim_iomap_table(pdev)[0];
- if (priv->version == EIP197_DEVBRD) {
+ if (priv->data->version == EIP197_DEVBRD) {
dev_dbg(dev, "Device identified as FPGA based development board - applying HW reset\n");
rc = pcim_iomap_regions(pdev, 4, "crypto_safexcel");
@@ -1949,7 +1982,7 @@ static const struct pci_device_id safexcel_pci_ids[] = {
{
PCI_DEVICE_SUB(PCI_VENDOR_ID_XILINX, 0x9038,
0x16ae, 0xc522),
- .driver_data = EIP197_DEVBRD,
+ .driver_data = (kernel_ulong_t)&eip197_devbrd_data,
},
{},
};
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 797ff91512e0..6c2fc662f64f 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -730,7 +730,13 @@ enum safexcel_eip_version {
EIP97IES_MRVL,
EIP197B_MRVL,
EIP197D_MRVL,
- EIP197_DEVBRD
+ EIP197_DEVBRD,
+ EIP197C_MXL,
+};
+
+struct safexcel_priv_data {
+ enum safexcel_eip_version version;
+ bool fw_little_endian;
};
/* Priority we use for advertising our algorithms */
@@ -815,7 +821,7 @@ struct safexcel_crypto_priv {
struct clk *reg_clk;
struct safexcel_config config;
- enum safexcel_eip_version version;
+ struct safexcel_priv_data *data;
struct safexcel_register_offsets offsets;
struct safexcel_hwconfig hwconfig;
u32 flags;
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index d39a386b31ac..984b3cc0237c 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -420,7 +420,7 @@ static void one_packet(dma_addr_t phys)
break;
case CTL_FLAG_GEN_REVAES:
ctx = crypto_tfm_ctx(crypt->data.tfm);
- *(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
+ *(__be32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
if (atomic_dec_and_test(&ctx->configuring))
complete(&ctx->completion);
break;
@@ -720,7 +720,7 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
crypt->init_len = init_len;
crypt->ctl_flags |= CTL_FLAG_GEN_ICV;
- buf->next = 0;
+ buf->next = NULL;
buf->buf_len = HMAC_PAD_BLOCKLEN;
buf->pkt_len = 0;
buf->phys_addr = pad_phys;
@@ -751,7 +751,7 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize
#ifndef __ARMEB__
cfgword ^= 0xAA000000; /* change the "byte swap" flags */
#endif
- *(u32 *)cinfo = cpu_to_be32(cfgword);
+ *(__be32 *)cinfo = cpu_to_be32(cfgword);
cinfo += sizeof(cfgword);
/* write ICV to cryptinfo */
@@ -788,7 +788,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
if (!crypt)
return -EAGAIN;
- *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
+ *(__be32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
crypt->data.tfm = tfm;
crypt->crypt_offs = 0;
@@ -846,7 +846,7 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key,
return err;
}
/* write cfg word to cryptinfo */
- *(u32 *)cinfo = cpu_to_be32(cipher_cfg);
+ *(__be32 *)cinfo = cpu_to_be32(cipher_cfg);
cinfo += sizeof(cipher_cfg);
/* write cipher key to cryptinfo */
diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
index 205eacac4a34..f8aedafdfdc5 100644
--- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
+++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
@@ -534,7 +534,7 @@ union otx_cptx_vqx_misc_ena_w1s {
* Word0
* reserved_20_63:44 [63:20] Reserved.
* dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add
- * to the CPT instruction doorbell count. Readback value is the the
+ * to the CPT instruction doorbell count. Readback value is the
* current number of pending doorbell requests. If counter overflows
* CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to
* zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF],
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 31e24df18877..20d0dcd50344 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1229,6 +1229,7 @@ struct n2_hash_tmpl {
const u8 *hash_init;
u8 hw_op_hashsz;
u8 digest_size;
+ u8 statesize;
u8 block_size;
u8 auth_type;
u8 hmac_type;
@@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
.hmac_type = AUTH_TYPE_HMAC_MD5,
.hw_op_hashsz = MD5_DIGEST_SIZE,
.digest_size = MD5_DIGEST_SIZE,
+ .statesize = sizeof(struct md5_state),
.block_size = MD5_HMAC_BLOCK_SIZE },
{ .name = "sha1",
.hash_zero = sha1_zero_message_hash,
@@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
.hmac_type = AUTH_TYPE_HMAC_SHA1,
.hw_op_hashsz = SHA1_DIGEST_SIZE,
.digest_size = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
.block_size = SHA1_BLOCK_SIZE },
{ .name = "sha256",
.hash_zero = sha256_zero_message_hash,
@@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
.hmac_type = AUTH_TYPE_HMAC_SHA256,
.hw_op_hashsz = SHA256_DIGEST_SIZE,
.digest_size = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
.block_size = SHA256_BLOCK_SIZE },
{ .name = "sha224",
.hash_zero = sha224_zero_message_hash,
@@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
.hmac_type = AUTH_TYPE_RESERVED,
.hw_op_hashsz = SHA256_DIGEST_SIZE,
.digest_size = SHA224_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
.block_size = SHA224_BLOCK_SIZE },
};
#define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
@@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
halg = &ahash->halg;
halg->digestsize = tmpl->digest_size;
+ halg->statesize = tmpl->statesize;
base = &halg->base;
snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
index fa18d8009f53..cf4ef83e186f 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto out_err_dev_shutdown;
- set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
ret = adf_dev_start(accel_dev);
if (ret)
goto out_err_dev_stop;
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index 686ec752d0e9..0e642c94b929 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto out_err_dev_shutdown;
- set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
ret = adf_dev_start(accel_dev);
if (ret)
goto out_err_dev_stop;
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 18756b2e1c91..c1485e702b3e 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto out_err_dev_shutdown;
- set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
ret = adf_dev_start(accel_dev);
if (ret)
goto out_err_dev_stop;
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 35d73061d156..9f6ba770a90a 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -14,235 +14,162 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/clk.h>
#include <linux/crypto.h>
#include <linux/reset.h>
-static int rk_crypto_enable_clk(struct rk_crypto_info *dev)
-{
- int err;
-
- err = clk_prepare_enable(dev->sclk);
- if (err) {
- dev_err(dev->dev, "[%s:%d], Couldn't enable clock sclk\n",
- __func__, __LINE__);
- goto err_return;
- }
- err = clk_prepare_enable(dev->aclk);
- if (err) {
- dev_err(dev->dev, "[%s:%d], Couldn't enable clock aclk\n",
- __func__, __LINE__);
- goto err_aclk;
- }
- err = clk_prepare_enable(dev->hclk);
- if (err) {
- dev_err(dev->dev, "[%s:%d], Couldn't enable clock hclk\n",
- __func__, __LINE__);
- goto err_hclk;
- }
- err = clk_prepare_enable(dev->dmaclk);
- if (err) {
- dev_err(dev->dev, "[%s:%d], Couldn't enable clock dmaclk\n",
- __func__, __LINE__);
- goto err_dmaclk;
- }
- return err;
-err_dmaclk:
- clk_disable_unprepare(dev->hclk);
-err_hclk:
- clk_disable_unprepare(dev->aclk);
-err_aclk:
- clk_disable_unprepare(dev->sclk);
-err_return:
- return err;
-}
+static struct rockchip_ip rocklist = {
+ .dev_list = LIST_HEAD_INIT(rocklist.dev_list),
+ .lock = __SPIN_LOCK_UNLOCKED(rocklist.lock),
+};
-static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
+struct rk_crypto_info *get_rk_crypto(void)
{
- clk_disable_unprepare(dev->dmaclk);
- clk_disable_unprepare(dev->hclk);
- clk_disable_unprepare(dev->aclk);
- clk_disable_unprepare(dev->sclk);
+ struct rk_crypto_info *first;
+
+ spin_lock(&rocklist.lock);
+ first = list_first_entry_or_null(&rocklist.dev_list,
+ struct rk_crypto_info, list);
+ list_rotate_left(&rocklist.dev_list);
+ spin_unlock(&rocklist.lock);
+ return first;
}
-static int check_alignment(struct scatterlist *sg_src,
- struct scatterlist *sg_dst,
- int align_mask)
-{
- int in, out, align;
+static const struct rk_variant rk3288_variant = {
+ .num_clks = 4,
+ .rkclks = {
+ { "sclk", 150000000},
+ }
+};
- in = IS_ALIGNED((uint32_t)sg_src->offset, 4) &&
- IS_ALIGNED((uint32_t)sg_src->length, align_mask);
- if (!sg_dst)
- return in;
- out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) &&
- IS_ALIGNED((uint32_t)sg_dst->length, align_mask);
- align = in && out;
+static const struct rk_variant rk3328_variant = {
+ .num_clks = 3,
+};
- return (align && (sg_src->length == sg_dst->length));
-}
+static const struct rk_variant rk3399_variant = {
+ .num_clks = 3,
+};
-static int rk_load_data(struct rk_crypto_info *dev,
- struct scatterlist *sg_src,
- struct scatterlist *sg_dst)
+static int rk_crypto_get_clks(struct rk_crypto_info *dev)
{
- unsigned int count;
-
- dev->aligned = dev->aligned ?
- check_alignment(sg_src, sg_dst, dev->align_size) :
- dev->aligned;
- if (dev->aligned) {
- count = min(dev->left_bytes, sg_src->length);
- dev->left_bytes -= count;
-
- if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) {
- dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n",
- __func__, __LINE__);
- return -EINVAL;
- }
- dev->addr_in = sg_dma_address(sg_src);
-
- if (sg_dst) {
- if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) {
- dev_err(dev->dev,
- "[%s:%d] dma_map_sg(dst) error\n",
- __func__, __LINE__);
- dma_unmap_sg(dev->dev, sg_src, 1,
- DMA_TO_DEVICE);
- return -EINVAL;
- }
- dev->addr_out = sg_dma_address(sg_dst);
- }
- } else {
- count = (dev->left_bytes > PAGE_SIZE) ?
- PAGE_SIZE : dev->left_bytes;
-
- if (!sg_pcopy_to_buffer(dev->first, dev->src_nents,
- dev->addr_vir, count,
- dev->total - dev->left_bytes)) {
- dev_err(dev->dev, "[%s:%d] pcopy err\n",
- __func__, __LINE__);
- return -EINVAL;
- }
- dev->left_bytes -= count;
- sg_init_one(&dev->sg_tmp, dev->addr_vir, count);
- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) {
- dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp) error\n",
- __func__, __LINE__);
- return -ENOMEM;
- }
- dev->addr_in = sg_dma_address(&dev->sg_tmp);
-
- if (sg_dst) {
- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1,
- DMA_FROM_DEVICE)) {
- dev_err(dev->dev,
- "[%s:%d] dma_map_sg(sg_tmp) error\n",
- __func__, __LINE__);
- dma_unmap_sg(dev->dev, &dev->sg_tmp, 1,
- DMA_TO_DEVICE);
- return -ENOMEM;
+ int i, j, err;
+ unsigned long cr;
+
+ dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks);
+ if (dev->num_clks < dev->variant->num_clks) {
+ dev_err(dev->dev, "Missing clocks, got %d instead of %d\n",
+ dev->num_clks, dev->variant->num_clks);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < dev->num_clks; i++) {
+ cr = clk_get_rate(dev->clks[i].clk);
+ for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) {
+ if (dev->variant->rkclks[j].max == 0)
+ continue;
+ if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id))
+ continue;
+ if (cr > dev->variant->rkclks[j].max) {
+ err = clk_set_rate(dev->clks[i].clk,
+ dev->variant->rkclks[j].max);
+ if (err)
+ dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n",
+ dev->variant->rkclks[j].name, cr,
+ dev->variant->rkclks[j].max);
+ else
+ dev_info(dev->dev, "Downclocking %s from %lu to %lu\n",
+ dev->variant->rkclks[j].name, cr,
+ dev->variant->rkclks[j].max);
}
- dev->addr_out = sg_dma_address(&dev->sg_tmp);
}
}
- dev->count = count;
return 0;
}
-static void rk_unload_data(struct rk_crypto_info *dev)
+static int rk_crypto_enable_clk(struct rk_crypto_info *dev)
{
- struct scatterlist *sg_in, *sg_out;
+ int err;
- sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp;
- dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE);
+ err = clk_bulk_prepare_enable(dev->num_clks, dev->clks);
+ if (err)
+ dev_err(dev->dev, "Could not enable clock clks\n");
- if (dev->sg_dst) {
- sg_out = dev->aligned ? dev->sg_dst : &dev->sg_tmp;
- dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE);
- }
+ return err;
}
-static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
+static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
{
- struct rk_crypto_info *dev = platform_get_drvdata(dev_id);
- u32 interrupt_status;
+ clk_bulk_disable_unprepare(dev->num_clks, dev->clks);
+}
- spin_lock(&dev->lock);
- interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS);
- CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status);
+/*
+ * Power management strategy: The device is suspended until a request
+ * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s.
+ */
+static int rk_crypto_pm_suspend(struct device *dev)
+{
+ struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
- if (interrupt_status & 0x0a) {
- dev_warn(dev->dev, "DMA Error\n");
- dev->err = -EFAULT;
- }
- tasklet_schedule(&dev->done_task);
+ rk_crypto_disable_clk(rkdev);
+ reset_control_assert(rkdev->rst);
- spin_unlock(&dev->lock);
- return IRQ_HANDLED;
+ return 0;
}
-static int rk_crypto_enqueue(struct rk_crypto_info *dev,
- struct crypto_async_request *async_req)
+static int rk_crypto_pm_resume(struct device *dev)
{
- unsigned long flags;
+ struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
int ret;
- spin_lock_irqsave(&dev->lock, flags);
- ret = crypto_enqueue_request(&dev->queue, async_req);
- if (dev->busy) {
- spin_unlock_irqrestore(&dev->lock, flags);
+ ret = rk_crypto_enable_clk(rkdev);
+ if (ret)
return ret;
- }
- dev->busy = true;
- spin_unlock_irqrestore(&dev->lock, flags);
- tasklet_schedule(&dev->queue_task);
- return ret;
-}
+ reset_control_deassert(rkdev->rst);
+ return 0;
-static void rk_crypto_queue_task_cb(unsigned long data)
-{
- struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
- struct crypto_async_request *async_req, *backlog;
- unsigned long flags;
- int err = 0;
+}
- dev->err = 0;
- spin_lock_irqsave(&dev->lock, flags);
- backlog = crypto_get_backlog(&dev->queue);
- async_req = crypto_dequeue_request(&dev->queue);
+static const struct dev_pm_ops rk_crypto_pm_ops = {
+ SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL)
+};
- if (!async_req) {
- dev->busy = false;
- spin_unlock_irqrestore(&dev->lock, flags);
- return;
- }
- spin_unlock_irqrestore(&dev->lock, flags);
+static int rk_crypto_pm_init(struct rk_crypto_info *rkdev)
+{
+ int err;
- if (backlog) {
- backlog->complete(backlog, -EINPROGRESS);
- backlog = NULL;
- }
+ pm_runtime_use_autosuspend(rkdev->dev);
+ pm_runtime_set_autosuspend_delay(rkdev->dev, 2000);
- dev->async_req = async_req;
- err = dev->start(dev);
+ err = pm_runtime_set_suspended(rkdev->dev);
if (err)
- dev->complete(dev->async_req, err);
+ return err;
+ pm_runtime_enable(rkdev->dev);
+ return err;
}
-static void rk_crypto_done_task_cb(unsigned long data)
+static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev)
{
- struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
+ pm_runtime_disable(rkdev->dev);
+}
- if (dev->err) {
- dev->complete(dev->async_req, dev->err);
- return;
+static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
+{
+ struct rk_crypto_info *dev = platform_get_drvdata(dev_id);
+ u32 interrupt_status;
+
+ interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS);
+ CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status);
+
+ dev->status = 1;
+ if (interrupt_status & 0x0a) {
+ dev_warn(dev->dev, "DMA Error\n");
+ dev->status = 0;
}
+ complete(&dev->complete);
- dev->err = dev->update(dev);
- if (dev->err)
- dev->complete(dev->async_req, dev->err);
+ return IRQ_HANDLED;
}
static struct rk_crypto_tmp *rk_cipher_algs[] = {
@@ -257,6 +184,62 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = {
&rk_ahash_md5,
};
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
+{
+ struct rk_crypto_info *dd;
+ unsigned int i;
+
+ spin_lock(&rocklist.lock);
+ list_for_each_entry(dd, &rocklist.dev_list, list) {
+ seq_printf(seq, "%s %s requests: %lu\n",
+ dev_driver_string(dd->dev), dev_name(dd->dev),
+ dd->nreq);
+ }
+ spin_unlock(&rocklist.lock);
+
+ for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
+ if (!rk_cipher_algs[i]->dev)
+ continue;
+ switch (rk_cipher_algs[i]->type) {
+ case CRYPTO_ALG_TYPE_SKCIPHER:
+ seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+ rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name,
+ rk_cipher_algs[i]->alg.skcipher.base.cra_name,
+ rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
+ seq_printf(seq, "\tfallback due to length: %lu\n",
+ rk_cipher_algs[i]->stat_fb_len);
+ seq_printf(seq, "\tfallback due to alignment: %lu\n",
+ rk_cipher_algs[i]->stat_fb_align);
+ seq_printf(seq, "\tfallback due to SGs: %lu\n",
+ rk_cipher_algs[i]->stat_fb_sgdiff);
+ break;
+ case CRYPTO_ALG_TYPE_AHASH:
+ seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+ rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name,
+ rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
+ rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
+ break;
+ }
+ }
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs);
+#endif
+
+static void register_debugfs(struct rk_crypto_info *crypto_info)
+{
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+ /* Ignore error of debugfs */
+ rocklist.dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
+ rocklist.dbgfs_stats = debugfs_create_file("stats", 0444,
+ rocklist.dbgfs_dir,
+ &rocklist,
+ &rk_crypto_debugfs_fops);
+#endif
+}
+
static int rk_crypto_register(struct rk_crypto_info *crypto_info)
{
unsigned int i, k;
@@ -264,12 +247,22 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
rk_cipher_algs[i]->dev = crypto_info;
- if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
- err = crypto_register_skcipher(
- &rk_cipher_algs[i]->alg.skcipher);
- else
- err = crypto_register_ahash(
- &rk_cipher_algs[i]->alg.hash);
+ switch (rk_cipher_algs[i]->type) {
+ case CRYPTO_ALG_TYPE_SKCIPHER:
+ dev_info(crypto_info->dev, "Register %s as %s\n",
+ rk_cipher_algs[i]->alg.skcipher.base.cra_name,
+ rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name);
+ err = crypto_register_skcipher(&rk_cipher_algs[i]->alg.skcipher);
+ break;
+ case CRYPTO_ALG_TYPE_AHASH:
+ dev_info(crypto_info->dev, "Register %s as %s\n",
+ rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
+ rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name);
+ err = crypto_register_ahash(&rk_cipher_algs[i]->alg.hash);
+ break;
+ default:
+ dev_err(crypto_info->dev, "unknown algorithm\n");
+ }
if (err)
goto err_cipher_algs;
}
@@ -277,7 +270,7 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
err_cipher_algs:
for (k = 0; k < i; k++) {
- if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
+ if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher);
else
crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
@@ -290,22 +283,23 @@ static void rk_crypto_unregister(void)
unsigned int i;
for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
- if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
+ if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher);
else
crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
}
}
-static void rk_crypto_action(void *data)
-{
- struct rk_crypto_info *crypto_info = data;
-
- reset_control_assert(crypto_info->rst);
-}
-
static const struct of_device_id crypto_of_id_table[] = {
- { .compatible = "rockchip,rk3288-crypto" },
+ { .compatible = "rockchip,rk3288-crypto",
+ .data = &rk3288_variant,
+ },
+ { .compatible = "rockchip,rk3328-crypto",
+ .data = &rk3328_variant,
+ },
+ { .compatible = "rockchip,rk3399-crypto",
+ .data = &rk3399_variant,
+ },
{}
};
MODULE_DEVICE_TABLE(of, crypto_of_id_table);
@@ -313,7 +307,7 @@ MODULE_DEVICE_TABLE(of, crypto_of_id_table);
static int rk_crypto_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct rk_crypto_info *crypto_info;
+ struct rk_crypto_info *crypto_info, *first;
int err = 0;
crypto_info = devm_kzalloc(&pdev->dev,
@@ -323,7 +317,16 @@ static int rk_crypto_probe(struct platform_device *pdev)
goto err_crypto;
}
- crypto_info->rst = devm_reset_control_get(dev, "crypto-rst");
+ crypto_info->dev = &pdev->dev;
+ platform_set_drvdata(pdev, crypto_info);
+
+ crypto_info->variant = of_device_get_match_data(&pdev->dev);
+ if (!crypto_info->variant) {
+ dev_err(&pdev->dev, "Missing variant\n");
+ return -EINVAL;
+ }
+
+ crypto_info->rst = devm_reset_control_array_get_exclusive(dev);
if (IS_ERR(crypto_info->rst)) {
err = PTR_ERR(crypto_info->rst);
goto err_crypto;
@@ -333,46 +336,18 @@ static int rk_crypto_probe(struct platform_device *pdev)
usleep_range(10, 20);
reset_control_deassert(crypto_info->rst);
- err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info);
- if (err)
- goto err_crypto;
-
- spin_lock_init(&crypto_info->lock);
-
crypto_info->reg = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(crypto_info->reg)) {
err = PTR_ERR(crypto_info->reg);
goto err_crypto;
}
- crypto_info->aclk = devm_clk_get(&pdev->dev, "aclk");
- if (IS_ERR(crypto_info->aclk)) {
- err = PTR_ERR(crypto_info->aclk);
- goto err_crypto;
- }
-
- crypto_info->hclk = devm_clk_get(&pdev->dev, "hclk");
- if (IS_ERR(crypto_info->hclk)) {
- err = PTR_ERR(crypto_info->hclk);
- goto err_crypto;
- }
-
- crypto_info->sclk = devm_clk_get(&pdev->dev, "sclk");
- if (IS_ERR(crypto_info->sclk)) {
- err = PTR_ERR(crypto_info->sclk);
- goto err_crypto;
- }
-
- crypto_info->dmaclk = devm_clk_get(&pdev->dev, "apb_pclk");
- if (IS_ERR(crypto_info->dmaclk)) {
- err = PTR_ERR(crypto_info->dmaclk);
+ err = rk_crypto_get_clks(crypto_info);
+ if (err)
goto err_crypto;
- }
crypto_info->irq = platform_get_irq(pdev, 0);
if (crypto_info->irq < 0) {
- dev_warn(crypto_info->dev,
- "control Interrupt is not available.\n");
err = crypto_info->irq;
goto err_crypto;
}
@@ -382,49 +357,64 @@ static int rk_crypto_probe(struct platform_device *pdev)
"rk-crypto", pdev);
if (err) {
- dev_err(crypto_info->dev, "irq request failed.\n");
+ dev_err(&pdev->dev, "irq request failed.\n");
goto err_crypto;
}
- crypto_info->dev = &pdev->dev;
- platform_set_drvdata(pdev, crypto_info);
-
- tasklet_init(&crypto_info->queue_task,
- rk_crypto_queue_task_cb, (unsigned long)crypto_info);
- tasklet_init(&crypto_info->done_task,
- rk_crypto_done_task_cb, (unsigned long)crypto_info);
- crypto_init_queue(&crypto_info->queue, 50);
+ crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
+ crypto_engine_start(crypto_info->engine);
+ init_completion(&crypto_info->complete);
- crypto_info->enable_clk = rk_crypto_enable_clk;
- crypto_info->disable_clk = rk_crypto_disable_clk;
- crypto_info->load_data = rk_load_data;
- crypto_info->unload_data = rk_unload_data;
- crypto_info->enqueue = rk_crypto_enqueue;
- crypto_info->busy = false;
+ err = rk_crypto_pm_init(crypto_info);
+ if (err)
+ goto err_pm;
+
+ spin_lock(&rocklist.lock);
+ first = list_first_entry_or_null(&rocklist.dev_list,
+ struct rk_crypto_info, list);
+ list_add_tail(&crypto_info->list, &rocklist.dev_list);
+ spin_unlock(&rocklist.lock);
+
+ if (!first) {
+ err = rk_crypto_register(crypto_info);
+ if (err) {
+ dev_err(dev, "Fail to register crypto algorithms");
+ goto err_register_alg;
+ }
- err = rk_crypto_register(crypto_info);
- if (err) {
- dev_err(dev, "err in register alg");
- goto err_register_alg;
+ register_debugfs(crypto_info);
}
- dev_info(dev, "Crypto Accelerator successfully registered\n");
return 0;
err_register_alg:
- tasklet_kill(&crypto_info->queue_task);
- tasklet_kill(&crypto_info->done_task);
+ rk_crypto_pm_exit(crypto_info);
+err_pm:
+ crypto_engine_exit(crypto_info->engine);
err_crypto:
+ dev_err(dev, "Crypto Accelerator not successfully registered\n");
return err;
}
static int rk_crypto_remove(struct platform_device *pdev)
{
struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
-
- rk_crypto_unregister();
- tasklet_kill(&crypto_tmp->done_task);
- tasklet_kill(&crypto_tmp->queue_task);
+ struct rk_crypto_info *first;
+
+ spin_lock_bh(&rocklist.lock);
+ list_del(&crypto_tmp->list);
+ first = list_first_entry_or_null(&rocklist.dev_list,
+ struct rk_crypto_info, list);
+ spin_unlock_bh(&rocklist.lock);
+
+ if (!first) {
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+ debugfs_remove_recursive(rocklist.dbgfs_dir);
+#endif
+ rk_crypto_unregister();
+ }
+ rk_crypto_pm_exit(crypto_tmp);
+ crypto_engine_exit(crypto_tmp->engine);
return 0;
}
@@ -433,6 +423,7 @@ static struct platform_driver crypto_driver = {
.remove = rk_crypto_remove,
.driver = {
.name = "rk3288-crypto",
+ .pm = &rk_crypto_pm_ops,
.of_match_table = crypto_of_id_table,
},
};
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 97278c2574ff..b2695258cade 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -5,9 +5,13 @@
#include <crypto/aes.h>
#include <crypto/internal/des.h>
#include <crypto/algapi.h>
+#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
+#include <linux/pm_runtime.h>
#include <linux/scatterlist.h>
+#include <crypto/engine.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
@@ -184,85 +188,91 @@
#define CRYPTO_WRITE(dev, offset, val) \
writel_relaxed((val), ((dev)->reg + (offset)))
+#define RK_MAX_CLKS 4
+
+/*
+ * struct rockchip_ip - struct for managing a list of RK crypto instance
+ * @dev_list: Used for doing a list of rk_crypto_info
+ * @lock: Control access to dev_list
+ * @dbgfs_dir: Debugfs dentry for statistic directory
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ */
+struct rockchip_ip {
+ struct list_head dev_list;
+ spinlock_t lock; /* Control access to dev_list */
+ struct dentry *dbgfs_dir;
+ struct dentry *dbgfs_stats;
+};
+
+struct rk_clks {
+ const char *name;
+ unsigned long max;
+};
+
+struct rk_variant {
+ int num_clks;
+ struct rk_clks rkclks[RK_MAX_CLKS];
+};
+
struct rk_crypto_info {
+ struct list_head list;
struct device *dev;
- struct clk *aclk;
- struct clk *hclk;
- struct clk *sclk;
- struct clk *dmaclk;
+ struct clk_bulk_data *clks;
+ int num_clks;
struct reset_control *rst;
void __iomem *reg;
int irq;
- struct crypto_queue queue;
- struct tasklet_struct queue_task;
- struct tasklet_struct done_task;
- struct crypto_async_request *async_req;
- int err;
- /* device lock */
- spinlock_t lock;
-
- /* the public variable */
- struct scatterlist *sg_src;
- struct scatterlist *sg_dst;
- struct scatterlist sg_tmp;
- struct scatterlist *first;
- unsigned int left_bytes;
- void *addr_vir;
- int aligned;
- int align_size;
- size_t src_nents;
- size_t dst_nents;
- unsigned int total;
- unsigned int count;
- dma_addr_t addr_in;
- dma_addr_t addr_out;
- bool busy;
- int (*start)(struct rk_crypto_info *dev);
- int (*update)(struct rk_crypto_info *dev);
- void (*complete)(struct crypto_async_request *base, int err);
- int (*enable_clk)(struct rk_crypto_info *dev);
- void (*disable_clk)(struct rk_crypto_info *dev);
- int (*load_data)(struct rk_crypto_info *dev,
- struct scatterlist *sg_src,
- struct scatterlist *sg_dst);
- void (*unload_data)(struct rk_crypto_info *dev);
- int (*enqueue)(struct rk_crypto_info *dev,
- struct crypto_async_request *async_req);
+ const struct rk_variant *variant;
+ unsigned long nreq;
+ struct crypto_engine *engine;
+ struct completion complete;
+ int status;
};
/* the private variable of hash */
struct rk_ahash_ctx {
- struct rk_crypto_info *dev;
+ struct crypto_engine_ctx enginectx;
/* for fallback */
struct crypto_ahash *fallback_tfm;
};
-/* the privete variable of hash for fallback */
+/* the private variable of hash for fallback */
struct rk_ahash_rctx {
+ struct rk_crypto_info *dev;
struct ahash_request fallback_req;
u32 mode;
+ int nrsg;
};
/* the private variable of cipher */
struct rk_cipher_ctx {
- struct rk_crypto_info *dev;
+ struct crypto_engine_ctx enginectx;
unsigned int keylen;
- u32 mode;
+ u8 key[AES_MAX_KEY_SIZE];
u8 iv[AES_BLOCK_SIZE];
+ struct crypto_skcipher *fallback_tfm;
};
-enum alg_type {
- ALG_TYPE_HASH,
- ALG_TYPE_CIPHER,
+struct rk_cipher_rctx {
+ struct rk_crypto_info *dev;
+ u8 backup_iv[AES_BLOCK_SIZE];
+ u32 mode;
+ struct skcipher_request fallback_req; // keep at the end
};
struct rk_crypto_tmp {
- struct rk_crypto_info *dev;
+ u32 type;
+ struct rk_crypto_info *dev;
union {
struct skcipher_alg skcipher;
struct ahash_alg hash;
} alg;
- enum alg_type type;
+ unsigned long stat_req;
+ unsigned long stat_fb;
+ unsigned long stat_fb_len;
+ unsigned long stat_fb_sglen;
+ unsigned long stat_fb_align;
+ unsigned long stat_fb_sgdiff;
};
extern struct rk_crypto_tmp rk_ecb_aes_alg;
@@ -276,4 +286,5 @@ extern struct rk_crypto_tmp rk_ahash_sha1;
extern struct rk_crypto_tmp rk_ahash_sha256;
extern struct rk_crypto_tmp rk_ahash_md5;
+struct rk_crypto_info *get_rk_crypto(void);
#endif
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index ed03058497bc..a78ff3dcd0b1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -9,6 +9,8 @@
* Some ideas are from marvell/cesa.c and s5p-sss.c driver.
*/
#include <linux/device.h>
+#include <asm/unaligned.h>
+#include <linux/iopoll.h>
#include "rk3288_crypto.h"
/*
@@ -16,6 +18,44 @@
* so we put the fixed hash out when met zero message.
*/
+static bool rk_ahash_need_fallback(struct ahash_request *req)
+{
+ struct scatterlist *sg;
+
+ sg = req->src;
+ while (sg) {
+ if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+ return true;
+ }
+ if (sg->length % 4) {
+ return true;
+ }
+ sg = sg_next(sg);
+ }
+ return false;
+}
+
+static int rk_ahash_digest_fb(struct ahash_request *areq)
+{
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+
+ algt->stat_fb++;
+
+ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
+ rctx->fallback_req.base.flags = areq->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ rctx->fallback_req.nbytes = areq->nbytes;
+ rctx->fallback_req.src = areq->src;
+ rctx->fallback_req.result = areq->result;
+
+ return crypto_ahash_digest(&rctx->fallback_req);
+}
+
static int zero_message_process(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -38,15 +78,9 @@ static int zero_message_process(struct ahash_request *req)
return 0;
}
-static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err)
+static void rk_ahash_reg_init(struct ahash_request *req,
+ struct rk_crypto_info *dev)
{
- if (base->complete)
- base->complete(base, err);
-}
-
-static void rk_ahash_reg_init(struct rk_crypto_info *dev)
-{
- struct ahash_request *req = ahash_request_cast(dev->async_req);
struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
int reg_status;
@@ -74,7 +108,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev)
RK_CRYPTO_BYTESWAP_BRFIFO |
RK_CRYPTO_BYTESWAP_BTFIFO);
- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total);
+ CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes);
}
static int rk_ahash_init(struct ahash_request *req)
@@ -164,51 +198,80 @@ static int rk_ahash_export(struct ahash_request *req, void *out)
static int rk_ahash_digest(struct ahash_request *req)
{
- struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
- struct rk_crypto_info *dev = tctx->dev;
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
+ struct rk_crypto_info *dev;
+ struct crypto_engine *engine;
+
+ if (rk_ahash_need_fallback(req))
+ return rk_ahash_digest_fb(req);
if (!req->nbytes)
return zero_message_process(req);
- else
- return dev->enqueue(dev, &req->base);
+
+ dev = get_rk_crypto();
+
+ rctx->dev = dev;
+ engine = dev->engine;
+
+ return crypto_transfer_hash_request_to_engine(engine, req);
}
-static void crypto_ahash_dma_start(struct rk_crypto_info *dev)
+static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg)
{
- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in);
- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4);
+ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg));
+ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4);
CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START |
(RK_CRYPTO_HASH_START << 16));
}
-static int rk_ahash_set_data_start(struct rk_crypto_info *dev)
+static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
{
- int err;
+ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+ struct rk_crypto_info *rkc = rctx->dev;
+ int ret;
- err = dev->load_data(dev, dev->sg_src, NULL);
- if (!err)
- crypto_ahash_dma_start(dev);
- return err;
+ ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
+ if (ret <= 0)
+ return -EINVAL;
+
+ rctx->nrsg = ret;
+
+ return 0;
}
-static int rk_ahash_start(struct rk_crypto_info *dev)
+static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
{
- struct ahash_request *req = ahash_request_cast(dev->async_req);
- struct crypto_ahash *tfm;
- struct rk_ahash_rctx *rctx;
-
- dev->total = req->nbytes;
- dev->left_bytes = req->nbytes;
- dev->aligned = 0;
- dev->align_size = 4;
- dev->sg_dst = NULL;
- dev->sg_src = req->src;
- dev->first = req->src;
- dev->src_nents = sg_nents(req->src);
- rctx = ahash_request_ctx(req);
+ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+ struct rk_crypto_info *rkc = rctx->dev;
+
+ dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
+ return 0;
+}
+
+static int rk_hash_run(struct crypto_engine *engine, void *breq)
+{
+ struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+ struct scatterlist *sg = areq->src;
+ struct rk_crypto_info *rkc = rctx->dev;
+ int err = 0;
+ int i;
+ u32 v;
+
+ err = pm_runtime_resume_and_get(rkc->dev);
+ if (err)
+ return err;
+
rctx->mode = 0;
- tfm = crypto_ahash_reqtfm(req);
+ algt->stat_req++;
+ rkc->nreq++;
+
switch (crypto_ahash_digestsize(tfm)) {
case SHA1_DIGEST_SIZE:
rctx->mode = RK_CRYPTO_HASH_SHA1;
@@ -220,100 +283,88 @@ static int rk_ahash_start(struct rk_crypto_info *dev)
rctx->mode = RK_CRYPTO_HASH_MD5;
break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ goto theend;
}
- rk_ahash_reg_init(dev);
- return rk_ahash_set_data_start(dev);
-}
-
-static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
-{
- int err = 0;
- struct ahash_request *req = ahash_request_cast(dev->async_req);
- struct crypto_ahash *tfm;
-
- dev->unload_data(dev);
- if (dev->left_bytes) {
- if (dev->aligned) {
- if (sg_is_last(dev->sg_src)) {
- dev_warn(dev->dev, "[%s:%d], Lack of data\n",
- __func__, __LINE__);
- err = -ENOMEM;
- goto out_rx;
- }
- dev->sg_src = sg_next(dev->sg_src);
+ rk_ahash_reg_init(areq, rkc);
+
+ while (sg) {
+ reinit_completion(&rkc->complete);
+ rkc->status = 0;
+ crypto_ahash_dma_start(rkc, sg);
+ wait_for_completion_interruptible_timeout(&rkc->complete,
+ msecs_to_jiffies(2000));
+ if (!rkc->status) {
+ dev_err(rkc->dev, "DMA timeout\n");
+ err = -EFAULT;
+ goto theend;
}
- err = rk_ahash_set_data_start(dev);
- } else {
- /*
- * it will take some time to process date after last dma
- * transmission.
- *
- * waiting time is relative with the last date len,
- * so cannot set a fixed time here.
- * 10us makes system not call here frequently wasting
- * efficiency, and make it response quickly when dma
- * complete.
- */
- while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS))
- udelay(10);
-
- tfm = crypto_ahash_reqtfm(req);
- memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0,
- crypto_ahash_digestsize(tfm));
- dev->complete(dev->async_req, 0);
- tasklet_schedule(&dev->queue_task);
+ sg = sg_next(sg);
}
-out_rx:
- return err;
+ /*
+ * it will take some time to process date after last dma
+ * transmission.
+ *
+ * waiting time is relative with the last date len,
+ * so cannot set a fixed time here.
+ * 10us makes system not call here frequently wasting
+ * efficiency, and make it response quickly when dma
+ * complete.
+ */
+ readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000);
+
+ for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) {
+ v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
+ put_unaligned_le32(v, areq->result + i * 4);
+ }
+
+theend:
+ pm_runtime_put_autosuspend(rkc->dev);
+
+ local_bh_disable();
+ crypto_finalize_hash_request(engine, breq, err);
+ local_bh_enable();
+
+ return 0;
}
static int rk_cra_hash_init(struct crypto_tfm *tfm)
{
struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
- struct rk_crypto_tmp *algt;
- struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
-
const char *alg_name = crypto_tfm_alg_name(tfm);
-
- algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
-
- tctx->dev = algt->dev;
- tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL);
- if (!tctx->dev->addr_vir) {
- dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n");
- return -ENOMEM;
- }
- tctx->dev->start = rk_ahash_start;
- tctx->dev->update = rk_ahash_crypto_rx;
- tctx->dev->complete = rk_ahash_crypto_complete;
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
/* for fallback */
tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(tctx->fallback_tfm)) {
- dev_err(tctx->dev->dev, "Could not load fallback driver.\n");
+ dev_err(algt->dev->dev, "Could not load fallback driver.\n");
return PTR_ERR(tctx->fallback_tfm);
}
+
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct rk_ahash_rctx) +
crypto_ahash_reqsize(tctx->fallback_tfm));
- return tctx->dev->enable_clk(tctx->dev);
+ tctx->enginectx.op.do_one_request = rk_hash_run;
+ tctx->enginectx.op.prepare_request = rk_hash_prepare;
+ tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
+
+ return 0;
}
static void rk_cra_hash_exit(struct crypto_tfm *tfm)
{
struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
- free_page((unsigned long)tctx->dev->addr_vir);
- return tctx->dev->disable_clk(tctx->dev);
+ crypto_free_ahash(tctx->fallback_tfm);
}
struct rk_crypto_tmp rk_ahash_sha1 = {
- .type = ALG_TYPE_HASH,
+ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.init = rk_ahash_init,
.update = rk_ahash_update,
@@ -337,13 +388,13 @@ struct rk_crypto_tmp rk_ahash_sha1 = {
.cra_init = rk_cra_hash_init,
.cra_exit = rk_cra_hash_exit,
.cra_module = THIS_MODULE,
- }
- }
+ }
+ }
}
};
struct rk_crypto_tmp rk_ahash_sha256 = {
- .type = ALG_TYPE_HASH,
+ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.init = rk_ahash_init,
.update = rk_ahash_update,
@@ -367,13 +418,13 @@ struct rk_crypto_tmp rk_ahash_sha256 = {
.cra_init = rk_cra_hash_init,
.cra_exit = rk_cra_hash_exit,
.cra_module = THIS_MODULE,
- }
- }
+ }
+ }
}
};
struct rk_crypto_tmp rk_ahash_md5 = {
- .type = ALG_TYPE_HASH,
+ .type = CRYPTO_ALG_TYPE_AHASH,
.alg.hash = {
.init = rk_ahash_init,
.update = rk_ahash_update,
@@ -397,7 +448,7 @@ struct rk_crypto_tmp rk_ahash_md5 = {
.cra_init = rk_cra_hash_init,
.cra_exit = rk_cra_hash_exit,
.cra_module = THIS_MODULE,
- }
}
+ }
}
};
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 5bbf0d2722e1..59069457582b 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -9,23 +9,94 @@
* Some ideas are from marvell-cesa.c and s5p-sss.c driver.
*/
#include <linux/device.h>
+#include <crypto/scatterwalk.h>
#include "rk3288_crypto.h"
#define RK_CRYPTO_DEC BIT(0)
-static void rk_crypto_complete(struct crypto_async_request *base, int err)
+static int rk_cipher_need_fallback(struct skcipher_request *req)
{
- if (base->complete)
- base->complete(base, err);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+ struct scatterlist *sgs, *sgd;
+ unsigned int stodo, dtodo, len;
+ unsigned int bs = crypto_skcipher_blocksize(tfm);
+
+ if (!req->cryptlen)
+ return true;
+
+ len = req->cryptlen;
+ sgs = req->src;
+ sgd = req->dst;
+ while (sgs && sgd) {
+ if (!IS_ALIGNED(sgs->offset, sizeof(u32))) {
+ algt->stat_fb_align++;
+ return true;
+ }
+ if (!IS_ALIGNED(sgd->offset, sizeof(u32))) {
+ algt->stat_fb_align++;
+ return true;
+ }
+ stodo = min(len, sgs->length);
+ if (stodo % bs) {
+ algt->stat_fb_len++;
+ return true;
+ }
+ dtodo = min(len, sgd->length);
+ if (dtodo % bs) {
+ algt->stat_fb_len++;
+ return true;
+ }
+ if (stodo != dtodo) {
+ algt->stat_fb_sgdiff++;
+ return true;
+ }
+ len -= stodo;
+ sgs = sg_next(sgs);
+ sgd = sg_next(sgd);
+ }
+ return false;
}
-static int rk_handle_req(struct rk_crypto_info *dev,
- struct skcipher_request *req)
+static int rk_cipher_fallback(struct skcipher_request *areq)
{
- if (!IS_ALIGNED(req->cryptlen, dev->align_size))
- return -EINVAL;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm);
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+ int err;
+
+ algt->stat_fb++;
+
+ skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+ skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+ areq->base.complete, areq->base.data);
+ skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
+ areq->cryptlen, areq->iv);
+ if (rctx->mode & RK_CRYPTO_DEC)
+ err = crypto_skcipher_decrypt(&rctx->fallback_req);
else
- return dev->enqueue(dev, &req->base);
+ err = crypto_skcipher_encrypt(&rctx->fallback_req);
+ return err;
+}
+
+static int rk_cipher_handle_req(struct skcipher_request *req)
+{
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+ struct rk_crypto_info *rkc;
+ struct crypto_engine *engine;
+
+ if (rk_cipher_need_fallback(req))
+ return rk_cipher_fallback(req);
+
+ rkc = get_rk_crypto();
+
+ engine = rkc->engine;
+ rctx->dev = rkc;
+
+ return crypto_transfer_skcipher_request_to_engine(engine, req);
}
static int rk_aes_setkey(struct crypto_skcipher *cipher,
@@ -38,8 +109,9 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher,
keylen != AES_KEYSIZE_256)
return -EINVAL;
ctx->keylen = keylen;
- memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
- return 0;
+ memcpy(ctx->key, key, keylen);
+
+ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
}
static int rk_des_setkey(struct crypto_skcipher *cipher,
@@ -53,8 +125,9 @@ static int rk_des_setkey(struct crypto_skcipher *cipher,
return err;
ctx->keylen = keylen;
- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
- return 0;
+ memcpy(ctx->key, key, keylen);
+
+ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
}
static int rk_tdes_setkey(struct crypto_skcipher *cipher,
@@ -68,161 +141,136 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
return err;
ctx->keylen = keylen;
- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
- return 0;
+ memcpy(ctx->key, key, keylen);
+
+ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
}
static int rk_aes_ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_AES_ECB_MODE;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_AES_ECB_MODE;
+ return rk_cipher_handle_req(req);
}
static int rk_aes_ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
+ return rk_cipher_handle_req(req);
}
static int rk_aes_cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_AES_CBC_MODE;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_AES_CBC_MODE;
+ return rk_cipher_handle_req(req);
}
static int rk_aes_cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
+ return rk_cipher_handle_req(req);
}
static int rk_des_ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = 0;
- return rk_handle_req(dev, req);
+ rctx->mode = 0;
+ return rk_cipher_handle_req(req);
}
static int rk_des_ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_DEC;
+ return rk_cipher_handle_req(req);
}
static int rk_des_cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
+ return rk_cipher_handle_req(req);
}
static int rk_des_cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
+ return rk_cipher_handle_req(req);
}
static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_SELECT;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_TDES_SELECT;
+ return rk_cipher_handle_req(req);
}
static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
+ return rk_cipher_handle_req(req);
}
static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
- return rk_handle_req(dev, req);
+ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
+ return rk_cipher_handle_req(req);
}
static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct rk_crypto_info *dev = ctx->dev;
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
+ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
RK_CRYPTO_DEC;
- return rk_handle_req(dev, req);
+ return rk_cipher_handle_req(req);
}
-static void rk_ablk_hw_init(struct rk_crypto_info *dev)
+static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
{
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
- u32 ivsize, block, conf_reg = 0;
+ u32 block, conf_reg = 0;
block = crypto_tfm_alg_blocksize(tfm);
- ivsize = crypto_skcipher_ivsize(cipher);
if (block == DES_BLOCK_SIZE) {
- ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
+ rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
RK_CRYPTO_TDES_BYTESWAP_KEY |
RK_CRYPTO_TDES_BYTESWAP_IV;
- CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
+ CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
+ memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
conf_reg = RK_CRYPTO_DESSEL;
} else {
- ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
+ rctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
RK_CRYPTO_AES_KEY_CHANGE |
RK_CRYPTO_AES_BYTESWAP_KEY |
RK_CRYPTO_AES_BYTESWAP_IV;
if (ctx->keylen == AES_KEYSIZE_192)
- ctx->mode |= RK_CRYPTO_AES_192BIT_key;
+ rctx->mode |= RK_CRYPTO_AES_192BIT_key;
else if (ctx->keylen == AES_KEYSIZE_256)
- ctx->mode |= RK_CRYPTO_AES_256BIT_key;
- CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
+ rctx->mode |= RK_CRYPTO_AES_256BIT_key;
+ CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
+ memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
}
conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
RK_CRYPTO_BYTESWAP_BRFIFO;
@@ -231,189 +279,196 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA);
}
-static void crypto_dma_start(struct rk_crypto_info *dev)
+static void crypto_dma_start(struct rk_crypto_info *dev,
+ struct scatterlist *sgs,
+ struct scatterlist *sgd, unsigned int todo)
{
- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in);
- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4);
- CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out);
+ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs));
+ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo);
+ CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd));
CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START |
_SBF(RK_CRYPTO_BLOCK_START, 16));
}
-static int rk_set_data_start(struct rk_crypto_info *dev)
+static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
{
- int err;
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 ivsize = crypto_skcipher_ivsize(tfm);
- u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
- dev->sg_src->offset + dev->sg_src->length - ivsize;
-
- /* Store the iv that need to be updated in chain mode.
- * And update the IV buffer to contain the next IV for decryption mode.
- */
- if (ctx->mode & RK_CRYPTO_DEC) {
- memcpy(ctx->iv, src_last_blk, ivsize);
- sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv,
- ivsize, dev->total - ivsize);
- }
-
- err = dev->load_data(dev, dev->sg_src, dev->sg_dst);
- if (!err)
- crypto_dma_start(dev);
- return err;
-}
-
-static int rk_ablk_start(struct rk_crypto_info *dev)
-{
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
- unsigned long flags;
+ struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
+ struct scatterlist *sgs, *sgd;
int err = 0;
+ int ivsize = crypto_skcipher_ivsize(tfm);
+ int offset;
+ u8 iv[AES_BLOCK_SIZE];
+ u8 biv[AES_BLOCK_SIZE];
+ u8 *ivtouse = areq->iv;
+ unsigned int len = areq->cryptlen;
+ unsigned int todo;
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+ struct rk_crypto_info *rkc = rctx->dev;
- dev->left_bytes = req->cryptlen;
- dev->total = req->cryptlen;
- dev->sg_src = req->src;
- dev->first = req->src;
- dev->src_nents = sg_nents(req->src);
- dev->sg_dst = req->dst;
- dev->dst_nents = sg_nents(req->dst);
- dev->aligned = 1;
-
- spin_lock_irqsave(&dev->lock, flags);
- rk_ablk_hw_init(dev);
- err = rk_set_data_start(dev);
- spin_unlock_irqrestore(&dev->lock, flags);
- return err;
-}
+ err = pm_runtime_resume_and_get(rkc->dev);
+ if (err)
+ return err;
-static void rk_iv_copyback(struct rk_crypto_info *dev)
-{
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 ivsize = crypto_skcipher_ivsize(tfm);
+ algt->stat_req++;
+ rkc->nreq++;
- /* Update the IV buffer to contain the next IV for encryption mode. */
- if (!(ctx->mode & RK_CRYPTO_DEC)) {
- if (dev->aligned) {
- memcpy(req->iv, sg_virt(dev->sg_dst) +
- dev->sg_dst->length - ivsize, ivsize);
- } else {
- memcpy(req->iv, dev->addr_vir +
- dev->count - ivsize, ivsize);
+ ivsize = crypto_skcipher_ivsize(tfm);
+ if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
+ if (rctx->mode & RK_CRYPTO_DEC) {
+ offset = areq->cryptlen - ivsize;
+ scatterwalk_map_and_copy(rctx->backup_iv, areq->src,
+ offset, ivsize, 0);
}
}
-}
-static void rk_update_iv(struct rk_crypto_info *dev)
-{
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 ivsize = crypto_skcipher_ivsize(tfm);
- u8 *new_iv = NULL;
+ sgs = areq->src;
+ sgd = areq->dst;
- if (ctx->mode & RK_CRYPTO_DEC) {
- new_iv = ctx->iv;
- } else {
- new_iv = page_address(sg_page(dev->sg_dst)) +
- dev->sg_dst->offset + dev->sg_dst->length - ivsize;
+ while (sgs && sgd && len) {
+ if (!sgs->length) {
+ sgs = sg_next(sgs);
+ sgd = sg_next(sgd);
+ continue;
+ }
+ if (rctx->mode & RK_CRYPTO_DEC) {
+ /* we backup last block of source to be used as IV at next step */
+ offset = sgs->length - ivsize;
+ scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0);
+ }
+ if (sgs == sgd) {
+ err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
+ if (err <= 0) {
+ err = -EINVAL;
+ goto theend_iv;
+ }
+ } else {
+ err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
+ if (err <= 0) {
+ err = -EINVAL;
+ goto theend_iv;
+ }
+ err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
+ if (err <= 0) {
+ err = -EINVAL;
+ goto theend_sgs;
+ }
+ }
+ err = 0;
+ rk_cipher_hw_init(rkc, areq);
+ if (ivsize) {
+ if (ivsize == DES_BLOCK_SIZE)
+ memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
+ else
+ memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize);
+ }
+ reinit_completion(&rkc->complete);
+ rkc->status = 0;
+
+ todo = min(sg_dma_len(sgs), len);
+ len -= todo;
+ crypto_dma_start(rkc, sgs, sgd, todo / 4);
+ wait_for_completion_interruptible_timeout(&rkc->complete,
+ msecs_to_jiffies(2000));
+ if (!rkc->status) {
+ dev_err(rkc->dev, "DMA timeout\n");
+ err = -EFAULT;
+ goto theend;
+ }
+ if (sgs == sgd) {
+ dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
+ } else {
+ dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
+ dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
+ }
+ if (rctx->mode & RK_CRYPTO_DEC) {
+ memcpy(iv, biv, ivsize);
+ ivtouse = iv;
+ } else {
+ offset = sgd->length - ivsize;
+ scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0);
+ ivtouse = iv;
+ }
+ sgs = sg_next(sgs);
+ sgd = sg_next(sgd);
}
- if (ivsize == DES_BLOCK_SIZE)
- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize);
- else if (ivsize == AES_BLOCK_SIZE)
- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize);
-}
-
-/* return:
- * true some err was occurred
- * fault no err, continue
- */
-static int rk_ablk_rx(struct rk_crypto_info *dev)
-{
- int err = 0;
- struct skcipher_request *req =
- skcipher_request_cast(dev->async_req);
-
- dev->unload_data(dev);
- if (!dev->aligned) {
- if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents,
- dev->addr_vir, dev->count,
- dev->total - dev->left_bytes -
- dev->count)) {
- err = -EINVAL;
- goto out_rx;
+ if (areq->iv && ivsize > 0) {
+ offset = areq->cryptlen - ivsize;
+ if (rctx->mode & RK_CRYPTO_DEC) {
+ memcpy(areq->iv, rctx->backup_iv, ivsize);
+ memzero_explicit(rctx->backup_iv, ivsize);
+ } else {
+ scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+ ivsize, 0);
}
}
- if (dev->left_bytes) {
- rk_update_iv(dev);
- if (dev->aligned) {
- if (sg_is_last(dev->sg_src)) {
- dev_err(dev->dev, "[%s:%d] Lack of data\n",
- __func__, __LINE__);
- err = -ENOMEM;
- goto out_rx;
- }
- dev->sg_src = sg_next(dev->sg_src);
- dev->sg_dst = sg_next(dev->sg_dst);
- }
- err = rk_set_data_start(dev);
+
+theend:
+ pm_runtime_put_autosuspend(rkc->dev);
+
+ local_bh_disable();
+ crypto_finalize_skcipher_request(engine, areq, err);
+ local_bh_enable();
+ return 0;
+
+theend_sgs:
+ if (sgs == sgd) {
+ dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
} else {
- rk_iv_copyback(dev);
- /* here show the calculation is over without any err */
- dev->complete(dev->async_req, 0);
- tasklet_schedule(&dev->queue_task);
+ dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
+ dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
}
-out_rx:
+theend_iv:
return err;
}
-static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
+static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
{
struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
- struct rk_crypto_tmp *algt;
+ struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
- algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+ ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->fallback_tfm)) {
+ dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+ name, PTR_ERR(ctx->fallback_tfm));
+ return PTR_ERR(ctx->fallback_tfm);
+ }
+
+ tfm->reqsize = sizeof(struct rk_cipher_rctx) +
+ crypto_skcipher_reqsize(ctx->fallback_tfm);
- ctx->dev = algt->dev;
- ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1;
- ctx->dev->start = rk_ablk_start;
- ctx->dev->update = rk_ablk_rx;
- ctx->dev->complete = rk_crypto_complete;
- ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
+ ctx->enginectx.op.do_one_request = rk_cipher_run;
- return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM;
+ return 0;
}
-static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
+static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm)
{
struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
- free_page((unsigned long)ctx->dev->addr_vir);
- ctx->dev->disable_clk(ctx->dev);
+ memzero_explicit(ctx->key, ctx->keylen);
+ crypto_free_skcipher(ctx->fallback_tfm);
}
struct rk_crypto_tmp rk_ecb_aes_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "ecb(aes)",
.base.cra_driver_name = "ecb-aes-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x0f,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = rk_aes_setkey,
@@ -423,19 +478,19 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
};
struct rk_crypto_tmp rk_cbc_aes_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "cbc(aes)",
.base.cra_driver_name = "cbc-aes-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x0f,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
@@ -446,19 +501,19 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
};
struct rk_crypto_tmp rk_ecb_des_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "ecb(des)",
.base.cra_driver_name = "ecb-des-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = DES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x07,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.setkey = rk_des_setkey,
@@ -468,19 +523,19 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
};
struct rk_crypto_tmp rk_cbc_des_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "cbc(des)",
.base.cra_driver_name = "cbc-des-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = DES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x07,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
.ivsize = DES_BLOCK_SIZE,
@@ -491,19 +546,19 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
};
struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "ecb(des3_ede)",
.base.cra_driver_name = "ecb-des3-ede-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = DES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x07,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.setkey = rk_tdes_setkey,
@@ -513,19 +568,19 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
};
struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
- .type = ALG_TYPE_CIPHER,
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
.alg.skcipher = {
.base.cra_name = "cbc(des3_ede)",
.base.cra_driver_name = "cbc-des3-ede-rk",
.base.cra_priority = 300,
- .base.cra_flags = CRYPTO_ALG_ASYNC,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
.base.cra_blocksize = DES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct rk_cipher_ctx),
.base.cra_alignmask = 0x07,
.base.cra_module = THIS_MODULE,
- .init = rk_ablk_init_tfm,
- .exit = rk_ablk_exit_tfm,
+ .init = rk_cipher_tfm_init,
+ .exit = rk_cipher_tfm_exit,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES_BLOCK_SIZE,
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 59ef541123ae..59638dfce573 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -1400,7 +1400,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
/* wait end of process */
err = stm32_cryp_wait_output(cryp);
if (err) {
- dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+ dev_err(cryp->dev, "Timeout (write ccm padded data)\n");
return stm32_cryp_finish_req(cryp, err);
}
@@ -1440,7 +1440,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
/* h) wait for completion */
err = stm32_cryp_wait_busy(cryp);
if (err)
- dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+ dev_err(cryp->dev, "Timeout (write ccm padded data)\n");
/* i) run the he normal Final phase */
stm32_cryp_finish_req(cryp, err);
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c9ad6c213090..71db6450b6aa 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1999,7 +1999,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
/* Buffer up to one whole block */
nents = sg_nents_for_len(areq->src, nbytes);
if (nents < 0) {
- dev_err(ctx->dev, "Invalid number of src SG.\n");
+ dev_err(dev, "Invalid number of src SG.\n");
return nents;
}
sg_copy_to_buffer(areq->src, nents,
@@ -2040,7 +2040,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
offset = nbytes_to_hash - req_ctx->nbuf;
nents = sg_nents_for_len(areq->src, offset);
if (nents < 0) {
- dev_err(ctx->dev, "Invalid number of src SG.\n");
+ dev_err(dev, "Invalid number of src SG.\n");
return nents;
}
sg_copy_to_buffer(areq->src, nents,
@@ -2054,7 +2054,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
if (to_hash_later) {
nents = sg_nents_for_len(areq->src, nbytes);
if (nents < 0) {
- dev_err(ctx->dev, "Invalid number of src SG.\n");
+ dev_err(dev, "Invalid number of src SG.\n");
return nents;
}
sg_pcopy_to_buffer(areq->src, nents,
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 32825119e880..1a93ee355929 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -65,8 +65,8 @@ struct talitos_edesc {
dma_addr_t dma_link_tbl;
struct talitos_desc desc;
union {
- struct talitos_ptr link_tbl[0];
- u8 buf[0];
+ DECLARE_FLEX_ARRAY(struct talitos_ptr, link_tbl);
+ DECLARE_FLEX_ARRAY(u8, buf);
};
};
diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c
index c2d34dc8ba46..6ea5789a89e2 100644
--- a/drivers/firmware/turris-mox-rwtm.c
+++ b/drivers/firmware/turris-mox-rwtm.c
@@ -528,7 +528,6 @@ static int turris_mox_rwtm_probe(struct platform_device *pdev)
rwtm->hwrng.name = DRIVER_NAME "_hwrng";
rwtm->hwrng.read = mox_hwrng_read;
rwtm->hwrng.priv = (unsigned long) rwtm;
- rwtm->hwrng.quality = 1024;
ret = devm_hwrng_register(dev, &rwtm->hwrng);
if (ret < 0) {
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index f94b43ce9a65..4bf36e53fe3e 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -53,10 +53,6 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_req);
EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_rep);
-static int zcrypt_hwrng_seed = 1;
-module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, 0440);
-MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on).");
-
DEFINE_SPINLOCK(zcrypt_list_lock);
LIST_HEAD(zcrypt_card_list);
@@ -2063,8 +2059,6 @@ int zcrypt_rng_device_add(void)
goto out;
}
zcrypt_rng_buffer_index = 0;
- if (!zcrypt_hwrng_seed)
- zcrypt_rng_dev.quality = 0;
rc = hwrng_register(&zcrypt_rng_dev);
if (rc)
goto out_free;
diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c
index 87067c3d6109..6fb5140e29b9 100644
--- a/drivers/usb/misc/chaoskey.c
+++ b/drivers/usb/misc/chaoskey.c
@@ -200,7 +200,6 @@ static int chaoskey_probe(struct usb_interface *interface,
dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name;
dev->hwrng.read = chaoskey_rng_read;
- dev->hwrng.quality = 1024;
dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0);
if (!dev->hwrng_registered)
diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h
index 9d7eff04f224..fd9df607a836 100644
--- a/include/crypto/gcm.h
+++ b/include/crypto/gcm.h
@@ -3,6 +3,9 @@
#include <linux/errno.h>
+#include <crypto/aes.h>
+#include <crypto/gf128mul.h>
+
#define GCM_AES_IV_SIZE 12
#define GCM_RFC4106_IV_SIZE 8
#define GCM_RFC4543_IV_SIZE 8
@@ -60,4 +63,23 @@ static inline int crypto_ipsec_check_assoclen(unsigned int assoclen)
return 0;
}
+
+struct aesgcm_ctx {
+ be128 ghash_key;
+ struct crypto_aes_ctx aes_ctx;
+ unsigned int authsize;
+};
+
+int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
+ unsigned int keysize, unsigned int authsize);
+
+void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+ int crypt_len, const u8 *assoc, int assoc_len,
+ const u8 iv[GCM_AES_IV_SIZE], u8 *authtag);
+
+bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst,
+ const u8 *src, int crypt_len, const u8 *assoc,
+ int assoc_len, const u8 iv[GCM_AES_IV_SIZE],
+ const u8 *authtag);
+
#endif
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a2339f80a615..2a97540156bb 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -14,6 +14,14 @@
#include <linux/list.h>
#include <linux/types.h>
+/*
+ * Set this if your algorithm is sync but needs a reqsize larger
+ * than MAX_SYNC_SKCIPHER_REQSIZE.
+ *
+ * Reuse bit that is specific to hash algorithms.
+ */
+#define CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE CRYPTO_ALG_OPTIONAL_KEY
+
struct aead_request;
struct rtattr;
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index ccdb05f68a75..f2c42b4111b1 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -93,7 +93,6 @@ static inline void scatterwalk_done(struct scatter_walk *walk, int out,
void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
size_t nbytes, int out);
-void *scatterwalk_map(struct scatter_walk *walk);
void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
unsigned int start, unsigned int nbytes, int out);
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index e230c7c46110..be3aedaa96dc 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -384,14 +384,14 @@ struct hisi_qp {
static inline int q_num_set(const char *val, const struct kernel_param *kp,
unsigned int device)
{
- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
- device, NULL);
+ struct pci_dev *pdev;
u32 n, q_num;
int ret;
if (!val)
return -EINVAL;
+ pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
if (!pdev) {
q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
pr_info("No device found currently, suppose queue number is %u\n",
@@ -401,6 +401,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp,
q_num = QM_QNUM_V1;
else
q_num = QM_QNUM_V2;
+
+ pci_dev_put(pdev);
}
ret = kstrtou32(val, 10, &n);
@@ -469,11 +471,11 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
void hisi_qm_dev_err_init(struct hisi_qm *qm);
void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
-int hisi_qm_diff_regs_init(struct hisi_qm *qm,
- struct dfx_diff_registers *dregs, int reg_len);
-void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len);
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+ struct dfx_diff_registers *dregs, u32 reg_len);
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len);
void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
- struct dfx_diff_registers *dregs, int regs_len);
+ struct dfx_diff_registers *dregs, u32 regs_len);
pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 77c2885c4c13..8a3115516a1b 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -34,7 +34,7 @@
* @priv: Private data, for use by the RNG driver.
* @quality: Estimation of true entropy in RNG's bitstream
* (in bits of entropy per 1024 bits of input;
- * valid values: 1 to 1024, or 0 for unknown).
+ * valid values: 1 to 1024, or 0 for maximum).
*/
struct hwrng {
const char *name;
diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h
index 578b18aab821..0824fbc026a1 100644
--- a/include/uapi/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
@@ -52,6 +52,7 @@ struct af_alg_iv {
#define ALG_SET_AEAD_ASSOCLEN 4
#define ALG_SET_AEAD_AUTHSIZE 5
#define ALG_SET_DRBG_ENTROPY 6
+#define ALG_SET_KEY_BY_KEY_SERIAL 7
/* Operations */
#define ALG_OP_DECRYPT 0
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 7e9683e9f5c6..45436bfc6dff 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -8,9 +8,18 @@ config CRYPTO_LIB_UTILS
config CRYPTO_LIB_AES
tristate
+config CRYPTO_LIB_AESGCM
+ tristate
+ select CRYPTO_LIB_AES
+ select CRYPTO_LIB_GF128MUL
+ select CRYPTO_LIB_UTILS
+
config CRYPTO_LIB_ARC4
tristate
+config CRYPTO_LIB_GF128MUL
+ tristate
+
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
bool
help
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index c852f067ab06..6ec2d4543d9c 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -10,9 +10,14 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
libaes-y := aes.o
+obj-$(CONFIG_CRYPTO_LIB_AESGCM) += libaesgcm.o
+libaesgcm-y := aesgcm.o
+
obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
libarc4-y := arc4.o
+obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
+
# blake2s is used by the /dev/random driver which is always builtin
obj-y += libblake2s.o
libblake2s-y := blake2s.o
diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c
new file mode 100644
index 000000000000..c632d6e17af8
--- /dev/null
+++ b/lib/crypto/aesgcm.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Minimal library implementation of GCM
+ *
+ * Copyright 2022 Google LLC
+ */
+
+#include <linux/module.h>
+
+#include <crypto/algapi.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
+
+#include <asm/irqflags.h>
+
+/*
+ * Encrypt one AES block from @src into @dst using the expanded key in @ctx,
+ * with local interrupts disabled for the duration of the aes_encrypt() call.
+ */
+static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
+ const void *src)
+{
+ unsigned long flags;
+
+ /*
+ * In AES-GCM, both the GHASH key derivation and the CTR mode
+ * encryption operate on known plaintext, making them susceptible to
+ * timing attacks on the encryption key. The AES library already
+ * mitigates this risk to some extent by pulling the entire S-box into
+ * the caches before doing any substitutions, but this strategy is more
+ * effective when running with interrupts disabled.
+ */
+ local_irq_save(flags);
+ aes_encrypt(ctx, dst, src);
+ local_irq_restore(flags);
+}
+
+/**
+ * aesgcm_expandkey - Expands the AES and GHASH keys for the AES-GCM key
+ * schedule
+ *
+ * @ctx: The data structure that will hold the AES-GCM key schedule
+ * @key: The AES encryption input key
+ * @keysize: The length in bytes of the input key
+ * @authsize: The size in bytes of the GCM authentication tag
+ *
+ * @authsize is recorded in @ctx; it determines how many tag bytes
+ * aesgcm_encrypt() writes and aesgcm_decrypt() compares.
+ *
+ * Returns: 0 on success, or -EINVAL if @keysize or @authsize contain values
+ * that are not permitted by the GCM specification.
+ */
+int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
+ unsigned int keysize, unsigned int authsize)
+{
+ u8 kin[AES_BLOCK_SIZE] = {};
+ int ret;
+
+ /*
+ * Check the tag size first; the AES key is only expanded if the tag
+ * size check returned 0.
+ */
+ ret = crypto_gcm_check_authsize(authsize) ?:
+ aes_expandkey(&ctx->aes_ctx, key, keysize);
+ if (ret)
+ return ret;
+
+ ctx->authsize = authsize;
+ /* The GHASH key is the encryption of the all-zero block 'kin'. */
+ aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, kin);
+
+ return 0;
+}
+EXPORT_SYMBOL(aesgcm_expandkey);
+
+/*
+ * Fold @len bytes at @src into the running GHASH state @ghash, one
+ * GHASH_BLOCK_SIZE block at a time: XOR the block into the state, then
+ * multiply the state by @key using gf128mul_lle(). A trailing partial block
+ * is XORed in at its actual length only, which leaves the remaining state
+ * bytes untouched (the same result as zero padding the input). Nothing is
+ * done if @len is not positive.
+ */
+static void aesgcm_ghash(be128 *ghash, const be128 *key, const void *src,
+ int len)
+{
+ while (len > 0) {
+ crypto_xor((u8 *)ghash, src, min(len, GHASH_BLOCK_SIZE));
+ gf128mul_lle(ghash, key);
+
+ src += GHASH_BLOCK_SIZE;
+ len -= GHASH_BLOCK_SIZE;
+ }
+}
+
+/*
+ * Compute the GCM authentication tag over @assoc and @src (the ciphertext)
+ * and store the first @ctx->authsize bytes of it in @authtag.
+ *
+ * GHASH is run over the associated data, then over @src, then over a final
+ * block holding both lengths in bits as big-endian 64-bit values. The result
+ * is XORed with the encryption of @ctr with its last 32-bit word set to 1.
+ * Note that @ctr[3] is overwritten as a side effect.
+ */
+static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len,
+		       const u8 *assoc, int assoc_len, __be32 *ctr, u8 *authtag)
+{
+	/*
+	 * Widen to 64 bits before converting bytes to bits: in plain 'int'
+	 * arithmetic the multiplication overflows for lengths of 256 MiB or
+	 * more, which would yield a wrong length block and hence a wrong tag.
+	 */
+	be128 tail = {
+		cpu_to_be64((u64)assoc_len * 8),
+		cpu_to_be64((u64)src_len * 8),
+	};
+	u8 buf[AES_BLOCK_SIZE];
+	be128 ghash = {};
+
+	aesgcm_ghash(&ghash, &ctx->ghash_key, assoc, assoc_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, src, src_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail));
+
+	ctr[3] = cpu_to_be32(1);
+	aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
+	crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize);
+
+	/* Do not leave the GHASH state or the encrypted counter on the stack. */
+	memzero_explicit(&ghash, sizeof(ghash));
+	memzero_explicit(buf, sizeof(buf));
+}
+
+/*
+ * XOR @len bytes of @src with an AES-CTR keystream and write the result to
+ * @dst. Each keystream block is the encryption of @ctr with its last 32-bit
+ * word set to a big-endian block counter that starts at 2, so @ctr[3] is
+ * modified as a side effect. The same routine serves both directions.
+ */
+static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+ int len, __be32 *ctr)
+{
+ u8 buf[AES_BLOCK_SIZE];
+ unsigned int n = 2;
+
+ while (len > 0) {
+ /*
+ * The counter increment below must not result in overflow or
+ * carry into the next 32-bit word, as this could result in
+ * inadvertent IV reuse, which must be avoided at all cost for
+ * stream ciphers such as AES-CTR. Given the range of 'int
+ * len', this cannot happen, so no explicit test is necessary.
+ */
+ ctr[3] = cpu_to_be32(n++);
+ aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
+ /* The final block may be partial; only the remaining bytes are used. */
+ crypto_xor_cpy(dst, src, buf, min(len, AES_BLOCK_SIZE));
+
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ len -= AES_BLOCK_SIZE;
+ }
+ /* Wipe the last keystream block from the stack. */
+ memzero_explicit(buf, sizeof(buf));
+}
+
+/**
+ * aesgcm_encrypt - Perform AES-GCM encryption on a block of data
+ *
+ * @ctx: The AES-GCM key schedule
+ * @dst: Pointer to the ciphertext output buffer
+ * @src: Pointer to the plaintext (may equal @dst for encryption in place)
+ * @crypt_len: The size in bytes of the plaintext and ciphertext.
+ * @assoc: Pointer to the associated data
+ * @assoc_len: The size in bytes of the associated data
+ * @iv: The initialization vector (IV) to use for this block of data
+ * (must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag: The address of the buffer in memory where the authentication
+ * tag should be stored. The buffer is assumed to have space for
+ * @ctx->authsize bytes.
+ */
+void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+ int crypt_len, const u8 *assoc, int assoc_len,
+ const u8 iv[GCM_AES_IV_SIZE], u8 *authtag)
+{
+ __be32 ctr[4];
+
+ /* The IV fills the first three words; ctr[3] is set by the helpers. */
+ memcpy(ctr, iv, GCM_AES_IV_SIZE);
+
+ /* Encrypt first: the tag is computed over the ciphertext in @dst. */
+ aesgcm_crypt(ctx, dst, src, crypt_len, ctr);
+ aesgcm_mac(ctx, dst, crypt_len, assoc, assoc_len, ctr, authtag);
+}
+EXPORT_SYMBOL(aesgcm_encrypt);
+
+/**
+ * aesgcm_decrypt - Perform AES-GCM decryption on a block of data
+ *
+ * @ctx: The AES-GCM key schedule
+ * @dst: Pointer to the plaintext output buffer
+ * @src: Pointer to the ciphertext (may equal @dst for decryption in place)
+ * @crypt_len: The size in bytes of the plaintext and ciphertext.
+ * @assoc: Pointer to the associated data
+ * @assoc_len: The size in bytes of the associated data
+ * @iv: The initialization vector (IV) to use for this block of data
+ * (must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag: The address of the buffer in memory where the authentication
+ * tag is stored. @ctx->authsize bytes of it are compared.
+ *
+ * Returns: true on success, or false if the ciphertext failed authentication.
+ * On failure, no plaintext will be returned.
+ */
+bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst,
+ const u8 *src, int crypt_len, const u8 *assoc,
+ int assoc_len, const u8 iv[GCM_AES_IV_SIZE],
+ const u8 *authtag)
+{
+ u8 tagbuf[AES_BLOCK_SIZE];
+ __be32 ctr[4];
+
+ /* The IV fills the first three words; ctr[3] is set by the helpers. */
+ memcpy(ctr, iv, GCM_AES_IV_SIZE);
+
+ /*
+ * Authenticate the ciphertext before decrypting anything, so that
+ * @dst is left untouched when the tag does not match.
+ */
+ aesgcm_mac(ctx, src, crypt_len, assoc, assoc_len, ctr, tagbuf);
+ if (crypto_memneq(authtag, tagbuf, ctx->authsize)) {
+ memzero_explicit(tagbuf, sizeof(tagbuf));
+ return false;
+ }
+ aesgcm_crypt(ctx, dst, src, crypt_len, ctr);
+ return true;
+}
+EXPORT_SYMBOL(aesgcm_decrypt);
+
+MODULE_DESCRIPTION("Generic AES-GCM library");
+MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
+MODULE_LICENSE("GPL");
+
+#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+
+/*
+ * Test code below. Vectors taken from crypto/testmgr.h
+ */
+
+static const u8 __initconst ctext0[16] =
+ "\x58\xe2\xfc\xce\xfa\x7e\x30\x61"
+ "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a";
+
+static const u8 __initconst ptext1[16];
+
+static const u8 __initconst ctext1[32] =
+ "\x03\x88\xda\xce\x60\xb6\xa3\x92"
+ "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78"
+ "\xab\x6e\x47\xd4\x2c\xec\x13\xbd"
+ "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf";
+
+static const u8 __initconst ptext2[64] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext2[80] =
+ "\x42\x83\x1e\xc2\x21\x77\x74\x24"
+ "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+ "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+ "\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+ "\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+ "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+ "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+ "\x3d\x58\xe0\x91\x47\x3f\x59\x85"
+ "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6"
+ "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4";
+
+static const u8 __initconst ptext3[60] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext3[76] =
+ "\x42\x83\x1e\xc2\x21\x77\x74\x24"
+ "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+ "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+ "\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+ "\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+ "\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+ "\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+ "\x3d\x58\xe0\x91"
+ "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb"
+ "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47";
+
+static const u8 __initconst ctext4[16] =
+ "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b"
+ "\xa0\x0e\xd1\xf3\x12\x57\x24\x35";
+
+static const u8 __initconst ctext5[32] =
+ "\x98\xe7\x24\x7c\x07\xf0\xfe\x41"
+ "\x1c\x26\x7e\x43\x84\xb0\xf6\x00"
+ "\x2f\xf5\x8d\x80\x03\x39\x27\xab"
+ "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb";
+
+static const u8 __initconst ptext6[64] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext6[80] =
+ "\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+ "\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+ "\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+ "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+ "\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+ "\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+ "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+ "\xcc\xda\x27\x10\xac\xad\xe2\x56"
+ "\x99\x24\xa7\xc8\x58\x73\x36\xbf"
+ "\xb1\x18\x02\x4d\xb8\x67\x4a\x14";
+
+static const u8 __initconst ctext7[16] =
+ "\x53\x0f\x8a\xfb\xc7\x45\x36\xb9"
+ "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b";
+
+static const u8 __initconst ctext8[32] =
+ "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e"
+ "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18"
+ "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0"
+ "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19";
+
+static const u8 __initconst ptext9[64] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext9[80] =
+ "\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+ "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+ "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+ "\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+ "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+ "\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+ "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+ "\xbc\xc9\xf6\x62\x89\x80\x15\xad"
+ "\xb0\x94\xda\xc5\xd9\x34\x71\xbd"
+ "\xec\x1a\x50\x22\x70\xe3\xcc\x6c";
+
+static const u8 __initconst ptext10[60] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext10[76] =
+ "\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+ "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+ "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+ "\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+ "\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+ "\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+ "\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+ "\xbc\xc9\xf6\x62"
+ "\x76\xfc\x6e\xce\x0f\x4e\x17\x68"
+ "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b";
+
+static const u8 __initconst ptext11[60] =
+ "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+ "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+ "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+ "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+ "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+ "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+ "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+ "\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext11[76] =
+ "\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+ "\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+ "\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+ "\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+ "\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+ "\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+ "\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+ "\xcc\xda\x27\x10"
+ "\x25\x19\x49\x8e\x80\xf1\x47\x8f"
+ "\x37\xba\x55\xbd\x6d\x27\x61\x8c";
+
+static const u8 __initconst ptext12[719] =
+ "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+ "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+ "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+ "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+ "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+ "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+ "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+ "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+ "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+ "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+ "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+ "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+ "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+ "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+ "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+ "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+ "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+ "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+ "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+ "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+ "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+ "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+ "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+ "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+ "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+ "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+ "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+ "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+ "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+ "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+ "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+ "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+ "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+ "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+ "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+ "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+ "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+ "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+ "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+ "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+ "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+ "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+ "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+ "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+ "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+ "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+ "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+ "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+ "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+ "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+ "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+ "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+ "\x87\x79\x60\x38\x46\xb4\x25\x57"
+ "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+ "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+ "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+ "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+ "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+ "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+ "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+ "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+ "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+ "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+ "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+ "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+ "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+ "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+ "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+ "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+ "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+ "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+ "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+ "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+ "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+ "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+ "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+ "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+ "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+ "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+ "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+ "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+ "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+ "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+ "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+ "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+ "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+ "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+ "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+ "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+ "\xa4\x78\xdb\x74\x3d\x8b\xb5";
+
+static const u8 __initconst ctext12[735] =
+ "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20"
+ "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0"
+ "\xa2\xb4\x37\x19\x11\x58\xb6\x0b"
+ "\x4c\x1d\x38\x05\x54\xd1\x16\x73"
+ "\x8e\x1c\x20\x90\xa2\x9a\xb7\x74"
+ "\x47\xe6\xd8\xfc\x18\x3a\xb4\xea"
+ "\xd5\x16\x5a\x2c\x53\x01\x46\xb3"
+ "\x18\x33\x74\x6c\x50\xf2\xe8\xc0"
+ "\x73\xda\x60\x22\xeb\xe3\xe5\x9b"
+ "\x20\x93\x6c\x4b\x37\x99\xb8\x23"
+ "\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7"
+ "\xc3\x8f\xfb\x4a\x37\xd9\x39\x95"
+ "\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b"
+ "\x02\xf1\x63\xfc\x9b\xfc\xc5\xab"
+ "\xb9\x14\x13\x21\xdf\xce\xaa\x88"
+ "\x44\x30\x1e\xce\x26\x01\x92\xf8"
+ "\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0"
+ "\x89\xca\x94\x66\x11\x21\x97\xca"
+ "\x3e\x83\x74\x2d\xdb\x4d\x11\xeb"
+ "\x97\xc2\x14\xff\x9e\x1e\xa0\x6b"
+ "\x08\xb4\x31\x2b\x85\xc6\x85\x6c"
+ "\x90\xec\x39\xc0\xec\xb3\xb5\x4e"
+ "\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4"
+ "\x56\xfe\xce\x18\x33\x6d\x0b\x2d"
+ "\x33\xda\xc8\x05\x5c\xb4\x09\x2a"
+ "\xde\x6b\x52\x98\x01\xef\x36\x3d"
+ "\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1"
+ "\x01\x2d\x42\x49\xc3\xb6\x84\xbb"
+ "\x48\x96\xe0\x90\x93\x6c\x48\x64"
+ "\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8"
+ "\x7a\x23\x7b\xaa\x20\x56\x12\xae"
+ "\x16\x9d\x94\x0f\x54\xa1\xec\xca"
+ "\x51\x4e\xf2\x39\xf4\xf8\x5f\x04"
+ "\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1"
+ "\xf5\x3c\xd8\x62\xa3\xed\x47\x89"
+ "\x85\x4c\xe5\xdb\xac\x9e\x17\x1d"
+ "\x0c\x09\xe3\x3e\x39\x5b\x4d\x74"
+ "\x0e\xf5\x34\xee\x70\x11\x4c\xfd"
+ "\xdb\x34\xb1\xb5\x10\x3f\x73\xb7"
+ "\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c"
+ "\x8d\x35\x83\xd4\x11\x44\x6e\x6c"
+ "\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb"
+ "\xa9\x57\x24\x37\xe6\x1f\xdd\xcf"
+ "\x16\x2a\x13\xf9\x6a\x2d\x90\xa0"
+ "\x03\x60\x7a\xed\x69\xd5\x00\x8b"
+ "\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37"
+ "\xc1\x26\xce\x90\x97\x22\x64\x64"
+ "\xc1\x72\x43\x1b\xf6\xac\xc1\x54"
+ "\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2"
+ "\xe4\x85\xda\xe0\x20\x5f\xf4\xb4"
+ "\x15\xb5\xa0\x8d\x12\x74\x49\x23"
+ "\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb"
+ "\xf8\xcc\x62\x7b\xfb\x93\x07\x41"
+ "\x61\x26\x94\x58\x70\xa6\x3c\xe4"
+ "\xff\x58\xc4\x13\x3d\xcb\x36\x6b"
+ "\x32\xe5\xb2\x6d\x03\x74\x6f\x76"
+ "\x93\x77\xde\x48\xc4\xfa\x30\x4a"
+ "\xda\x49\x80\x77\x0f\x1c\xbe\x11"
+ "\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1"
+ "\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2"
+ "\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91"
+ "\xb8\xfb\x86\xdc\x46\x24\x91\x60"
+ "\x6c\x2f\xc9\x41\x37\x51\x49\x54"
+ "\x09\x81\x21\xf3\x03\x9f\x2b\xe3"
+ "\x1f\x39\x63\xaf\xf4\xd7\x53\x60"
+ "\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d"
+ "\x75\x54\x65\x93\xfe\xb1\x68\x6b"
+ "\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf"
+ "\x01\x12\x27\xb4\xfe\xe4\x79\x7a"
+ "\x40\x5b\x51\x4b\xdf\x38\xec\xb1"
+ "\x6a\x56\xff\x35\x4d\x42\x33\xaa"
+ "\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35"
+ "\x62\x10\xd4\xec\xeb\xc5\x7e\x45"
+ "\x1c\x6f\x17\xca\x3b\x8e\x2d\x66"
+ "\x4f\x4b\x36\x56\xcd\x1b\x59\xaa"
+ "\xd2\x9b\x17\xb9\x58\xdf\x7b\x64"
+ "\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e"
+ "\xaa\xe2\x5d\x09\x71\x32\x5f\xb6"
+ "\x29\xbe\xe7\xc7\x52\x7e\x91\x82"
+ "\x6b\x6d\x33\xe1\x34\x06\x36\x21"
+ "\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea"
+ "\x49\x2c\xb5\xca\xf7\xb0\x37\xea"
+ "\x1f\xed\x10\x04\xd9\x48\x0d\x1a"
+ "\x1c\xfb\xe7\x84\x0e\x83\x53\x74"
+ "\xc7\x65\xe2\x5c\xe5\xba\x73\x4c"
+ "\x0e\xe1\xb5\x11\x45\x61\x43\x46"
+ "\xaa\x25\x8f\xbd\x85\x08\xfa\x4c"
+ "\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb"
+ "\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d"
+ "\x38\x58\x9e\x8a\x43\xdc\x57"
+ "\xd1\x81\x7d\x2b\xe9\xff\x99\x3a"
+ "\x4b\x24\x52\x58\x55\xe1\x49\x14";
+
+static struct {
+ const u8 *ptext;
+ const u8 *ctext;
+
+ u8 key[AES_MAX_KEY_SIZE];
+ u8 iv[GCM_AES_IV_SIZE];
+ u8 assoc[20];
+
+ int klen;
+ int clen;
+ int plen;
+ int alen;
+} const aesgcm_tv[] __initconst = {
+ { /* From McGrew & Viega - http://citeseer.ist.psu.edu/656989.html */
+ .klen = 16,
+ .ctext = ctext0,
+ .clen = sizeof(ctext0),
+ }, {
+ .klen = 16,
+ .ptext = ptext1,
+ .plen = sizeof(ptext1),
+ .ctext = ctext1,
+ .clen = sizeof(ctext1),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 16,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext2,
+ .plen = sizeof(ptext2),
+ .ctext = ctext2,
+ .clen = sizeof(ctext2),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 16,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext3,
+ .plen = sizeof(ptext3),
+ .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xab\xad\xda\xd2",
+ .alen = 20,
+ .ctext = ctext3,
+ .clen = sizeof(ctext3),
+ }, {
+ .klen = 24,
+ .ctext = ctext4,
+ .clen = sizeof(ctext4),
+ }, {
+ .klen = 24,
+ .ptext = ptext1,
+ .plen = sizeof(ptext1),
+ .ctext = ctext5,
+ .clen = sizeof(ctext5),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+ .klen = 24,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext6,
+ .plen = sizeof(ptext6),
+ .ctext = ctext6,
+ .clen = sizeof(ctext6),
+ }, {
+ .klen = 32,
+ .ctext = ctext7,
+ .clen = sizeof(ctext7),
+ }, {
+ .klen = 32,
+ .ptext = ptext1,
+ .plen = sizeof(ptext1),
+ .ctext = ctext8,
+ .clen = sizeof(ctext8),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 32,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext9,
+ .plen = sizeof(ptext9),
+ .ctext = ctext9,
+ .clen = sizeof(ctext9),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+ .klen = 32,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext10,
+ .plen = sizeof(ptext10),
+ .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xab\xad\xda\xd2",
+ .alen = 20,
+ .ctext = ctext10,
+ .clen = sizeof(ctext10),
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+ .klen = 24,
+ .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+ "\xde\xca\xf8\x88",
+ .ptext = ptext11,
+ .plen = sizeof(ptext11),
+ .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+ "\xab\xad\xda\xd2",
+ .alen = 20,
+ .ctext = ctext11,
+ .clen = sizeof(ctext11),
+ }, {
+ .key = "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6"
+ "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e"
+ "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+ "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+ .klen = 32,
+ .iv = "\x00\xff\xff\xff\xff\x00\x00\xff"
+ "\xff\xff\x00\xff",
+ .ptext = ptext12,
+ .plen = sizeof(ptext12),
+ .ctext = ctext12,
+ .clen = sizeof(ctext12),
+ }
+};
+
+/*
+ * Known-answer self-test, run at module init: every vector in aesgcm_tv[] is
+ * decrypted into a scratch buffer, re-encrypted in place and then decrypted
+ * in place again.
+ *
+ * Returns: 0 if all vectors pass, or -ENODEV on the first failure.
+ */
+static int __init libaesgcm_init(void)
+{
+ for (int i = 0; i < ARRAY_SIZE(aesgcm_tv); i++) {
+ u8 tagbuf[AES_BLOCK_SIZE];
+ int plen = aesgcm_tv[i].plen;
+ struct aesgcm_ctx ctx;
+ /* ptext12 is the largest plaintext in the table. */
+ u8 buf[sizeof(ptext12)];
+
+ /* The tag is whatever follows the plen bytes of ciphertext. */
+ if (aesgcm_expandkey(&ctx, aesgcm_tv[i].key, aesgcm_tv[i].klen,
+ aesgcm_tv[i].clen - plen)) {
+ pr_err("aesgcm_expandkey() failed on vector %d\n", i);
+ return -ENODEV;
+ }
+
+ if (!aesgcm_decrypt(&ctx, buf, aesgcm_tv[i].ctext, plen,
+ aesgcm_tv[i].assoc, aesgcm_tv[i].alen,
+ aesgcm_tv[i].iv, aesgcm_tv[i].ctext + plen)
+ || memcmp(buf, aesgcm_tv[i].ptext, plen)) {
+ pr_err("aesgcm_decrypt() #1 failed on vector %d\n", i);
+ return -ENODEV;
+ }
+
+ /*
+ * encrypt in place
+ *
+ * Only the ciphertext is compared here. The tag in tagbuf is
+ * checked indirectly: the in-place decrypt below fails unless
+ * it matches the tag recomputed over this ciphertext.
+ */
+ aesgcm_encrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc,
+ aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf);
+ if (memcmp(buf, aesgcm_tv[i].ctext, plen)) {
+ pr_err("aesgcm_encrypt() failed on vector %d\n", i);
+ return -ENODEV;
+ }
+
+ /* decrypt in place */
+ if (!aesgcm_decrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc,
+ aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf)
+ || memcmp(buf, aesgcm_tv[i].ptext, plen)) {
+ pr_err("aesgcm_decrypt() #2 failed on vector %d\n", i);
+ return -ENODEV;
+ }
+ }
+ return 0;
+}
+module_init(libaesgcm_init);
+
+/* Nothing to undo: libaesgcm_init() only runs the self-test. */
+static void __exit libaesgcm_exit(void)
+{
+}
+module_exit(libaesgcm_exit);
+#endif
diff --git a/crypto/gf128mul.c b/lib/crypto/gf128mul.c
index a69ae3e6c16c..8f8c45e0cdcf 100644
--- a/crypto/gf128mul.c
+++ b/lib/crypto/gf128mul.c
@@ -146,6 +146,17 @@ static void gf128mul_x8_lle(be128 *x)
x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
}
+/*
+ * Time invariant version of gf128mul_x8_lle(): the value that is XORed back
+ * into the upper word is computed from the low byte of 'b' with xda_le()
+ * instead of being fetched from a lookup table.
+ */
+static void gf128mul_x8_lle_ti(be128 *x)
+{
+ u64 a = be64_to_cpu(x->a);
+ u64 b = be64_to_cpu(x->b);
+ u64 _tt = xda_le(b & 0xff); /* avoid table lookup */
+
+ x->b = cpu_to_be64((b >> 8) | (a << 56));
+ x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
+}
+
+
static void gf128mul_x8_bbe(be128 *x)
{
u64 a = be64_to_cpu(x->a);
@@ -169,38 +180,47 @@ EXPORT_SYMBOL(gf128mul_x8_ble);
void gf128mul_lle(be128 *r, const be128 *b)
{
- be128 p[8];
+ /*
+ * The p array should be aligned to twice the size of its element type,
+ * so that every even/odd pair is guaranteed to share a cacheline
+ * (assuming a cacheline size of 32 bytes or more, which is by far the
+ * most common). This ensures that each be128_xor() call in the loop
+ * takes the same amount of time regardless of the value of 'ch', which
+ * is derived from function parameter 'b', which is commonly used as a
+ * key, e.g., for GHASH. The odd array elements are all set to zero,
+ * making each be128_xor() a NOP if its associated bit in 'ch' is not
+ * set, and this is equivalent to calling be128_xor() conditionally.
+ * This approach aims to avoid leaking information about such keys
+ * through execution time variances.
+ *
+ * Unfortunately, __aligned(16) or higher does not work on x86 for
+ * variables on the stack so we need to perform the alignment by hand.
+ *
+ * Rounding the start of the array up to a multiple of
+ * 2 * sizeof(be128) skips fewer than two elements, so the three spare
+ * elements leave room for the 16 slots (8 even/odd pairs) used below.
+ */
+ be128 array[16 + 3] = {};
+ be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128));
int i;
p[0] = *r;
for (i = 0; i < 7; ++i)
- gf128mul_x_lle(&p[i + 1], &p[i]);
+ gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]);
memset(r, 0, sizeof(*r));
for (i = 0;;) {
u8 ch = ((u8 *)b)[15 - i];
- if (ch & 0x80)
- be128_xor(r, r, &p[0]);
- if (ch & 0x40)
- be128_xor(r, r, &p[1]);
- if (ch & 0x20)
- be128_xor(r, r, &p[2]);
- if (ch & 0x10)
- be128_xor(r, r, &p[3]);
- if (ch & 0x08)
- be128_xor(r, r, &p[4]);
- if (ch & 0x04)
- be128_xor(r, r, &p[5]);
- if (ch & 0x02)
- be128_xor(r, r, &p[6]);
- if (ch & 0x01)
- be128_xor(r, r, &p[7]);
+ be128_xor(r, r, &p[ 0 + !(ch & 0x80)]);
+ be128_xor(r, r, &p[ 2 + !(ch & 0x40)]);
+ be128_xor(r, r, &p[ 4 + !(ch & 0x20)]);
+ be128_xor(r, r, &p[ 6 + !(ch & 0x10)]);
+ be128_xor(r, r, &p[ 8 + !(ch & 0x08)]);
+ be128_xor(r, r, &p[10 + !(ch & 0x04)]);
+ be128_xor(r, r, &p[12 + !(ch & 0x02)]);
+ be128_xor(r, r, &p[14 + !(ch & 0x01)]);
if (++i >= 16)
break;
- gf128mul_x8_lle(r);
+ gf128mul_x8_lle_ti(r); /* use the time invariant version */
}
}
EXPORT_SYMBOL(gf128mul_lle);