diff options
| author | Eric Biggers <ebiggers@kernel.org> | 2025-07-12 16:22:58 -0700 |
|---|---|---|
| committer | Eric Biggers <ebiggers@kernel.org> | 2025-07-14 11:11:29 -0700 |
| commit | 70cb6ca58fddb02e269fe743ba75d53d577b5b1c (patch) | |
| tree | fc0f7e08c047a35fbf765981731a6b24cb9fc652 /arch | |
| parent | crypto: sha1 - Use same state format as legacy drivers (diff) | |
| download | linux-70cb6ca58fddb02e269fe743ba75d53d577b5b1c.tar.gz linux-70cb6ca58fddb02e269fe743ba75d53d577b5b1c.zip | |
lib/crypto: arm/sha1: Migrate optimized code into library
Instead of exposing the arm-optimized SHA-1 code via arm-specific
crypto_shash algorithms, instead just implement the sha1_blocks()
library function. This is much simpler, it makes the SHA-1 library
functions be arm-optimized, and it fixes the longstanding issue where
the arm-optimized SHA-1 code was disabled by default. SHA-1 still
remains available through crypto_shash, but individual architectures no
longer need to handle it.
To match sha1_blocks(), change the type of the nblocks parameter of the
assembly functions from int to size_t. The assembly functions actually
already treated it as size_t.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-8-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/arm/configs/exynos_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm/configs/milbeaut_m10v_defconfig | 2 | ||||
| -rw-r--r-- | arch/arm/configs/multi_v7_defconfig | 2 | ||||
| -rw-r--r-- | arch/arm/configs/omap2plus_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm/configs/pxa_defconfig | 1 | ||||
| -rw-r--r-- | arch/arm/crypto/Kconfig | 31 | ||||
| -rw-r--r-- | arch/arm/crypto/Makefile | 6 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1-armv4-large.S | 507 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1-armv7-neon.S | 634 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1-ce-core.S | 123 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1-ce-glue.c | 72 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1_glue.c | 75 | ||||
| -rw-r--r-- | arch/arm/crypto/sha1_neon_glue.c | 83 |
13 files changed, 0 insertions, 1538 deletions
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index d58e30069304..6915c766923a 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -363,7 +363,6 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_DEV_EXYNOS_RNG=y diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index 8ebf8bd872fe..a3be0b2ede09 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -98,8 +98,6 @@ CONFIG_CRYPTO_SELFTESTS=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_GHASH_ARM_CE=m -CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 3fd07e864ca8..fb63f487a623 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1280,8 +1280,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_GHASH_ARM_CE=m -CONFIG_CRYPTO_SHA1_ARM_NEON=m -CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 530dfb8338c9..046467637901 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -704,7 +704,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_SECURITY=y CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_GHASH_ARM_CE=m -CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index eaa44574d4a6..1a80602c1284 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -658,7 +658,6 @@ CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y -CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index a18f97f1597c..1e5f3cdf691c 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -62,37 +62,6 @@ config CRYPTO_BLAKE2B_NEON much faster than the SHA-2 family and slightly faster than SHA-1. -config CRYPTO_SHA1_ARM - tristate "Hash functions: SHA-1" - select CRYPTO_SHA1 - select CRYPTO_HASH - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: arm - -config CRYPTO_SHA1_ARM_NEON - tristate "Hash functions: SHA-1 (NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA1_ARM - select CRYPTO_SHA1 - select CRYPTO_HASH - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: arm using - - NEON (Advanced SIMD) extensions - -config CRYPTO_SHA1_ARM_CE - tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA1_ARM - select CRYPTO_HASH - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: arm using ARMv8 Crypto Extensions - config CRYPTO_AES_ARM tristate "Ciphers: AES" select CRYPTO_ALGAPI diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 78a4042d8761..4f23999ae17d 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -5,22 +5,16 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o -obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o -obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o -sha1-arm-y := sha1-armv4-large.o sha1_glue.o -sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o -sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S deleted file mode 100644 index 1c8b685149f2..000000000000 --- a/arch/arm/crypto/sha1-armv4-large.S +++ /dev/null @@ -1,507 +0,0 @@ -#define __ARM_ARCH__ __LINUX_ARM_ARCH__ -@ SPDX-License-Identifier: GPL-2.0 - -@ This code is taken from the OpenSSL project but the author (Andy Polyakov) -@ has relicensed it under the GPLv2. Therefore this program is free software; -@ you can redistribute it and/or modify it under the terms of the GNU General -@ Public License version 2 as published by the Free Software Foundation. -@ -@ The original headers, including the original license headers, are -@ included below for completeness. - -@ ==================================================================== -@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see https://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ sha1_block procedure for ARMv4. -@ -@ January 2007. - -@ Size/performance trade-off -@ ==================================================================== -@ impl size in bytes comp cycles[*] measured performance -@ ==================================================================== -@ thumb 304 3212 4420 -@ armv4-small 392/+29% 1958/+64% 2250/+96% -@ armv4-compact 740/+89% 1552/+26% 1840/+22% -@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] -@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% -@ ==================================================================== -@ thumb = same as 'small' but in Thumb instructions[**] and -@ with recurring code in two private functions; -@ small = detached Xload/update, loops are folded; -@ compact = detached Xload/update, 5x unroll; -@ large = interleaved Xload/update, 5x unroll; -@ full unroll = interleaved Xload/update, full unroll, estimated[!]; -@ -@ [*] Manually counted instructions in "grand" loop body. Measured -@ performance is affected by prologue and epilogue overhead, -@ i-cache availability, branch penalties, etc. -@ [**] While each Thumb instruction is twice smaller, they are not as -@ diverse as ARM ones: e.g., there are only two arithmetic -@ instructions with 3 arguments, no [fixed] rotate, addressing -@ modes are limited. As result it takes more instructions to do -@ the same job in Thumb, therefore the code is never twice as -@ small and always slower. -@ [***] which is also ~35% better than compiler generated code. Dual- -@ issue Cortex A8 core was measured to process input block in -@ ~990 cycles. - -@ August 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 13% improvement on -@ Cortex A8 core and in absolute terms ~870 cycles per input block -@ [or 13.6 cycles per byte]. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 10% -@ improvement on Cortex A8 core and 12.2 cycles per byte. - -#include <linux/linkage.h> - -.text - -.align 2 -ENTRY(sha1_block_data_order) - stmdb sp!,{r4-r12,lr} - add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 - ldmia r0,{r3,r4,r5,r6,r7} -.Lloop: - ldr r8,.LK_00_19 - mov r14,sp - sub sp,sp,#15*4 - mov r5,r5,ror#30 - mov r6,r6,ror#30 - mov r7,r7,ror#30 @ [6] -.L_00_15: -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r6,r8,r6,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r4,r5 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r6,r8,r6,ror#2 @ E+=K_00_19 - eor r10,r4,r5 @ F_xx_xx - add r6,r6,r7,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r3,r10,ror#2 - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r6,r6,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r5,r8,r5,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r3,r4 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r5,r8,r5,ror#2 @ E+=K_00_19 - eor r10,r3,r4 @ F_xx_xx - add r5,r5,r6,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r7,r10,ror#2 - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r5,r5,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r4,r8,r4,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r7,r3 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r4,r8,r4,ror#2 @ E+=K_00_19 - eor r10,r7,r3 @ F_xx_xx - add r4,r4,r5,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r6,r10,ror#2 - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r4,r4,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r3,r8,r3,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r6,r7 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r3,r8,r3,ror#2 @ E+=K_00_19 - eor r10,r6,r7 @ F_xx_xx - add r3,r3,r4,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r5,r10,ror#2 - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r3,r3,r10 @ E+=F_00_19(B,C,D) - cmp r14,sp - bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#25*4 -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - add r6,r6,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - add r5,r5,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - add r4,r4,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - add r3,r3,r10 @ E+=F_00_19(B,C,D) - - ldr r8,.LK_20_39 @ [+15+16*4] - cmn sp,#0 @ [+3], clear carry to denote 20_39 -.L_20_39_or_60_79: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r4,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_20_39(B,C,D) - ARM( teq r14,sp ) @ preserve carry - THUMB( mov r11,sp ) - THUMB( teq r14,r11 ) @ preserve carry - bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] - bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes - - ldr r8,.LK_40_59 - sub sp,sp,#20*4 @ [+2] -.L_40_59: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r4,r10,ror#2 @ F_xx_xx - and r11,r5,r6 @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_40_59(B,C,D) - add r7,r7,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - and r11,r4,r5 @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_40_59(B,C,D) - add r6,r6,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - and r11,r3,r4 @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_40_59(B,C,D) - add r5,r5,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - and r11,r7,r3 @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_40_59(B,C,D) - add r4,r4,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - and r11,r6,r7 @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_40_59(B,C,D) - add r3,r3,r11,ror#2 - cmp r14,sp - bne .L_40_59 @ [+((12+5)*5+2)*4] - - ldr r8,.LK_60_79 - sub sp,sp,#20*4 - cmp sp,#0 @ set carry to denote 60_79 - b .L_20_39_or_60_79 @ [+4], spare 300 bytes -.L_done: - add sp,sp,#80*4 @ "deallocate" stack frame - ldmia r0,{r8,r9,r10,r11,r12} - add r3,r8,r3 - add r4,r9,r4 - add r5,r10,r5,ror#2 - add r6,r11,r6,ror#2 - add r7,r12,r7,ror#2 - stmia r0,{r3,r4,r5,r6,r7} - teq r1,r2 - bne .Lloop @ [+18], total 1307 - - ldmia sp!,{r4-r12,pc} -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -ENDPROC(sha1_block_data_order) -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" -.align 2 diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S deleted file mode 100644 index 28d816a6a530..000000000000 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ /dev/null @@ -1,634 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function - * - * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - -.syntax unified -.fpu neon - -.text - - -/* Context structure */ - -#define state_h0 0 -#define state_h1 4 -#define state_h2 8 -#define state_h3 12 -#define state_h4 16 - - -/* Constants */ - -#define K1 0x5A827999 -#define K2 0x6ED9EBA1 -#define K3 0x8F1BBCDC -#define K4 0xCA62C1D6 -.align 4 -.LK_VEC: -.LK1: .long K1, K1, K1, K1 -.LK2: .long K2, K2, K2, K2 -.LK3: .long K3, K3, K3, K3 -.LK4: .long K4, K4, K4, K4 - - -/* Register macros */ - -#define RSTATE r0 -#define RDATA r1 -#define RNBLKS r2 -#define ROLDSTACK r3 -#define RWK lr - -#define _a r4 -#define _b r5 -#define _c r6 -#define _d r7 -#define _e r8 - -#define RT0 r9 -#define RT1 r10 -#define RT2 r11 -#define RT3 r12 - -#define W0 q0 -#define W1 q7 -#define W2 q2 -#define W3 q3 -#define W4 q4 -#define W5 q6 -#define W6 q5 -#define W7 q1 - -#define tmp0 q8 -#define tmp1 q9 -#define tmp2 q10 -#define tmp3 q11 - -#define qK1 q12 -#define qK2 q13 -#define qK3 q14 -#define qK4 q15 - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define ARM_LE(code...) -#else -#define ARM_LE(code...) code -#endif - -/* Round function macros. */ - -#define WK_offs(i) (((i) & 15) * 4) - -#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - bic RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - and RT1, c, b; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add RT0, RT0, RT3; \ - add e, e, RT1; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; - -#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - eor RT0, RT0, c; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT3; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; \ - -#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, b, c; \ - and RT1, b, c; \ - add e, e, a, ror #(32 - 5); \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - and RT0, RT0, d; \ - add RT1, RT1, RT3; \ - add e, e, RT0; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT1; - -#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define R(a,b,c,d,e,f,i) \ - _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define dummy(...) - - -/* Input expansion macros. */ - -/********* Precalc macros for rounds 0-15 *************************************/ - -#define W_PRECALC_00_15() \ - add RWK, sp, #(WK_offs(0)); \ - \ - vld1.32 {W0, W7}, [RDATA]!; \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - vld1.32 {W6, W5}, [RDATA]!; \ - vadd.u32 tmp0, W0, curK; \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - vadd.u32 tmp1, W7, curK; \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - vadd.u32 tmp2, W6, curK; \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - vadd.u32 tmp3, W5, curK; \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - -#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W0, W7}, [RDATA]!; \ - -#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(0)); \ - -#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - -#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W6, W5}, [RDATA]!; \ - -#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W0, curK; \ - -#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - -#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - -#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp1, W7, curK; \ - -#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - -#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp2, W6, curK; \ - -#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - -#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp3, W5, curK; \ - -#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - - -/********* Precalc macros for rounds 16-31 ************************************/ - -#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0; \ - vext.8 W, W_m16, W_m12, #8; \ - -#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i)); \ - vext.8 tmp0, W_m04, tmp0, #4; \ - -#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W_m16; \ - veor.32 W, W, W_m08; \ - -#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp1, tmp1; \ - veor W, W, tmp0; \ - -#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp0, W, #1; \ - -#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp1, tmp1, W, #(16-12); \ - vshr.u32 W, W, #31; \ - -#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr tmp0, tmp0, W; \ - vshr.u32 W, tmp1, #30; \ - -#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, tmp1, #2; \ - -#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W; \ - -#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0, tmp1; \ - -#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0}, [RWK]; - - -/********* Precalc macros for rounds 32-79 ************************************/ - -#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m28; \ - -#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp0, W_m08, W_m04, #8; \ - -#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m16; \ - -#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0; \ - -#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i&~3)); \ - -#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, W, #2; \ - -#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshr.u32 tmp0, W, #30; \ - -#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr W, tmp0, tmp1; \ - -#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0}, [RWK]; - - -/* - * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. - * - * unsigned int - * sha1_transform_neon (void *ctx, const unsigned char *data, - * unsigned int nblks) - */ -.align 3 -ENTRY(sha1_transform_neon) - /* input: - * r0: ctx, CTX - * r1: data (64*nblks bytes) - * r2: nblks - */ - - cmp RNBLKS, #0; - beq .Ldo_nothing; - - push {r4-r12, lr}; - /*vpush {q4-q7};*/ - - adr RT3, .LK_VEC; - - mov ROLDSTACK, sp; - - /* Align stack. */ - sub RT0, sp, #(16*4); - and RT0, #(~(16-1)); - mov sp, RT0; - - vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ - - /* Get the values of the chaining variables. */ - ldm RSTATE, {_a-_e}; - - vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ - -#undef curK -#define curK qK1 - /* Precalc 0-15. */ - W_PRECALC_00_15(); - -.Loop: - /* Transform 0-15 + Precalc 16-31. */ - _R( _a, _b, _c, _d, _e, F1, 0, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 1, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 2, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 3, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, - W4, W5, W6, W7, W0, _, _, _ ); - -#undef curK -#define curK qK2 - _R( _b, _c, _d, _e, _a, F1, 4, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 5, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 6, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 7, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, - W3, W4, W5, W6, W7, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F1, 8, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 9, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 10, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 11, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, - W2, W3, W4, W5, W6, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F1, 12, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 13, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 14, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 15, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, - W1, W2, W3, W4, W5, _, _, _ ); - - /* Transform 16-63 + Precalc 32-79. */ - _R( _e, _a, _b, _c, _d, F1, 16, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _d, _e, _a, _b, _c, F1, 17, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _c, _d, _e, _a, _b, F1, 18, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F1, 19, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _a, _b, _c, _d, _e, F2, 20, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _e, _a, _b, _c, _d, F2, 21, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _d, _e, _a, _b, _c, F2, 22, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F2, 23, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - -#undef curK -#define curK qK3 - _R( _b, _c, _d, _e, _a, F2, 24, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _a, _b, _c, _d, _e, F2, 25, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _e, _a, _b, _c, _d, F2, 26, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F2, 27, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - - _R( _c, _d, _e, _a, _b, F2, 28, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _b, _c, _d, _e, _a, F2, 29, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _a, _b, _c, _d, _e, F2, 30, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F2, 31, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - - _R( _d, _e, _a, _b, _c, F2, 32, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _c, _d, _e, _a, _b, F2, 33, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _b, _c, _d, _e, _a, F2, 34, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _a, _b, _c, _d, _e, F2, 35, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - - _R( _e, _a, _b, _c, _d, F2, 36, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _d, _e, _a, _b, _c, F2, 37, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _c, _d, _e, _a, _b, F2, 38, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _b, _c, _d, _e, _a, F2, 39, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - - _R( _a, _b, _c, _d, _e, F3, 40, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _e, _a, _b, _c, _d, F3, 41, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _d, _e, _a, _b, _c, F3, 42, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _c, _d, _e, _a, _b, F3, 43, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - -#undef curK -#define curK qK4 - _R( _b, _c, _d, _e, _a, F3, 44, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _a, _b, _c, _d, _e, F3, 45, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _e, _a, _b, _c, _d, F3, 46, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _d, _e, _a, _b, _c, F3, 47, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - - _R( _c, _d, _e, _a, _b, F3, 48, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F3, 49, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _a, _b, _c, _d, _e, F3, 50, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _e, _a, _b, _c, _d, F3, 51, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _d, _e, _a, _b, _c, F3, 52, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F3, 53, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _b, _c, _d, _e, _a, F3, 54, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _a, _b, _c, _d, _e, F3, 55, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - - _R( _e, _a, _b, _c, _d, F3, 56, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F3, 57, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _c, _d, _e, _a, _b, F3, 58, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _b, _c, _d, _e, _a, F3, 59, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - - subs RNBLKS, #1; - - _R( _a, _b, _c, _d, _e, F4, 60, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F4, 61, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _d, _e, _a, _b, _c, F4, 62, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _c, _d, _e, _a, _b, F4, 63, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - - beq .Lend; - - /* Transform 64-79 + Precalc 0-15 of next block. */ -#undef curK -#define curK qK1 - _R( _b, _c, _d, _e, _a, F4, 64, - WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 65, - WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 66, - WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 67, - WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F4, 68, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 69, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 70, - WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 71, - WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F4, 72, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 73, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 74, - WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 75, - WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _e, _a, _b, _c, _d, F4, 76, - WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 77, - WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 78, - WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 79, - WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - add _e, RT0; - stm RSTATE, {_a-_e}; - - b .Loop; - -.Lend: - /* Transform 64-79 */ - R( _b, _c, _d, _e, _a, F4, 64 ); - R( _a, _b, _c, _d, _e, F4, 65 ); - R( _e, _a, _b, _c, _d, F4, 66 ); - R( _d, _e, _a, _b, _c, F4, 67 ); - R( _c, _d, _e, _a, _b, F4, 68 ); - R( _b, _c, _d, _e, _a, F4, 69 ); - R( _a, _b, _c, _d, _e, F4, 70 ); - R( _e, _a, _b, _c, _d, F4, 71 ); - R( _d, _e, _a, _b, _c, F4, 72 ); - R( _c, _d, _e, _a, _b, F4, 73 ); - R( _b, _c, _d, _e, _a, F4, 74 ); - R( _a, _b, _c, _d, _e, F4, 75 ); - R( _e, _a, _b, _c, _d, F4, 76 ); - R( _d, _e, _a, _b, _c, F4, 77 ); - R( _c, _d, _e, _a, _b, F4, 78 ); - R( _b, _c, _d, _e, _a, F4, 79 ); - - mov sp, ROLDSTACK; - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - /*vpop {q4-q7};*/ - add _e, RT0; - stm RSTATE, {_a-_e}; - - pop {r4-r12, pc}; - -.Ldo_nothing: - bx lr -ENDPROC(sha1_transform_neon) diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S deleted file mode 100644 index 8a702e051738..000000000000 --- a/arch/arm/crypto/sha1-ce-core.S +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd. - * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .text - .arch armv8-a - .fpu crypto-neon-fp-armv8 - - k0 .req q0 - k1 .req q1 - k2 .req q2 - k3 .req q3 - - ta0 .req q4 - ta1 .req q5 - tb0 .req q5 - tb1 .req q4 - - dga .req q6 - dgb .req q7 - dgbs .req s28 - - dg0 .req q12 - dg1a0 .req q13 - dg1a1 .req q14 - dg1b0 .req q14 - dg1b1 .req q13 - - .macro add_only, op, ev, rc, s0, dg1 - .ifnb \s0 - vadd.u32 tb\ev, q\s0, \rc - .endif - sha1h.32 dg1b\ev, dg0 - .ifb \dg1 - sha1\op\().32 dg0, dg1a\ev, ta\ev - .else - sha1\op\().32 dg0, \dg1, ta\ev - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0.32 q\s0, q\s1, q\s2 - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1.32 q\s0, q\s3 - .endm - - .align 6 -.Lsha1_rcon: - .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 - .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 - .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc - .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 - - /* - * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - * int blocks); - */ -ENTRY(sha1_ce_transform) - /* load round constants */ - adr ip, .Lsha1_rcon - vld1.32 {k0-k1}, [ip, :128]! - vld1.32 {k2-k3}, [ip, :128] - - /* load state */ - vld1.32 {dga}, [r0] - vldr dgbs, [r0, #16] - - /* load input */ -0: vld1.32 {q8-q9}, [r1]! - vld1.32 {q10-q11}, [r1]! - subs r2, r2, #1 - -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev32.8 q8, q8 - vrev32.8 q9, q9 - vrev32.8 q10, q10 - vrev32.8 q11, q11 -#endif - - vadd.u32 ta0, q8, k0 - vmov dg0, dga - - add_update c, 0, k0, 8, 9, 10, 11, dgb - add_update c, 1, k0, 9, 10, 11, 8 - add_update c, 0, k0, 10, 11, 8, 9 - add_update c, 1, k0, 11, 8, 9, 10 - add_update c, 0, k1, 8, 9, 10, 11 - - add_update p, 1, k1, 9, 10, 11, 8 - add_update p, 0, k1, 10, 11, 8, 9 - add_update p, 1, k1, 11, 8, 9, 10 - add_update p, 0, k1, 8, 9, 10, 11 - add_update p, 1, k2, 9, 10, 11, 8 - - add_update m, 0, k2, 10, 11, 8, 9 - add_update m, 1, k2, 11, 8, 9, 10 - add_update m, 0, k2, 8, 9, 10, 11 - add_update m, 1, k2, 9, 10, 11, 8 - add_update m, 0, k3, 10, 11, 8, 9 - - add_update p, 1, k3, 11, 8, 9, 10 - add_only p, 0, k3, 9 - add_only p, 1, k3, 10 - add_only p, 0, k3, 11 - add_only p, 1 - - /* update state */ - vadd.u32 dga, dga, dg0 - vadd.u32 dgb, dgb, dg1a0 - bne 0b - - /* store new state */ - vst1.32 {dga}, [r0] - vstr dgbs, [r0, #16] - bx lr -ENDPROC(sha1_ce_transform) diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c deleted file mode 100644 index fac07a4799de..000000000000 --- a/arch/arm/crypto/sha1-ce-glue.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <asm/neon.h> -#include <crypto/internal/hash.h> -#include <crypto/sha1.h> -#include <crypto/sha1_base.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> - -MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); - -asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - int blocks); - -static int sha1_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); - kernel_neon_end(); - - return remain; -} - -static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha1_base_do_finup(desc, data, len, sha1_ce_transform); - kernel_neon_end(); - - return sha1_base_finish(desc, out); -} - -static struct shash_alg alg = { - .init = sha1_base_init, - .update = sha1_ce_update, - .finup = sha1_ce_finup, - .descsize = SHA1_STATE_SIZE, - .digestsize = SHA1_DIGEST_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name = "sha1-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha1_ce_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit sha1_ce_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(SHA1, sha1_ce_mod_init); -module_exit(sha1_ce_mod_fini); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c deleted file mode 100644 index 255da00c7d98..000000000000 --- a/arch/arm/crypto/sha1_glue.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Cryptographic API. - * Glue code for the SHA1 Secure Hash Algorithm assembler implementation - * - * This file is based on sha1_generic.c and sha1_ssse3_glue.c - * - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - * Copyright (c) Mathias Krause <minipli@googlemail.com> - */ - -#include <crypto/internal/hash.h> -#include <crypto/sha1.h> -#include <crypto/sha1_base.h> -#include <linux/kernel.h> -#include <linux/module.h> - -asmlinkage void sha1_block_data_order(struct sha1_state *digest, - const u8 *data, int rounds); - -static int sha1_update_arm(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - /* make sure signature matches sha1_block_fn() */ - BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); - - return sha1_base_do_update_blocks(desc, data, len, - sha1_block_data_order); -} - -static int sha1_finup_arm(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - sha1_base_do_finup(desc, data, len, sha1_block_data_order); - return sha1_base_finish(desc, out); -} - -static struct shash_alg alg = { - .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_base_init, - .update = sha1_update_arm, - .finup = sha1_finup_arm, - .descsize = SHA1_STATE_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name= "sha1-asm", - .cra_priority = 150, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - - -static int __init sha1_mod_init(void) -{ - return crypto_register_shash(&alg); -} - - -static void __exit sha1_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - - -module_init(sha1_mod_init); -module_exit(sha1_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); -MODULE_ALIAS_CRYPTO("sha1"); -MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c deleted file mode 100644 index d321850f22a6..000000000000 --- a/arch/arm/crypto/sha1_neon_glue.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using - * ARM NEON instructions. - * - * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This file is based on sha1_generic.c and sha1_ssse3_glue.c: - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - * Copyright (c) Mathias Krause <minipli@googlemail.com> - * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> - */ - -#include <asm/neon.h> -#include <crypto/internal/hash.h> -#include <crypto/sha1.h> -#include <crypto/sha1_base.h> -#include <linux/kernel.h> -#include <linux/module.h> - -asmlinkage void sha1_transform_neon(struct sha1_state *state_h, - const u8 *data, int rounds); - -static int sha1_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - int remain; - - kernel_neon_begin(); - remain = sha1_base_do_update_blocks(desc, data, len, - sha1_transform_neon); - kernel_neon_end(); - - return remain; -} - -static int sha1_neon_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - kernel_neon_begin(); - sha1_base_do_finup(desc, data, len, sha1_transform_neon); - kernel_neon_end(); - - return sha1_base_finish(desc, out); -} - -static struct shash_alg alg = { - .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_base_init, - .update = sha1_neon_update, - .finup = sha1_neon_finup, - .descsize = SHA1_STATE_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name = "sha1-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha1_neon_mod_init(void) -{ - if (!cpu_has_neon()) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit sha1_neon_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(sha1_neon_mod_init); -module_exit(sha1_neon_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated"); -MODULE_ALIAS_CRYPTO("sha1"); |
