1f2f770d7SSami Tolvanen#!/usr/bin/env perl 2c2e415feSAdam Langley# SPDX-License-Identifier: GPL-2.0 3c2e415feSAdam Langley 4c2e415feSAdam Langley# This code is taken from the OpenSSL project but the author (Andy Polyakov) 5c2e415feSAdam Langley# has relicensed it under the GPLv2. Therefore this program is free software; 6c2e415feSAdam Langley# you can redistribute it and/or modify it under the terms of the GNU General 7c2e415feSAdam Langley# Public License version 2 as published by the Free Software Foundation. 8c2e415feSAdam Langley# 9c2e415feSAdam Langley# The original headers, including the original license headers, are 10c2e415feSAdam Langley# included below for completeness. 11f2f770d7SSami Tolvanen 12f2f770d7SSami Tolvanen# ==================================================================== 13f2f770d7SSami Tolvanen# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 14f2f770d7SSami Tolvanen# project. The module is, however, dual licensed under OpenSSL and 15f2f770d7SSami Tolvanen# CRYPTOGAMS licenses depending on where you obtain it. For further 169332a9e7SAlexander A. Klimov# details see https://www.openssl.org/~appro/cryptogams/. 17f2f770d7SSami Tolvanen# ==================================================================== 18f2f770d7SSami Tolvanen 19f2f770d7SSami Tolvanen# SHA256 block procedure for ARMv4. May 2007. 20f2f770d7SSami Tolvanen 21f2f770d7SSami Tolvanen# Performance is ~2x better than gcc 3.4 generated code and in "abso- 22f2f770d7SSami Tolvanen# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per 23f2f770d7SSami Tolvanen# byte [on single-issue Xscale PXA250 core]. 24f2f770d7SSami Tolvanen 25f2f770d7SSami Tolvanen# July 2010. 26f2f770d7SSami Tolvanen# 27f2f770d7SSami Tolvanen# Rescheduling for dual-issue pipeline resulted in 22% improvement on 28f2f770d7SSami Tolvanen# Cortex A8 core and ~20 cycles per processed byte. 29f2f770d7SSami Tolvanen 30f2f770d7SSami Tolvanen# February 2011. 
31f2f770d7SSami Tolvanen# 32f2f770d7SSami Tolvanen# Profiler-assisted and platform-specific optimization resulted in 16% 33f2f770d7SSami Tolvanen# improvement on Cortex A8 core and ~15.4 cycles per processed byte. 34f2f770d7SSami Tolvanen 35f2f770d7SSami Tolvanen# September 2013. 36f2f770d7SSami Tolvanen# 37f2f770d7SSami Tolvanen# Add NEON implementation. On Cortex A8 it was measured to process one 38f2f770d7SSami Tolvanen# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon 39f2f770d7SSami Tolvanen# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only 40f2f770d7SSami Tolvanen# code (meaning that latter performs sub-optimally, nothing was done 41f2f770d7SSami Tolvanen# about it). 42f2f770d7SSami Tolvanen 43f2f770d7SSami Tolvanen# May 2014. 44f2f770d7SSami Tolvanen# 45f2f770d7SSami Tolvanen# Add ARMv8 code path performing at 2.0 cpb on Apple A7. 46f2f770d7SSami Tolvanen 47f2f770d7SSami Tolvanenwhile (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 48f2f770d7SSami Tolvanenopen STDOUT,">$output"; 49f2f770d7SSami Tolvanen 50f2f770d7SSami Tolvanen$ctx="r0"; $t0="r0"; 51f2f770d7SSami Tolvanen$inp="r1"; $t4="r1"; 52f2f770d7SSami Tolvanen$len="r2"; $t1="r2"; 53f2f770d7SSami Tolvanen$T1="r3"; $t3="r3"; 54f2f770d7SSami Tolvanen$A="r4"; 55f2f770d7SSami Tolvanen$B="r5"; 56f2f770d7SSami Tolvanen$C="r6"; 57f2f770d7SSami Tolvanen$D="r7"; 58f2f770d7SSami Tolvanen$E="r8"; 59f2f770d7SSami Tolvanen$F="r9"; 60f2f770d7SSami Tolvanen$G="r10"; 61f2f770d7SSami Tolvanen$H="r11"; 62f2f770d7SSami Tolvanen@V=($A,$B,$C,$D,$E,$F,$G,$H); 63f2f770d7SSami Tolvanen$t2="r12"; 64f2f770d7SSami Tolvanen$Ktbl="r14"; 65f2f770d7SSami Tolvanen 66f2f770d7SSami Tolvanen@Sigma0=( 2,13,22); 67f2f770d7SSami Tolvanen@Sigma1=( 6,11,25); 68f2f770d7SSami Tolvanen@sigma0=( 7,18, 3); 69f2f770d7SSami Tolvanen@sigma1=(17,19,10); 70f2f770d7SSami Tolvanen 71f2f770d7SSami Tolvanensub BODY_00_15 { 72f2f770d7SSami Tolvanenmy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 73f2f770d7SSami Tolvanen 
74f2f770d7SSami Tolvanen$code.=<<___ if ($i<16); 75f2f770d7SSami Tolvanen#if __ARM_ARCH__>=7 76f2f770d7SSami Tolvanen @ ldr $t1,[$inp],#4 @ $i 77f2f770d7SSami Tolvanen# if $i==15 78f2f770d7SSami Tolvanen str $inp,[sp,#17*4] @ make room for $t4 79f2f770d7SSami Tolvanen# endif 80f2f770d7SSami Tolvanen eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 81f2f770d7SSami Tolvanen add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 82f2f770d7SSami Tolvanen eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 83f2f770d7SSami Tolvanen# ifndef __ARMEB__ 84f2f770d7SSami Tolvanen rev $t1,$t1 85f2f770d7SSami Tolvanen# endif 86f2f770d7SSami Tolvanen#else 87f2f770d7SSami Tolvanen @ ldrb $t1,[$inp,#3] @ $i 88f2f770d7SSami Tolvanen add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 89f2f770d7SSami Tolvanen ldrb $t2,[$inp,#2] 90f2f770d7SSami Tolvanen ldrb $t0,[$inp,#1] 91f2f770d7SSami Tolvanen orr $t1,$t1,$t2,lsl#8 92f2f770d7SSami Tolvanen ldrb $t2,[$inp],#4 93f2f770d7SSami Tolvanen orr $t1,$t1,$t0,lsl#16 94f2f770d7SSami Tolvanen# if $i==15 95f2f770d7SSami Tolvanen str $inp,[sp,#17*4] @ make room for $t4 96f2f770d7SSami Tolvanen# endif 97f2f770d7SSami Tolvanen eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 98f2f770d7SSami Tolvanen orr $t1,$t1,$t2,lsl#24 99f2f770d7SSami Tolvanen eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 100f2f770d7SSami Tolvanen#endif 101f2f770d7SSami Tolvanen___ 102f2f770d7SSami Tolvanen$code.=<<___; 103f2f770d7SSami Tolvanen ldr $t2,[$Ktbl],#4 @ *K256++ 104f2f770d7SSami Tolvanen add $h,$h,$t1 @ h+=X[i] 105f2f770d7SSami Tolvanen str $t1,[sp,#`$i%16`*4] 106f2f770d7SSami Tolvanen eor $t1,$f,$g 107f2f770d7SSami Tolvanen add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) 108f2f770d7SSami Tolvanen and $t1,$t1,$e 109f2f770d7SSami Tolvanen add $h,$h,$t2 @ h+=K256[i] 110f2f770d7SSami Tolvanen eor $t1,$t1,$g @ Ch(e,f,g) 111f2f770d7SSami Tolvanen eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` 112f2f770d7SSami Tolvanen add $h,$h,$t1 @ h+=Ch(e,f,g) 113f2f770d7SSami Tolvanen#if $i==31 114f2f770d7SSami 
Tolvanen and $t2,$t2,#0xff 115f2f770d7SSami Tolvanen cmp $t2,#0xf2 @ done? 116f2f770d7SSami Tolvanen#endif 117f2f770d7SSami Tolvanen#if $i<15 118f2f770d7SSami Tolvanen# if __ARM_ARCH__>=7 119f2f770d7SSami Tolvanen ldr $t1,[$inp],#4 @ prefetch 120f2f770d7SSami Tolvanen# else 121f2f770d7SSami Tolvanen ldrb $t1,[$inp,#3] 122f2f770d7SSami Tolvanen# endif 123f2f770d7SSami Tolvanen eor $t2,$a,$b @ a^b, b^c in next round 124f2f770d7SSami Tolvanen#else 125f2f770d7SSami Tolvanen ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx 126f2f770d7SSami Tolvanen eor $t2,$a,$b @ a^b, b^c in next round 127f2f770d7SSami Tolvanen ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx 128f2f770d7SSami Tolvanen#endif 129f2f770d7SSami Tolvanen eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) 130f2f770d7SSami Tolvanen and $t3,$t3,$t2 @ (b^c)&=(a^b) 131f2f770d7SSami Tolvanen add $d,$d,$h @ d+=h 132f2f770d7SSami Tolvanen eor $t3,$t3,$b @ Maj(a,b,c) 133f2f770d7SSami Tolvanen add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) 134f2f770d7SSami Tolvanen @ add $h,$h,$t3 @ h+=Maj(a,b,c) 135f2f770d7SSami Tolvanen___ 136f2f770d7SSami Tolvanen ($t2,$t3)=($t3,$t2); 137f2f770d7SSami Tolvanen} 138f2f770d7SSami Tolvanen 139f2f770d7SSami Tolvanensub BODY_16_XX { 140f2f770d7SSami Tolvanenmy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 141f2f770d7SSami Tolvanen 142f2f770d7SSami Tolvanen$code.=<<___; 143f2f770d7SSami Tolvanen @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i 144f2f770d7SSami Tolvanen @ ldr $t4,[sp,#`($i+14)%16`*4] 145f2f770d7SSami Tolvanen mov $t0,$t1,ror#$sigma0[0] 146f2f770d7SSami Tolvanen add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 147f2f770d7SSami Tolvanen mov $t2,$t4,ror#$sigma1[0] 148f2f770d7SSami Tolvanen eor $t0,$t0,$t1,ror#$sigma0[1] 149f2f770d7SSami Tolvanen eor $t2,$t2,$t4,ror#$sigma1[1] 150f2f770d7SSami Tolvanen eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) 151f2f770d7SSami Tolvanen ldr $t1,[sp,#`($i+0)%16`*4] 152f2f770d7SSami Tolvanen eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) 153f2f770d7SSami 
Tolvanen ldr $t4,[sp,#`($i+9)%16`*4] 154f2f770d7SSami Tolvanen 155f2f770d7SSami Tolvanen add $t2,$t2,$t0 156f2f770d7SSami Tolvanen eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 157f2f770d7SSami Tolvanen add $t1,$t1,$t2 158f2f770d7SSami Tolvanen eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 159f2f770d7SSami Tolvanen add $t1,$t1,$t4 @ X[i] 160f2f770d7SSami Tolvanen___ 161f2f770d7SSami Tolvanen &BODY_00_15(@_); 162f2f770d7SSami Tolvanen} 163f2f770d7SSami Tolvanen 164f2f770d7SSami Tolvanen$code=<<___; 165f2f770d7SSami Tolvanen#ifndef __KERNEL__ 166f2f770d7SSami Tolvanen# include "arm_arch.h" 167f2f770d7SSami Tolvanen#else 168f2f770d7SSami Tolvanen# define __ARM_ARCH__ __LINUX_ARM_ARCH__ 169f2f770d7SSami Tolvanen# define __ARM_MAX_ARCH__ 7 170f2f770d7SSami Tolvanen#endif 171f2f770d7SSami Tolvanen 172f2f770d7SSami Tolvanen.text 173f2f770d7SSami Tolvanen#if __ARM_ARCH__<7 174f2f770d7SSami Tolvanen.code 32 175f2f770d7SSami Tolvanen#else 176f2f770d7SSami Tolvanen.syntax unified 177f2f770d7SSami Tolvanen# ifdef __thumb2__ 178f2f770d7SSami Tolvanen.thumb 179f2f770d7SSami Tolvanen# else 180f2f770d7SSami Tolvanen.code 32 181f2f770d7SSami Tolvanen# endif 182f2f770d7SSami Tolvanen#endif 183f2f770d7SSami Tolvanen 184f2f770d7SSami Tolvanen.type K256,%object 185f2f770d7SSami Tolvanen.align 5 186f2f770d7SSami TolvanenK256: 187f2f770d7SSami Tolvanen.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 188f2f770d7SSami Tolvanen.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 189f2f770d7SSami Tolvanen.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 190f2f770d7SSami Tolvanen.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 191f2f770d7SSami Tolvanen.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 192f2f770d7SSami Tolvanen.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 193f2f770d7SSami Tolvanen.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 194f2f770d7SSami Tolvanen.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 195f2f770d7SSami Tolvanen.word 
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 196f2f770d7SSami Tolvanen.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 197f2f770d7SSami Tolvanen.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 198f2f770d7SSami Tolvanen.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 199f2f770d7SSami Tolvanen.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 200f2f770d7SSami Tolvanen.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 201f2f770d7SSami Tolvanen.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 202f2f770d7SSami Tolvanen.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 203f2f770d7SSami Tolvanen.size K256,.-K256 204f2f770d7SSami Tolvanen.word 0 @ terminator 205f2f770d7SSami Tolvanen#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 206f2f770d7SSami Tolvanen.LOPENSSL_armcap: 207f2f770d7SSami Tolvanen.word OPENSSL_armcap_P-sha256_block_data_order 208f2f770d7SSami Tolvanen#endif 209f2f770d7SSami Tolvanen.align 5 210f2f770d7SSami Tolvanen 211f2f770d7SSami Tolvanen.global sha256_block_data_order 212f2f770d7SSami Tolvanen.type sha256_block_data_order,%function 213f2f770d7SSami Tolvanensha256_block_data_order: 21469216a54SArd Biesheuvel.Lsha256_block_data_order: 215f2f770d7SSami Tolvanen#if __ARM_ARCH__<7 216f2f770d7SSami Tolvanen sub r3,pc,#8 @ sha256_block_data_order 217f2f770d7SSami Tolvanen#else 21869216a54SArd Biesheuvel adr r3,.Lsha256_block_data_order 219f2f770d7SSami Tolvanen#endif 220f2f770d7SSami Tolvanen#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 221f2f770d7SSami Tolvanen ldr r12,.LOPENSSL_armcap 222f2f770d7SSami Tolvanen ldr r12,[r3,r12] @ OPENSSL_armcap_P 223f2f770d7SSami Tolvanen tst r12,#ARMV8_SHA256 224f2f770d7SSami Tolvanen bne .LARMv8 225f2f770d7SSami Tolvanen tst r12,#ARMV7_NEON 226f2f770d7SSami Tolvanen bne .LNEON 227f2f770d7SSami Tolvanen#endif 228f2f770d7SSami Tolvanen add $len,$inp,$len,lsl#6 @ len to point at the end of inp 229f2f770d7SSami Tolvanen stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} 230f2f770d7SSami Tolvanen ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} 
231f2f770d7SSami Tolvanen sub $Ktbl,r3,#256+32 @ K256 232f2f770d7SSami Tolvanen sub sp,sp,#16*4 @ alloca(X[16]) 233f2f770d7SSami Tolvanen.Loop: 234f2f770d7SSami Tolvanen# if __ARM_ARCH__>=7 235f2f770d7SSami Tolvanen ldr $t1,[$inp],#4 236f2f770d7SSami Tolvanen# else 237f2f770d7SSami Tolvanen ldrb $t1,[$inp,#3] 238f2f770d7SSami Tolvanen# endif 239f2f770d7SSami Tolvanen eor $t3,$B,$C @ magic 240f2f770d7SSami Tolvanen eor $t2,$t2,$t2 241f2f770d7SSami Tolvanen___ 242f2f770d7SSami Tolvanenfor($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } 243f2f770d7SSami Tolvanen$code.=".Lrounds_16_xx:\n"; 244f2f770d7SSami Tolvanenfor (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } 245f2f770d7SSami Tolvanen$code.=<<___; 246f2f770d7SSami Tolvanen#if __ARM_ARCH__>=7 247f2f770d7SSami Tolvanen ite eq @ Thumb2 thing, sanity check in ARM 248f2f770d7SSami Tolvanen#endif 249f2f770d7SSami Tolvanen ldreq $t3,[sp,#16*4] @ pull ctx 250f2f770d7SSami Tolvanen bne .Lrounds_16_xx 251f2f770d7SSami Tolvanen 252f2f770d7SSami Tolvanen add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 253f2f770d7SSami Tolvanen ldr $t0,[$t3,#0] 254f2f770d7SSami Tolvanen ldr $t1,[$t3,#4] 255f2f770d7SSami Tolvanen ldr $t2,[$t3,#8] 256f2f770d7SSami Tolvanen add $A,$A,$t0 257f2f770d7SSami Tolvanen ldr $t0,[$t3,#12] 258f2f770d7SSami Tolvanen add $B,$B,$t1 259f2f770d7SSami Tolvanen ldr $t1,[$t3,#16] 260f2f770d7SSami Tolvanen add $C,$C,$t2 261f2f770d7SSami Tolvanen ldr $t2,[$t3,#20] 262f2f770d7SSami Tolvanen add $D,$D,$t0 263f2f770d7SSami Tolvanen ldr $t0,[$t3,#24] 264f2f770d7SSami Tolvanen add $E,$E,$t1 265f2f770d7SSami Tolvanen ldr $t1,[$t3,#28] 266f2f770d7SSami Tolvanen add $F,$F,$t2 267f2f770d7SSami Tolvanen ldr $inp,[sp,#17*4] @ pull inp 268f2f770d7SSami Tolvanen ldr $t2,[sp,#18*4] @ pull inp+len 269f2f770d7SSami Tolvanen add $G,$G,$t0 270f2f770d7SSami Tolvanen add $H,$H,$t1 271f2f770d7SSami Tolvanen stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} 272f2f770d7SSami Tolvanen cmp $inp,$t2 273f2f770d7SSami Tolvanen sub 
$Ktbl,$Ktbl,#256 @ rewind Ktbl 274f2f770d7SSami Tolvanen bne .Loop 275f2f770d7SSami Tolvanen 276f2f770d7SSami Tolvanen add sp,sp,#`16+3`*4 @ destroy frame 277f2f770d7SSami Tolvanen#if __ARM_ARCH__>=5 278f2f770d7SSami Tolvanen ldmia sp!,{r4-r11,pc} 279f2f770d7SSami Tolvanen#else 280f2f770d7SSami Tolvanen ldmia sp!,{r4-r11,lr} 281f2f770d7SSami Tolvanen tst lr,#1 282f2f770d7SSami Tolvanen moveq pc,lr @ be binary compatible with V4, yet 283f2f770d7SSami Tolvanen bx lr @ interoperable with Thumb ISA:-) 284f2f770d7SSami Tolvanen#endif 285f2f770d7SSami Tolvanen.size sha256_block_data_order,.-sha256_block_data_order 286f2f770d7SSami Tolvanen___ 287f2f770d7SSami Tolvanen###################################################################### 288f2f770d7SSami Tolvanen# NEON stuff 289f2f770d7SSami Tolvanen# 290f2f770d7SSami Tolvanen{{{ 291f2f770d7SSami Tolvanenmy @X=map("q$_",(0..3)); 292f2f770d7SSami Tolvanenmy ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); 293f2f770d7SSami Tolvanenmy $Xfer=$t4; 294f2f770d7SSami Tolvanenmy $j=0; 295f2f770d7SSami Tolvanen 296f2f770d7SSami Tolvanensub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } 297f2f770d7SSami Tolvanensub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } 298f2f770d7SSami Tolvanen 299f2f770d7SSami Tolvanensub AUTOLOAD() # thunk [simplified] x86-style perlasm 300f2f770d7SSami Tolvanen{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; 301f2f770d7SSami Tolvanen my $arg = pop; 302f2f770d7SSami Tolvanen $arg = "#$arg" if ($arg*1 eq $arg); 303f2f770d7SSami Tolvanen $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; 304f2f770d7SSami Tolvanen} 305f2f770d7SSami Tolvanen 306f2f770d7SSami Tolvanensub Xupdate() 307f2f770d7SSami Tolvanen{ use integer; 308f2f770d7SSami Tolvanen my $body = shift; 309f2f770d7SSami Tolvanen my @insns = (&$body,&$body,&$body,&$body); 310f2f770d7SSami Tolvanen my ($a,$b,$c,$d,$e,$f,$g,$h); 311f2f770d7SSami Tolvanen 312f2f770d7SSami Tolvanen &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] 
313f2f770d7SSami Tolvanen eval(shift(@insns)); 314f2f770d7SSami Tolvanen eval(shift(@insns)); 315f2f770d7SSami Tolvanen eval(shift(@insns)); 316f2f770d7SSami Tolvanen &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] 317f2f770d7SSami Tolvanen eval(shift(@insns)); 318f2f770d7SSami Tolvanen eval(shift(@insns)); 319f2f770d7SSami Tolvanen eval(shift(@insns)); 320f2f770d7SSami Tolvanen &vshr_u32 ($T2,$T0,$sigma0[0]); 321f2f770d7SSami Tolvanen eval(shift(@insns)); 322f2f770d7SSami Tolvanen eval(shift(@insns)); 323f2f770d7SSami Tolvanen &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] 324f2f770d7SSami Tolvanen eval(shift(@insns)); 325f2f770d7SSami Tolvanen eval(shift(@insns)); 326f2f770d7SSami Tolvanen &vshr_u32 ($T1,$T0,$sigma0[2]); 327f2f770d7SSami Tolvanen eval(shift(@insns)); 328f2f770d7SSami Tolvanen eval(shift(@insns)); 329f2f770d7SSami Tolvanen &vsli_32 ($T2,$T0,32-$sigma0[0]); 330f2f770d7SSami Tolvanen eval(shift(@insns)); 331f2f770d7SSami Tolvanen eval(shift(@insns)); 332f2f770d7SSami Tolvanen &vshr_u32 ($T3,$T0,$sigma0[1]); 333f2f770d7SSami Tolvanen eval(shift(@insns)); 334f2f770d7SSami Tolvanen eval(shift(@insns)); 335f2f770d7SSami Tolvanen &veor ($T1,$T1,$T2); 336f2f770d7SSami Tolvanen eval(shift(@insns)); 337f2f770d7SSami Tolvanen eval(shift(@insns)); 338f2f770d7SSami Tolvanen &vsli_32 ($T3,$T0,32-$sigma0[1]); 339f2f770d7SSami Tolvanen eval(shift(@insns)); 340f2f770d7SSami Tolvanen eval(shift(@insns)); 341f2f770d7SSami Tolvanen &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); 342f2f770d7SSami Tolvanen eval(shift(@insns)); 343f2f770d7SSami Tolvanen eval(shift(@insns)); 344f2f770d7SSami Tolvanen &veor ($T1,$T1,$T3); # sigma0(X[1..4]) 345f2f770d7SSami Tolvanen eval(shift(@insns)); 346f2f770d7SSami Tolvanen eval(shift(@insns)); 347f2f770d7SSami Tolvanen &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); 348f2f770d7SSami Tolvanen eval(shift(@insns)); 349f2f770d7SSami Tolvanen eval(shift(@insns)); 350f2f770d7SSami Tolvanen &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); 351f2f770d7SSami Tolvanen 
eval(shift(@insns)); 352f2f770d7SSami Tolvanen eval(shift(@insns)); 353f2f770d7SSami Tolvanen &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) 354f2f770d7SSami Tolvanen eval(shift(@insns)); 355f2f770d7SSami Tolvanen eval(shift(@insns)); 356f2f770d7SSami Tolvanen &veor ($T5,$T5,$T4); 357f2f770d7SSami Tolvanen eval(shift(@insns)); 358f2f770d7SSami Tolvanen eval(shift(@insns)); 359f2f770d7SSami Tolvanen &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); 360f2f770d7SSami Tolvanen eval(shift(@insns)); 361f2f770d7SSami Tolvanen eval(shift(@insns)); 362f2f770d7SSami Tolvanen &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); 363f2f770d7SSami Tolvanen eval(shift(@insns)); 364f2f770d7SSami Tolvanen eval(shift(@insns)); 365f2f770d7SSami Tolvanen &veor ($T5,$T5,$T4); # sigma1(X[14..15]) 366f2f770d7SSami Tolvanen eval(shift(@insns)); 367f2f770d7SSami Tolvanen eval(shift(@insns)); 368f2f770d7SSami Tolvanen &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) 369f2f770d7SSami Tolvanen eval(shift(@insns)); 370f2f770d7SSami Tolvanen eval(shift(@insns)); 371f2f770d7SSami Tolvanen &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); 372f2f770d7SSami Tolvanen eval(shift(@insns)); 373f2f770d7SSami Tolvanen eval(shift(@insns)); 374f2f770d7SSami Tolvanen &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); 375f2f770d7SSami Tolvanen eval(shift(@insns)); 376f2f770d7SSami Tolvanen eval(shift(@insns)); 377f2f770d7SSami Tolvanen &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); 378f2f770d7SSami Tolvanen eval(shift(@insns)); 379f2f770d7SSami Tolvanen eval(shift(@insns)); 380f2f770d7SSami Tolvanen &veor ($T5,$T5,$T4); 381f2f770d7SSami Tolvanen eval(shift(@insns)); 382f2f770d7SSami Tolvanen eval(shift(@insns)); 383f2f770d7SSami Tolvanen &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); 384f2f770d7SSami Tolvanen eval(shift(@insns)); 385f2f770d7SSami Tolvanen eval(shift(@insns)); 386f2f770d7SSami Tolvanen &vld1_32 ("{$T0}","[$Ktbl,:128]!"); 387f2f770d7SSami Tolvanen eval(shift(@insns)); 388f2f770d7SSami Tolvanen eval(shift(@insns)); 
389f2f770d7SSami Tolvanen &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); 390f2f770d7SSami Tolvanen eval(shift(@insns)); 391f2f770d7SSami Tolvanen eval(shift(@insns)); 392f2f770d7SSami Tolvanen &veor ($T5,$T5,$T4); # sigma1(X[16..17]) 393f2f770d7SSami Tolvanen eval(shift(@insns)); 394f2f770d7SSami Tolvanen eval(shift(@insns)); 395f2f770d7SSami Tolvanen &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) 396f2f770d7SSami Tolvanen eval(shift(@insns)); 397f2f770d7SSami Tolvanen eval(shift(@insns)); 398f2f770d7SSami Tolvanen &vadd_i32 ($T0,$T0,@X[0]); 399f2f770d7SSami Tolvanen while($#insns>=2) { eval(shift(@insns)); } 400f2f770d7SSami Tolvanen &vst1_32 ("{$T0}","[$Xfer,:128]!"); 401f2f770d7SSami Tolvanen eval(shift(@insns)); 402f2f770d7SSami Tolvanen eval(shift(@insns)); 403f2f770d7SSami Tolvanen 404f2f770d7SSami Tolvanen push(@X,shift(@X)); # "rotate" X[] 405f2f770d7SSami Tolvanen} 406f2f770d7SSami Tolvanen 407f2f770d7SSami Tolvanensub Xpreload() 408f2f770d7SSami Tolvanen{ use integer; 409f2f770d7SSami Tolvanen my $body = shift; 410f2f770d7SSami Tolvanen my @insns = (&$body,&$body,&$body,&$body); 411f2f770d7SSami Tolvanen my ($a,$b,$c,$d,$e,$f,$g,$h); 412f2f770d7SSami Tolvanen 413f2f770d7SSami Tolvanen eval(shift(@insns)); 414f2f770d7SSami Tolvanen eval(shift(@insns)); 415f2f770d7SSami Tolvanen eval(shift(@insns)); 416f2f770d7SSami Tolvanen eval(shift(@insns)); 417f2f770d7SSami Tolvanen &vld1_32 ("{$T0}","[$Ktbl,:128]!"); 418f2f770d7SSami Tolvanen eval(shift(@insns)); 419f2f770d7SSami Tolvanen eval(shift(@insns)); 420f2f770d7SSami Tolvanen eval(shift(@insns)); 421f2f770d7SSami Tolvanen eval(shift(@insns)); 422f2f770d7SSami Tolvanen &vrev32_8 (@X[0],@X[0]); 423f2f770d7SSami Tolvanen eval(shift(@insns)); 424f2f770d7SSami Tolvanen eval(shift(@insns)); 425f2f770d7SSami Tolvanen eval(shift(@insns)); 426f2f770d7SSami Tolvanen eval(shift(@insns)); 427f2f770d7SSami Tolvanen &vadd_i32 ($T0,$T0,@X[0]); 428f2f770d7SSami Tolvanen foreach (@insns) { eval; } # 
remaining instructions 429f2f770d7SSami Tolvanen &vst1_32 ("{$T0}","[$Xfer,:128]!"); 430f2f770d7SSami Tolvanen 431f2f770d7SSami Tolvanen push(@X,shift(@X)); # "rotate" X[] 432f2f770d7SSami Tolvanen} 433f2f770d7SSami Tolvanen 434f2f770d7SSami Tolvanensub body_00_15 () { 435f2f770d7SSami Tolvanen ( 436f2f770d7SSami Tolvanen '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. 437f2f770d7SSami Tolvanen '&add ($h,$h,$t1)', # h+=X[i]+K[i] 438f2f770d7SSami Tolvanen '&eor ($t1,$f,$g)', 439f2f770d7SSami Tolvanen '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', 440f2f770d7SSami Tolvanen '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past 441f2f770d7SSami Tolvanen '&and ($t1,$t1,$e)', 442f2f770d7SSami Tolvanen '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) 443f2f770d7SSami Tolvanen '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', 444f2f770d7SSami Tolvanen '&eor ($t1,$t1,$g)', # Ch(e,f,g) 445f2f770d7SSami Tolvanen '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) 446f2f770d7SSami Tolvanen '&eor ($t2,$a,$b)', # a^b, b^c in next round 447f2f770d7SSami Tolvanen '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) 448f2f770d7SSami Tolvanen '&add ($h,$h,$t1)', # h+=Ch(e,f,g) 449f2f770d7SSami Tolvanen '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. 450f2f770d7SSami Tolvanen '&ldr ($t1,"[$Ktbl]") if ($j==15);'. 451f2f770d7SSami Tolvanen '&ldr ($t1,"[sp,#64]") if ($j==31)', 452f2f770d7SSami Tolvanen '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) 453f2f770d7SSami Tolvanen '&add ($d,$d,$h)', # d+=h 454f2f770d7SSami Tolvanen '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. 
# h+=Sigma0(a) 455f2f770d7SSami Tolvanen '&eor ($t3,$t3,$b)', # Maj(a,b,c) 456f2f770d7SSami Tolvanen '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' 457f2f770d7SSami Tolvanen ) 458f2f770d7SSami Tolvanen} 459f2f770d7SSami Tolvanen 460f2f770d7SSami Tolvanen$code.=<<___; 461f2f770d7SSami Tolvanen#if __ARM_MAX_ARCH__>=7 462f2f770d7SSami Tolvanen.arch armv7-a 463f2f770d7SSami Tolvanen.fpu neon 464f2f770d7SSami Tolvanen 465f2f770d7SSami Tolvanen.global sha256_block_data_order_neon 466f2f770d7SSami Tolvanen.type sha256_block_data_order_neon,%function 467f2f770d7SSami Tolvanen.align 4 468f2f770d7SSami Tolvanensha256_block_data_order_neon: 469f2f770d7SSami Tolvanen.LNEON: 470f2f770d7SSami Tolvanen stmdb sp!,{r4-r12,lr} 471f2f770d7SSami Tolvanen 472f2f770d7SSami Tolvanen sub $H,sp,#16*4+16 473*54781938SArd Biesheuvel adr $Ktbl,.Lsha256_block_data_order 474*54781938SArd Biesheuvel sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256 475f2f770d7SSami Tolvanen bic $H,$H,#15 @ align for 128-bit stores 476f2f770d7SSami Tolvanen mov $t2,sp 477f2f770d7SSami Tolvanen mov sp,$H @ alloca 478f2f770d7SSami Tolvanen add $len,$inp,$len,lsl#6 @ len to point at the end of inp 479f2f770d7SSami Tolvanen 480f2f770d7SSami Tolvanen vld1.8 {@X[0]},[$inp]! 481f2f770d7SSami Tolvanen vld1.8 {@X[1]},[$inp]! 482f2f770d7SSami Tolvanen vld1.8 {@X[2]},[$inp]! 483f2f770d7SSami Tolvanen vld1.8 {@X[3]},[$inp]! 484f2f770d7SSami Tolvanen vld1.32 {$T0},[$Ktbl,:128]! 485f2f770d7SSami Tolvanen vld1.32 {$T1},[$Ktbl,:128]! 486f2f770d7SSami Tolvanen vld1.32 {$T2},[$Ktbl,:128]! 487f2f770d7SSami Tolvanen vld1.32 {$T3},[$Ktbl,:128]! 
488f2f770d7SSami Tolvanen vrev32.8 @X[0],@X[0] @ yes, even on 489f2f770d7SSami Tolvanen str $ctx,[sp,#64] 490f2f770d7SSami Tolvanen vrev32.8 @X[1],@X[1] @ big-endian 491f2f770d7SSami Tolvanen str $inp,[sp,#68] 492f2f770d7SSami Tolvanen mov $Xfer,sp 493f2f770d7SSami Tolvanen vrev32.8 @X[2],@X[2] 494f2f770d7SSami Tolvanen str $len,[sp,#72] 495f2f770d7SSami Tolvanen vrev32.8 @X[3],@X[3] 496f2f770d7SSami Tolvanen str $t2,[sp,#76] @ save original sp 497f2f770d7SSami Tolvanen vadd.i32 $T0,$T0,@X[0] 498f2f770d7SSami Tolvanen vadd.i32 $T1,$T1,@X[1] 499f2f770d7SSami Tolvanen vst1.32 {$T0},[$Xfer,:128]! 500f2f770d7SSami Tolvanen vadd.i32 $T2,$T2,@X[2] 501f2f770d7SSami Tolvanen vst1.32 {$T1},[$Xfer,:128]! 502f2f770d7SSami Tolvanen vadd.i32 $T3,$T3,@X[3] 503f2f770d7SSami Tolvanen vst1.32 {$T2},[$Xfer,:128]! 504f2f770d7SSami Tolvanen vst1.32 {$T3},[$Xfer,:128]! 505f2f770d7SSami Tolvanen 506f2f770d7SSami Tolvanen ldmia $ctx,{$A-$H} 507f2f770d7SSami Tolvanen sub $Xfer,$Xfer,#64 508f2f770d7SSami Tolvanen ldr $t1,[sp,#0] 509f2f770d7SSami Tolvanen eor $t2,$t2,$t2 510f2f770d7SSami Tolvanen eor $t3,$B,$C 511f2f770d7SSami Tolvanen b .L_00_48 512f2f770d7SSami Tolvanen 513f2f770d7SSami Tolvanen.align 4 514f2f770d7SSami Tolvanen.L_00_48: 515f2f770d7SSami Tolvanen___ 516f2f770d7SSami Tolvanen &Xupdate(\&body_00_15); 517f2f770d7SSami Tolvanen &Xupdate(\&body_00_15); 518f2f770d7SSami Tolvanen &Xupdate(\&body_00_15); 519f2f770d7SSami Tolvanen &Xupdate(\&body_00_15); 520f2f770d7SSami Tolvanen$code.=<<___; 521f2f770d7SSami Tolvanen teq $t1,#0 @ check for K256 terminator 522f2f770d7SSami Tolvanen ldr $t1,[sp,#0] 523f2f770d7SSami Tolvanen sub $Xfer,$Xfer,#64 524f2f770d7SSami Tolvanen bne .L_00_48 525f2f770d7SSami Tolvanen 526f2f770d7SSami Tolvanen ldr $inp,[sp,#68] 527f2f770d7SSami Tolvanen ldr $t0,[sp,#72] 528f2f770d7SSami Tolvanen sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl 529f2f770d7SSami Tolvanen teq $inp,$t0 530f2f770d7SSami Tolvanen it eq 531f2f770d7SSami Tolvanen subeq $inp,$inp,#64 @ avoid SEGV 
532f2f770d7SSami Tolvanen vld1.8 {@X[0]},[$inp]! @ load next input block 533f2f770d7SSami Tolvanen vld1.8 {@X[1]},[$inp]! 534f2f770d7SSami Tolvanen vld1.8 {@X[2]},[$inp]! 535f2f770d7SSami Tolvanen vld1.8 {@X[3]},[$inp]! 536f2f770d7SSami Tolvanen it ne 537f2f770d7SSami Tolvanen strne $inp,[sp,#68] 538f2f770d7SSami Tolvanen mov $Xfer,sp 539f2f770d7SSami Tolvanen___ 540f2f770d7SSami Tolvanen &Xpreload(\&body_00_15); 541f2f770d7SSami Tolvanen &Xpreload(\&body_00_15); 542f2f770d7SSami Tolvanen &Xpreload(\&body_00_15); 543f2f770d7SSami Tolvanen &Xpreload(\&body_00_15); 544f2f770d7SSami Tolvanen$code.=<<___; 545f2f770d7SSami Tolvanen ldr $t0,[$t1,#0] 546f2f770d7SSami Tolvanen add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 547f2f770d7SSami Tolvanen ldr $t2,[$t1,#4] 548f2f770d7SSami Tolvanen ldr $t3,[$t1,#8] 549f2f770d7SSami Tolvanen ldr $t4,[$t1,#12] 550f2f770d7SSami Tolvanen add $A,$A,$t0 @ accumulate 551f2f770d7SSami Tolvanen ldr $t0,[$t1,#16] 552f2f770d7SSami Tolvanen add $B,$B,$t2 553f2f770d7SSami Tolvanen ldr $t2,[$t1,#20] 554f2f770d7SSami Tolvanen add $C,$C,$t3 555f2f770d7SSami Tolvanen ldr $t3,[$t1,#24] 556f2f770d7SSami Tolvanen add $D,$D,$t4 557f2f770d7SSami Tolvanen ldr $t4,[$t1,#28] 558f2f770d7SSami Tolvanen add $E,$E,$t0 559f2f770d7SSami Tolvanen str $A,[$t1],#4 560f2f770d7SSami Tolvanen add $F,$F,$t2 561f2f770d7SSami Tolvanen str $B,[$t1],#4 562f2f770d7SSami Tolvanen add $G,$G,$t3 563f2f770d7SSami Tolvanen str $C,[$t1],#4 564f2f770d7SSami Tolvanen add $H,$H,$t4 565f2f770d7SSami Tolvanen str $D,[$t1],#4 566f2f770d7SSami Tolvanen stmia $t1,{$E-$H} 567f2f770d7SSami Tolvanen 568f2f770d7SSami Tolvanen ittte ne 569f2f770d7SSami Tolvanen movne $Xfer,sp 570f2f770d7SSami Tolvanen ldrne $t1,[sp,#0] 571f2f770d7SSami Tolvanen eorne $t2,$t2,$t2 572f2f770d7SSami Tolvanen ldreq sp,[sp,#76] @ restore original sp 573f2f770d7SSami Tolvanen itt ne 574f2f770d7SSami Tolvanen eorne $t3,$B,$C 575f2f770d7SSami Tolvanen bne .L_00_48 576f2f770d7SSami Tolvanen 577f2f770d7SSami Tolvanen 
ldmia sp!,{r4-r12,pc} 578f2f770d7SSami Tolvanen.size sha256_block_data_order_neon,.-sha256_block_data_order_neon 579f2f770d7SSami Tolvanen#endif 580f2f770d7SSami Tolvanen___ 581f2f770d7SSami Tolvanen}}} 582f2f770d7SSami Tolvanen###################################################################### 583f2f770d7SSami Tolvanen# ARMv8 stuff 584f2f770d7SSami Tolvanen# 585f2f770d7SSami Tolvanen{{{ 586f2f770d7SSami Tolvanenmy ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); 587f2f770d7SSami Tolvanenmy @MSG=map("q$_",(8..11)); 588f2f770d7SSami Tolvanenmy ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); 589f2f770d7SSami Tolvanenmy $Ktbl="r3"; 590f2f770d7SSami Tolvanen 591f2f770d7SSami Tolvanen$code.=<<___; 592f2f770d7SSami Tolvanen#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 593f2f770d7SSami Tolvanen 594f2f770d7SSami Tolvanen# ifdef __thumb2__ 595f2f770d7SSami Tolvanen# define INST(a,b,c,d) .byte c,d|0xc,a,b 596f2f770d7SSami Tolvanen# else 597f2f770d7SSami Tolvanen# define INST(a,b,c,d) .byte a,b,c,d 598f2f770d7SSami Tolvanen# endif 599f2f770d7SSami Tolvanen 600f2f770d7SSami Tolvanen.type sha256_block_data_order_armv8,%function 601f2f770d7SSami Tolvanen.align 5 602f2f770d7SSami Tolvanensha256_block_data_order_armv8: 603f2f770d7SSami Tolvanen.LARMv8: 604f2f770d7SSami Tolvanen vld1.32 {$ABCD,$EFGH},[$ctx] 605f2f770d7SSami Tolvanen# ifdef __thumb2__ 606f2f770d7SSami Tolvanen adr $Ktbl,.LARMv8 607f2f770d7SSami Tolvanen sub $Ktbl,$Ktbl,#.LARMv8-K256 608f2f770d7SSami Tolvanen# else 609f2f770d7SSami Tolvanen adrl $Ktbl,K256 610f2f770d7SSami Tolvanen# endif 611f2f770d7SSami Tolvanen add $len,$inp,$len,lsl#6 @ len to point at the end of inp 612f2f770d7SSami Tolvanen 613f2f770d7SSami Tolvanen.Loop_v8: 614f2f770d7SSami Tolvanen vld1.8 {@MSG[0]-@MSG[1]},[$inp]! 615f2f770d7SSami Tolvanen vld1.8 {@MSG[2]-@MSG[3]},[$inp]! 616f2f770d7SSami Tolvanen vld1.32 {$W0},[$Ktbl]! 
617f2f770d7SSami Tolvanen vrev32.8 @MSG[0],@MSG[0] 618f2f770d7SSami Tolvanen vrev32.8 @MSG[1],@MSG[1] 619f2f770d7SSami Tolvanen vrev32.8 @MSG[2],@MSG[2] 620f2f770d7SSami Tolvanen vrev32.8 @MSG[3],@MSG[3] 621f2f770d7SSami Tolvanen vmov $ABCD_SAVE,$ABCD @ offload 622f2f770d7SSami Tolvanen vmov $EFGH_SAVE,$EFGH 623f2f770d7SSami Tolvanen teq $inp,$len 624f2f770d7SSami Tolvanen___ 625f2f770d7SSami Tolvanenfor($i=0;$i<12;$i++) { 626f2f770d7SSami Tolvanen$code.=<<___; 627f2f770d7SSami Tolvanen vld1.32 {$W1},[$Ktbl]! 628f2f770d7SSami Tolvanen vadd.i32 $W0,$W0,@MSG[0] 629f2f770d7SSami Tolvanen sha256su0 @MSG[0],@MSG[1] 630f2f770d7SSami Tolvanen vmov $abcd,$ABCD 631f2f770d7SSami Tolvanen sha256h $ABCD,$EFGH,$W0 632f2f770d7SSami Tolvanen sha256h2 $EFGH,$abcd,$W0 633f2f770d7SSami Tolvanen sha256su1 @MSG[0],@MSG[2],@MSG[3] 634f2f770d7SSami Tolvanen___ 635f2f770d7SSami Tolvanen ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); 636f2f770d7SSami Tolvanen} 637f2f770d7SSami Tolvanen$code.=<<___; 638f2f770d7SSami Tolvanen vld1.32 {$W1},[$Ktbl]! 639f2f770d7SSami Tolvanen vadd.i32 $W0,$W0,@MSG[0] 640f2f770d7SSami Tolvanen vmov $abcd,$ABCD 641f2f770d7SSami Tolvanen sha256h $ABCD,$EFGH,$W0 642f2f770d7SSami Tolvanen sha256h2 $EFGH,$abcd,$W0 643f2f770d7SSami Tolvanen 644f2f770d7SSami Tolvanen vld1.32 {$W0},[$Ktbl]! 
645f2f770d7SSami Tolvanen vadd.i32 $W1,$W1,@MSG[1] 646f2f770d7SSami Tolvanen vmov $abcd,$ABCD 647f2f770d7SSami Tolvanen sha256h $ABCD,$EFGH,$W1 648f2f770d7SSami Tolvanen sha256h2 $EFGH,$abcd,$W1 649f2f770d7SSami Tolvanen 650f2f770d7SSami Tolvanen vld1.32 {$W1},[$Ktbl] 651f2f770d7SSami Tolvanen vadd.i32 $W0,$W0,@MSG[2] 652f2f770d7SSami Tolvanen sub $Ktbl,$Ktbl,#256-16 @ rewind 653f2f770d7SSami Tolvanen vmov $abcd,$ABCD 654f2f770d7SSami Tolvanen sha256h $ABCD,$EFGH,$W0 655f2f770d7SSami Tolvanen sha256h2 $EFGH,$abcd,$W0 656f2f770d7SSami Tolvanen 657f2f770d7SSami Tolvanen vadd.i32 $W1,$W1,@MSG[3] 658f2f770d7SSami Tolvanen vmov $abcd,$ABCD 659f2f770d7SSami Tolvanen sha256h $ABCD,$EFGH,$W1 660f2f770d7SSami Tolvanen sha256h2 $EFGH,$abcd,$W1 661f2f770d7SSami Tolvanen 662f2f770d7SSami Tolvanen vadd.i32 $ABCD,$ABCD,$ABCD_SAVE 663f2f770d7SSami Tolvanen vadd.i32 $EFGH,$EFGH,$EFGH_SAVE 664f2f770d7SSami Tolvanen it ne 665f2f770d7SSami Tolvanen bne .Loop_v8 666f2f770d7SSami Tolvanen 667f2f770d7SSami Tolvanen vst1.32 {$ABCD,$EFGH},[$ctx] 668f2f770d7SSami Tolvanen 669f2f770d7SSami Tolvanen ret @ bx lr 670f2f770d7SSami Tolvanen.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 671f2f770d7SSami Tolvanen#endif 672f2f770d7SSami Tolvanen___ 673f2f770d7SSami Tolvanen}}} 674f2f770d7SSami Tolvanen$code.=<<___; 675f2f770d7SSami Tolvanen.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" 676f2f770d7SSami Tolvanen.align 2 677f2f770d7SSami Tolvanen#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 678f2f770d7SSami Tolvanen.comm OPENSSL_armcap_P,4,4 679f2f770d7SSami Tolvanen#endif 680f2f770d7SSami Tolvanen___ 681f2f770d7SSami Tolvanen 682f2f770d7SSami Tolvanenopen SELF,$0; 683f2f770d7SSami Tolvanenwhile(<SELF>) { 684f2f770d7SSami Tolvanen next if (/^#!/); 685f2f770d7SSami Tolvanen last if (!s/^#/@/ and !/^$/); 686f2f770d7SSami Tolvanen print; 687f2f770d7SSami Tolvanen} 688f2f770d7SSami Tolvanenclose SELF; 689f2f770d7SSami Tolvanen 
# Post-processing of the accumulated $code: sha256* mnemonics are turned
# into raw INST(...) byte encodings, `...` expressions are evaluated, and
# "ret" is rewritten before each line is printed.
{   my %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    # unsha256($mnemonic,$arg)
    #
    # $mnemonic	one of the keys of %opcode
    # $arg	operand string: two or three q registers, "qA,qB" or
    #		"qA,qB,qC"
    #
    # Returns "INST(b0,b1,b2,b3)\t@ <mnemonic> <arg>", where b0..b3 are
    # the bytes of the encoded instruction word, least significant
    # first.  If the mnemonic is unknown or the operands cannot be
    # parsed, the instruction text is returned unencoded so that the
    # caller's s///e does not silently drop it from the output.
    sub unsha256 {
	# Copy first: @_ aliases the caller's $1/$2, which the match
	# below overwrites.
	my ($mnemonic,$arg)=@_;

	return "$mnemonic\t$arg"
	    unless (exists $opcode{$mnemonic} &&
		    $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/);

	# The middle register is absent in the two-operand form; it
	# then contributes no bits to the encoding.
	my ($r1,$r2,$r3) = ($1, defined($2) ? $2 : 0, $3);

	# For each register number the low three bits and bit 3 are
	# placed in separate fields of the instruction word.
	my $word = $opcode{$mnemonic}
		 | (($r1&7)<<13) | (($r1&8)<<19)
		 | (($r2&7)<<17) | (($r2&8)<<4)
		 | (($r3&7)<<1)  | (($r3&8)<<2);

	# since ARMv7 instructions are always encoded little-endian.
	# correct solution is to use .inst directive, but older
	# assemblers don't implement it:-(
	return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
    }
}

foreach (split($/,$code)) {

	# expand `...` with the value of the enclosed Perl expression
	s/\`([^\`]*)\`/eval $1/ge;

	# replace sha256* instructions with their INST(...) encoding
	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/ge;

	# "ret" becomes "bx lr"; a "bx lr" already present in the line
	# is emitted as a literal instruction word instead
	s/\bret\b/bx lr/g		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/g;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# Buffered write errors only surface at close, so fail loudly rather
# than leave a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush