#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for Alpha.

# On 21264 performance is 33% better than code generated by vendor
# compiler, and 75% better than GCC [3.4], and in absolute terms is
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.

# Usage: perl <this script> [output-file]
#
# This script is a code generator: it builds the assembly source for
# sha1_block_data_order in $code and prints it.  The text goes to
# output-file when one is given (and is a true value), otherwise to
# the STDOUT the script was started with.

use strict;
use warnings;

# 16-entry circular buffer of message schedule words, one register each.
my @X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
my $ctx="a0";	# $16
my $inp="a1";
my $num="a2";
my $A="a3";
my $B="a4";	# 20
my $C="a5";
my $D="t8";
my $E="t9";
# Current assignment of the five working variables to registers.  The
# driver loops rotate this list after every round instead of emitting
# register-to-register moves.
my @V=($A,$B,$C,$D,$E);
my $t0="t10";	# 24
my $t1="t11";
my $t2="ra";	# ra doubles as a scratch register; the prologue saves it
		# on the stack and the epilogue restores it before "ret"
my $t3="t12";
my $K="AT";	# 28

# Accumulates the generated assembly text.  It is assigned by the driver
# code below before any BODY_* generator appends to it.
my $code;

# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (the driver calls it for 0..19) to $code.
# $a..$e are the names of the registers currently holding the five
# working variables.  Which templates are emitted depends on $i:
#   $i == 0          initial unaligned load of the first input quadword;
#   even $i < 14     unaligned load of the quadword holding the next pair
#                    of input words, ahead of its use;
#   even $i < 15     merge the two unaligned halves, byte swap both 32-bit
#                    words at once, split them into X[$i] and X[$i+1],
#                    and perform the round;
#   odd  $i < 15     round only, the word was prepared by the previous one;
#   $i >= 15         round plus "forward Xupdate", i.e. the schedule word
#                    for round $i+1 is computed alongside.
# The round combines (b AND c) OR (d AND NOT b) with K, X[$i], e and a
# rotated left by 5, and rotates b left by 30.
# Returns nothing of interest; the result is the side effect on $code.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;	# round whose schedule word is updated ahead of time
$code.=<<___ if ($i==0);
	ldq_u	$X[0],0+0($inp)
	ldq_u	$X[1],0+7($inp)
___
# "($i+2)*4" is emitted as a literal expression such as "(0+2)*4" and is
# left for the assembler to evaluate.
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	$X[$i+2],($i+2)*4+0($inp)
	ldq_u	$X[$i+3],($i+2)*4+7($inp)
___
$code.=<<___ if (!($i&1) && $i<15);
	extql	$X[$i],$inp,$X[$i]
	extqh	$X[$i+1],$inp,$X[$i+1]

	or	$X[$i+1],$X[$i],$X[$i]	# pair of 32-bit values are fetched

	srl	$X[$i],24,$t0		# vectorized byte swap
	srl	$X[$i],8,$t2

	sll	$X[$i],8,$t3
	sll	$X[$i],24,$X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	$X[$i],0x88,$X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	$X[$i],$t0,$X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	$X[$i],$t3,$X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	$X[$i],4,$X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	$X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	$X[$i],0xf,$X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	$X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	$X[$i],0xf,$X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]

	zapnot	$a,0xf,$a
	addl	$X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	$X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	$X[$j%16],$X[$j%16],$X[$j%16]

	or	$t3,$b,$b
	zapnot	$X[$i%16],0xf,$X[$i%16]
	or	$t1,$X[$j%16],$X[$j%16]
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the code for a round that uses b XOR c XOR d.  The driver calls
# it for rounds 20..39 and again for rounds 60..79.  Arguments are as for
# BODY_00_19.
#   $i < 79   round with forward Xupdate of the word for round $i+1;
#   $i < 77   additionally truncates X[$i%16] to its low 32 bits;
#   $i == 79  last round: no Xupdate; instead the five context words are
#             loaded into X[0]..X[4], which the driver's epilogue adds to
#             the working variables.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;	# round whose schedule word is updated ahead of time
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]

	srl	$b,2,$b
	addl	$X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]

	srl	$X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	$X[$j%16],$X[$j%16],$X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,$X[$j%16],$X[$j%16]
___
$code.=<<___ if ($i<77);
	zapnot	$X[$i%16],0xf,$X[$i%16]
___
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	$X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	$X[1],4($ctx)

	srl	$b,2,$b
	addl	$X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	$X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	$X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	$X[4],16($ctx)
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the code for a round that uses
# (b AND c) OR (b AND d) OR (c AND d), always with forward Xupdate of the
# word for round $i+1.  The driver calls it for rounds 40..59.  Arguments
# are as for BODY_00_19.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;	# round whose schedule word is updated ahead of time
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]

	srl	$X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	$X[$i%16],$e,$e
	addl	$X[$j%16],$X[$j%16],$X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,$X[$j%16],$X[$j%16]
	zapnot	$X[$i%16],0xf,$X[$i%16]
___
}

# Function prologue: save ra, s0-s5 and fp, load the five context words
# into the working registers and turn $num from a count of 64-byte blocks
# into the address just past the input (inp + num*64).  Each pass through
# .Lloop processes one block.  The ldah/lda pair builds the first round
# constant, 23170*65536+31129 = 0x5a827999.
$code=<<___;
#include <machine/asm.h>

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
# Round counter shared by the four unrolled groups below.  After each
# round @V is rotated right by one, so the register that received the new
# value through the \$e argument is passed as \$a to the next round.
my $i;
for ($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }

# 28378*65536-5215 = 0x6ed9eba1
$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Low 32 bits of -28900*65536-17188 are 0x8f1bbcdc.
$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }

# Low 32 bits of -13725*65536-15914 are 0xca62c1d6.
$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
# Rounds 60..79 reuse the 20..39 generator with the new constant.
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Add the context words fetched during round 79 (X[0]..X[4]) to the
# working variables, store the result back, advance the input pointer and
# loop while it is below the end address; then restore the saved
# registers and return.
$code.=<<___;
	addl	$X[0],$A,$A
	addl	$X[1],$B,$B
	addl	$X[2],$C,$C
	addl	$X[3],$D,$D
	addl	$X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

# Redirect STDOUT to the requested file, if any.  Every step is checked
# so that an unwritable path or a failed write stops the build instead of
# leaving an empty or truncated assembly file behind.
my $output=shift;
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
print $code or die "error writing output: $!";
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";