1#! /usr/bin/env perl 2# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16 17# SHA256/512 block procedure for PA-RISC. 18 19# June 2009. 20# 21# SHA256 performance is >75% better than gcc 3.2 generated code on 22# PA-7100LC. Compared to code generated by vendor compiler this 23# implementation is almost 70% faster in 64-bit build, but delivers 24# virtually same performance in 32-bit build on PA-8600. 25# 26# SHA512 performance is >2.9x better than gcc 3.2 generated code on 27# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the 28# code is executed on PA-RISC 2.0 processor and switches to 64-bit 29# code path delivering adequate performance even in "blended" 32-bit 30# build. Though 64-bit code is not any faster than code generated by 31# vendor compiler on PA-8600... 32# 33# Special thanks to polarhome.com for providing HP-UX account. 34 35$flavour = shift; 36$output = shift; 37open STDOUT,">$output"; 38 39if ($flavour =~ /64/) { 40 $LEVEL ="2.0W"; 41 $SIZE_T =8; 42 $FRAME_MARKER =80; 43 $SAVED_RP =16; 44 $PUSH ="std"; 45 $PUSHMA ="std,ma"; 46 $POP ="ldd"; 47 $POPMB ="ldd,mb"; 48} else { 49 $LEVEL ="1.0"; 50 $SIZE_T =4; 51 $FRAME_MARKER =48; 52 $SAVED_RP =20; 53 $PUSH ="stw"; 54 $PUSHMA ="stwm"; 55 $POP ="ldw"; 56 $POPMB ="ldwm"; 57} 58 59if ($output =~ /512/) { 60 $func="sha512_block_data_order"; 61 $SZ=8; 62 @Sigma0=(28,34,39); 63 @Sigma1=(14,18,41); 64 @sigma0=(1, 8, 7); 65 @sigma1=(19,61, 6); 66 $rounds=80; 67 $LAST10BITS=0x017; 68 $LD="ldd"; 69 $LDM="ldd,ma"; 70 $ST="std"; 71} else { 72 $func="sha256_block_data_order"; 73 $SZ=4; 74 @Sigma0=( 2,13,22); 75 @Sigma1=( 6,11,25); 76 @sigma0=( 7,18, 3); 77 @sigma1=(17,19,10); 78 $rounds=64; 79 $LAST10BITS=0x0f2; 80 $LD="ldw"; 81 $LDM="ldwm"; 82 $ST="stw"; 83} 84 85$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker 86 # [+ argument transfer] 87$XOFF=16*$SZ+32; # local variables 88$FRAME+=$XOFF; 89$XOFF+=$FRAME_MARKER; # distance between %sp and local variables 90 91$ctx="%r26"; # zapped by $a0 92$inp="%r25"; # zapped by $a1 93$num="%r24"; # zapped by $t0 94 95$a0 ="%r26"; 96$a1 ="%r25"; 97$t0 ="%r24"; 98$t1 ="%r29"; 99$Tbl="%r31"; 100 101@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28"); 102 103@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", 104 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp); 105 106sub ROUND_00_15 { 107my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; 108$code.=<<___; 109 _ror $e,$Sigma1[0],$a0 110 and $f,$e,$t0 111 _ror $e,$Sigma1[1],$a1 112 addl $t1,$h,$h 113 andcm $g,$e,$t1 114 xor $a1,$a0,$a0 115 _ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1 116 or $t0,$t1,$t1 ; Ch(e,f,g) 117 addl @X[$i%16],$h,$h 118 xor $a0,$a1,$a1 ; Sigma1(e) 119 addl $t1,$h,$h 120 _ror $a,$Sigma0[0],$a0 121 addl $a1,$h,$h 122 123 _ror $a,$Sigma0[1],$a1 124 and $a,$b,$t0 125 and $a,$c,$t1 126 xor $a1,$a0,$a0 127 _ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1 128 xor $t1,$t0,$t0 129 and $b,$c,$t1 130 xor $a0,$a1,$a1 ; Sigma0(a) 131 addl $h,$d,$d 132 xor $t1,$t0,$t0 ; Maj(a,b,c) 133 `"$LDM $SZ($Tbl),$t1" if ($i<15)` 134 addl $a1,$h,$h 135 addl $t0,$h,$h 136 137___ 138} 139 140sub ROUND_16_xx { 141my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; 142$i-=16; 143$code.=<<___; 144 _ror @X[($i+1)%16],$sigma0[0],$a0 145 _ror @X[($i+1)%16],$sigma0[1],$a1 146 addl @X[($i+9)%16],@X[$i],@X[$i] 147 _ror @X[($i+14)%16],$sigma1[0],$t0 148 _ror @X[($i+14)%16],$sigma1[1],$t1 149 xor $a1,$a0,$a0 150 _shr @X[($i+1)%16],$sigma0[2],$a1 151 xor $t1,$t0,$t0 152 _shr @X[($i+14)%16],$sigma1[2],$t1 153 xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f]) 154 xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f]) 155 $LDM $SZ($Tbl),$t1 156 addl $a0,@X[$i],@X[$i] 157 addl $t0,@X[$i],@X[$i] 158___ 159$code.=<<___ if ($i==15); 160 extru $t1,31,10,$a1 161 comiclr,<> $LAST10BITS,$a1,%r0 162 ldo 1($Tbl),$Tbl ; signal end of $Tbl 163___ 164&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); 165} 166 167$code=<<___; 168 .LEVEL $LEVEL 169 .SPACE \$TEXT\$ 170 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY 171 172 .ALIGN 64 173L\$table 174___ 175$code.=<<___ if ($SZ==8); 176 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd 177 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc 178 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 179 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 180 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe 181 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 182 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 183 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 184 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 185 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 186 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 187 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 188 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 189 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 190 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 191 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 192 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 193 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df 194 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 195 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b 196 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 197 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 198 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 199 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 200 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 201 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 202 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb 203 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 204 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 205 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec 206 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 207 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b 208 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 209 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 210 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 211 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b 212 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 213 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c 214 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a 215 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 216___ 217$code.=<<___ if ($SZ==4); 218 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 219 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 220 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 221 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 222 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 223 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 224 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 225 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 226 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 227 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 228 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 229 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 230 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 231 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 232 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 233 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 234___ 235$code.=<<___; 236 237 .EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR 238 .ALIGN 64 239$func 240 .PROC 241 .CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 242 .ENTRY 243 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue 244 $PUSHMA %r3,$FRAME(%sp) 245 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) 246 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) 247 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) 248 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) 249 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) 250 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) 251 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) 252 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) 253 $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) 254 $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) 255 $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) 256 $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) 257 $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) 258 $PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp) 259 $PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp) 260 261 _shl $num,`log(16*$SZ)/log(2)`,$num 262 addl $inp,$num,$num ; $num to point at the end of $inp 263 264 $PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments 265 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) 266 $PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) 267 268 blr %r0,$Tbl 269 ldi 3,$t1 270L\$pic 271 andcm $Tbl,$t1,$Tbl ; wipe privilege level 272 ldo L\$table-L\$pic($Tbl),$Tbl 273___ 274$code.=<<___ if ($SZ==8 && $SIZE_T==4); 275 ldi 31,$t1 276 mtctl $t1,%cr11 277 extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0 278 b L\$parisc1 279 nop 280___ 281$code.=<<___; 282 $LD `0*$SZ`($ctx),$A ; load context 283 $LD `1*$SZ`($ctx),$B 284 $LD `2*$SZ`($ctx),$C 285 $LD `3*$SZ`($ctx),$D 286 $LD `4*$SZ`($ctx),$E 287 $LD `5*$SZ`($ctx),$F 288 $LD `6*$SZ`($ctx),$G 289 $LD `7*$SZ`($ctx),$H 290 291 extru $inp,31,`log($SZ)/log(2)`,$t0 292 sh3addl $t0,%r0,$t0 293 subi `8*$SZ`,$t0,$t0 294 mtctl $t0,%cr11 ; load %sar with align factor 295 296L\$oop 297 ldi `$SZ-1`,$t0 298 $LDM $SZ($Tbl),$t1 299 andcm $inp,$t0,$t0 ; align $inp 300___ 301 for ($i=0;$i<15;$i++) { # load input block 302 $code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; } 303$code.=<<___; 304 cmpb,*= $inp,$t0,L\$aligned 305 $LD `$SZ*15`($t0),@X[15] 306 $LD `$SZ*16`($t0),@X[16] 307___ 308 for ($i=0;$i<16;$i++) { # align data 309 $code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; } 310$code.=<<___; 311L\$aligned 312 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD 313___ 314 315for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } 316$code.=<<___; 317L\$rounds 318 nop ; otherwise /usr/ccs/bin/as is confused by below .WORD 319___ 320for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } 321$code.=<<___; 322 bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled? 323 nop 324 325 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments 326 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp 327 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num 328 ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl 329 330 $LD `0*$SZ`($ctx),@X[0] ; load context 331 $LD `1*$SZ`($ctx),@X[1] 332 $LD `2*$SZ`($ctx),@X[2] 333 $LD `3*$SZ`($ctx),@X[3] 334 $LD `4*$SZ`($ctx),@X[4] 335 $LD `5*$SZ`($ctx),@X[5] 336 addl @X[0],$A,$A 337 $LD `6*$SZ`($ctx),@X[6] 338 addl @X[1],$B,$B 339 $LD `7*$SZ`($ctx),@X[7] 340 ldo `16*$SZ`($inp),$inp ; advance $inp 341 342 $ST $A,`0*$SZ`($ctx) ; save context 343 addl @X[2],$C,$C 344 $ST $B,`1*$SZ`($ctx) 345 addl @X[3],$D,$D 346 $ST $C,`2*$SZ`($ctx) 347 addl @X[4],$E,$E 348 $ST $D,`3*$SZ`($ctx) 349 addl @X[5],$F,$F 350 $ST $E,`4*$SZ`($ctx) 351 addl @X[6],$G,$G 352 $ST $F,`5*$SZ`($ctx) 353 addl @X[7],$H,$H 354 $ST $G,`6*$SZ`($ctx) 355 $ST $H,`7*$SZ`($ctx) 356 357 cmpb,*<>,n $inp,$num,L\$oop 358 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp 359___ 360if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0 361{{ 362$code.=<<___; 363 b L\$done 364 nop 365 366 .ALIGN 64 367L\$parisc1 368___ 369 370@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo, 371 $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = 372 ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", 373 "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); 374$a0 ="%r17"; 375$a1 ="%r18"; 376$a2 ="%r19"; 377$a3 ="%r20"; 378$t0 ="%r21"; 379$t1 ="%r22"; 380$t2 ="%r28"; 381$t3 ="%r29"; 382$Tbl="%r31"; 383 384@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx 385 386sub ROUND_00_15_pa1 { 387my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, 388 $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; 389my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; 390 391$code.=<<___ if (!$flag); 392 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi 393 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] 394___ 395$code.=<<___; 396 shd $ehi,$elo,$Sigma1[0],$t0 397 add $Xlo,$hlo,$hlo 398 shd $elo,$ehi,$Sigma1[0],$t1 399 addc $Xhi,$hhi,$hhi ; h += X[i] 400 shd $ehi,$elo,$Sigma1[1],$t2 401 ldwm 8($Tbl),$Xhi 402 shd $elo,$ehi,$Sigma1[1],$t3 403 ldw -4($Tbl),$Xlo ; load K[i] 404 xor $t2,$t0,$t0 405 xor $t3,$t1,$t1 406 and $flo,$elo,$a0 407 and $fhi,$ehi,$a1 408 shd $ehi,$elo,$Sigma1[2],$t2 409 andcm $glo,$elo,$a2 410 shd $elo,$ehi,$Sigma1[2],$t3 411 andcm $ghi,$ehi,$a3 412 xor $t2,$t0,$t0 413 xor $t3,$t1,$t1 ; Sigma1(e) 414 add $Xlo,$hlo,$hlo 415 xor $a2,$a0,$a0 416 addc $Xhi,$hhi,$hhi ; h += K[i] 417 xor $a3,$a1,$a1 ; Ch(e,f,g) 418 419 add $t0,$hlo,$hlo 420 shd $ahi,$alo,$Sigma0[0],$t0 421 addc $t1,$hhi,$hhi ; h += Sigma1(e) 422 shd $alo,$ahi,$Sigma0[0],$t1 423 add $a0,$hlo,$hlo 424 shd $ahi,$alo,$Sigma0[1],$t2 425 addc $a1,$hhi,$hhi ; h += Ch(e,f,g) 426 shd $alo,$ahi,$Sigma0[1],$t3 427 428 xor $t2,$t0,$t0 429 xor $t3,$t1,$t1 430 shd $ahi,$alo,$Sigma0[2],$t2 431 and $alo,$blo,$a0 432 shd $alo,$ahi,$Sigma0[2],$t3 433 and $ahi,$bhi,$a1 434 xor $t2,$t0,$t0 435 xor $t3,$t1,$t1 ; Sigma0(a) 436 437 and $alo,$clo,$a2 438 and $ahi,$chi,$a3 439 xor $a2,$a0,$a0 440 add $hlo,$dlo,$dlo 441 xor $a3,$a1,$a1 442 addc $hhi,$dhi,$dhi ; d += h 443 and $blo,$clo,$a2 444 add $t0,$hlo,$hlo 445 and $bhi,$chi,$a3 446 addc $t1,$hhi,$hhi ; h += Sigma0(a) 447 xor $a2,$a0,$a0 448 add $a0,$hlo,$hlo 449 xor $a3,$a1,$a1 ; Maj(a,b,c) 450 addc $a1,$hhi,$hhi ; h += Maj(a,b,c) 451 452___ 453$code.=<<___ if ($i==15 && $flag); 454 extru $Xlo,31,10,$Xlo 455 comiclr,= $LAST10BITS,$Xlo,%r0 456 b L\$rounds_pa1 457 nop 458___ 459push(@X,shift(@X)); push(@X,shift(@X)); 460} 461 462sub ROUND_16_xx_pa1 { 463my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; 464my ($i)=shift; 465$i-=16; 466$code.=<<___; 467 ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi 468 ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1] 469 ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1 470 ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9] 471 ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3 472 ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14] 473 shd $Xnhi,$Xnlo,$sigma0[0],$t0 474 shd $Xnlo,$Xnhi,$sigma0[0],$t1 475 add $a0,$Xlo,$Xlo 476 shd $Xnhi,$Xnlo,$sigma0[1],$t2 477 addc $a1,$Xhi,$Xhi 478 shd $Xnlo,$Xnhi,$sigma0[1],$t3 479 xor $t2,$t0,$t0 480 shd $Xnhi,$Xnlo,$sigma0[2],$t2 481 xor $t3,$t1,$t1 482 extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3 483 xor $t2,$t0,$t0 484 shd $a3,$a2,$sigma1[0],$a0 485 xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f]) 486 shd $a2,$a3,$sigma1[0],$a1 487 add $t0,$Xlo,$Xlo 488 shd $a3,$a2,$sigma1[1],$t2 489 addc $t1,$Xhi,$Xhi 490 shd $a2,$a3,$sigma1[1],$t3 491 xor $t2,$a0,$a0 492 shd $a3,$a2,$sigma1[2],$t2 493 xor $t3,$a1,$a1 494 extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3 495 xor $t2,$a0,$a0 496 xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f]) 497 add $a0,$Xlo,$Xlo 498 addc $a1,$Xhi,$Xhi 499 500 stw $Xhi,`-$XOFF+8*($i%16)`(%sp) 501 stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp) 502___ 503&ROUND_00_15_pa1($i,@_,1); 504} 505$code.=<<___; 506 ldw `0*4`($ctx),$Ahi ; load context 507 ldw `1*4`($ctx),$Alo 508 ldw `2*4`($ctx),$Bhi 509 ldw `3*4`($ctx),$Blo 510 ldw `4*4`($ctx),$Chi 511 ldw `5*4`($ctx),$Clo 512 ldw `6*4`($ctx),$Dhi 513 ldw `7*4`($ctx),$Dlo 514 ldw `8*4`($ctx),$Ehi 515 ldw `9*4`($ctx),$Elo 516 ldw `10*4`($ctx),$Fhi 517 ldw `11*4`($ctx),$Flo 518 ldw `12*4`($ctx),$Ghi 519 ldw `13*4`($ctx),$Glo 520 ldw `14*4`($ctx),$Hhi 521 ldw `15*4`($ctx),$Hlo 522 523 extru $inp,31,2,$t0 524 sh3addl $t0,%r0,$t0 525 subi 32,$t0,$t0 526 mtctl $t0,%cr11 ; load %sar with align factor 527 528L\$oop_pa1 529 extru $inp,31,2,$a3 530 comib,= 0,$a3,L\$aligned_pa1 531 sub $inp,$a3,$inp 532 533 ldw `0*4`($inp),$X[0] 534 ldw `1*4`($inp),$X[1] 535 ldw `2*4`($inp),$t2 536 ldw `3*4`($inp),$t3 537 ldw `4*4`($inp),$a0 538 ldw `5*4`($inp),$a1 539 ldw `6*4`($inp),$a2 540 ldw `7*4`($inp),$a3 541 vshd $X[0],$X[1],$X[0] 542 vshd $X[1],$t2,$X[1] 543 stw $X[0],`-$XOFF+0*4`(%sp) 544 ldw `8*4`($inp),$t0 545 vshd $t2,$t3,$t2 546 stw $X[1],`-$XOFF+1*4`(%sp) 547 ldw `9*4`($inp),$t1 548 vshd $t3,$a0,$t3 549___ 550{ 551my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); 552for ($i=2;$i<=(128/4-8);$i++) { 553$code.=<<___; 554 stw $t[0],`-$XOFF+$i*4`(%sp) 555 ldw `(8+$i)*4`($inp),$t[0] 556 vshd $t[1],$t[2],$t[1] 557___ 558push(@t,shift(@t)); 559} 560for (;$i<(128/4-1);$i++) { 561$code.=<<___; 562 stw $t[0],`-$XOFF+$i*4`(%sp) 563 vshd $t[1],$t[2],$t[1] 564___ 565push(@t,shift(@t)); 566} 567$code.=<<___; 568 b L\$collected_pa1 569 stw $t[0],`-$XOFF+$i*4`(%sp) 570 571___ 572} 573$code.=<<___; 574L\$aligned_pa1 575 ldw `0*4`($inp),$X[0] 576 ldw `1*4`($inp),$X[1] 577 ldw `2*4`($inp),$t2 578 ldw `3*4`($inp),$t3 579 ldw `4*4`($inp),$a0 580 ldw `5*4`($inp),$a1 581 ldw `6*4`($inp),$a2 582 ldw `7*4`($inp),$a3 583 stw $X[0],`-$XOFF+0*4`(%sp) 584 ldw `8*4`($inp),$t0 585 stw $X[1],`-$XOFF+1*4`(%sp) 586 ldw `9*4`($inp),$t1 587___ 588{ 589my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); 590for ($i=2;$i<(128/4-8);$i++) { 591$code.=<<___; 592 stw $t[0],`-$XOFF+$i*4`(%sp) 593 ldw `(8+$i)*4`($inp),$t[0] 594___ 595push(@t,shift(@t)); 596} 597for (;$i<128/4;$i++) { 598$code.=<<___; 599 stw $t[0],`-$XOFF+$i*4`(%sp) 600___ 601push(@t,shift(@t)); 602} 603$code.="L\$collected_pa1\n"; 604} 605 606for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } 607$code.="L\$rounds_pa1\n"; 608for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } 609 610$code.=<<___; 611 $POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments 612 $POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp 613 $POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num 614 ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl 615 616 ldw `0*4`($ctx),$t1 ; update context 617 ldw `1*4`($ctx),$t0 618 ldw `2*4`($ctx),$t3 619 ldw `3*4`($ctx),$t2 620 ldw `4*4`($ctx),$a1 621 ldw `5*4`($ctx),$a0 622 ldw `6*4`($ctx),$a3 623 add $t0,$Alo,$Alo 624 ldw `7*4`($ctx),$a2 625 addc $t1,$Ahi,$Ahi 626 ldw `8*4`($ctx),$t1 627 add $t2,$Blo,$Blo 628 ldw `9*4`($ctx),$t0 629 addc $t3,$Bhi,$Bhi 630 ldw `10*4`($ctx),$t3 631 add $a0,$Clo,$Clo 632 ldw `11*4`($ctx),$t2 633 addc $a1,$Chi,$Chi 634 ldw `12*4`($ctx),$a1 635 add $a2,$Dlo,$Dlo 636 ldw `13*4`($ctx),$a0 637 addc $a3,$Dhi,$Dhi 638 ldw `14*4`($ctx),$a3 639 add $t0,$Elo,$Elo 640 ldw `15*4`($ctx),$a2 641 addc $t1,$Ehi,$Ehi 642 stw $Ahi,`0*4`($ctx) 643 add $t2,$Flo,$Flo 644 stw $Alo,`1*4`($ctx) 645 addc $t3,$Fhi,$Fhi 646 stw $Bhi,`2*4`($ctx) 647 add $a0,$Glo,$Glo 648 stw $Blo,`3*4`($ctx) 649 addc $a1,$Ghi,$Ghi 650 stw $Chi,`4*4`($ctx) 651 add $a2,$Hlo,$Hlo 652 stw $Clo,`5*4`($ctx) 653 addc $a3,$Hhi,$Hhi 654 stw $Dhi,`6*4`($ctx) 655 ldo `16*$SZ`($inp),$inp ; advance $inp 656 stw $Dlo,`7*4`($ctx) 657 stw $Ehi,`8*4`($ctx) 658 stw $Elo,`9*4`($ctx) 659 stw $Fhi,`10*4`($ctx) 660 stw $Flo,`11*4`($ctx) 661 stw $Ghi,`12*4`($ctx) 662 stw $Glo,`13*4`($ctx) 663 stw $Hhi,`14*4`($ctx) 664 comb,= $inp,$num,L\$done 665 stw $Hlo,`15*4`($ctx) 666 b L\$oop_pa1 667 $PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp 668L\$done 669___ 670}} 671$code.=<<___; 672 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue 673 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 674 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 675 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 676 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 677 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 678 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 679 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 680 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 681 $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 682 $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 683 $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 684 $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 685 $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 686 $POP `-$FRAME+14*$SIZE_T`(%sp),%r17 687 $POP `-$FRAME+15*$SIZE_T`(%sp),%r18 688 bv (%r2) 689 .EXIT 690 $POPMB -$FRAME(%sp),%r3 691 .PROCEND 692 .STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" 693___ 694 695# Explicitly encode PA-RISC 2.0 instructions used in this module, so 696# that it can be compiled with .LEVEL 1.0. It should be noted that I 697# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 698# directive... 699 700my $ldd = sub { 701 my ($mod,$args) = @_; 702 my $orig = "ldd$mod\t$args"; 703 704 if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices 705 { my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1); 706 $opcode|=(1<<3) if ($mod =~ /^,m/); 707 $opcode|=(1<<2) if ($mod =~ /^,mb/); 708 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; 709 } 710 else { "\t".$orig; } 711}; 712 713my $std = sub { 714 my ($mod,$args) = @_; 715 my $orig = "std$mod\t$args"; 716 717 if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices 718 { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); 719 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; 720 } 721 else { "\t".$orig; } 722}; 723 724my $extrd = sub { 725 my ($mod,$args) = @_; 726 my $orig = "extrd$mod\t$args"; 727 728 # I only have ",u" completer, it's implicitly encoded... 729 if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15 730 { my $opcode=(0x36<<26)|($1<<21)|($4<<16); 731 my $len=32-$3; 732 $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos 733 $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len 734 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; 735 } 736 elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12 737 { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); 738 my $len=32-$2; 739 $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len 740 $opcode |= (1<<13) if ($mod =~ /,\**=/); 741 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; 742 } 743 else { "\t".$orig; } 744}; 745 746my $shrpd = sub { 747 my ($mod,$args) = @_; 748 my $orig = "shrpd$mod\t$args"; 749 750 if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14 751 { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; 752 my $cpos=63-$3; 753 $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa 754 sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; 755 } 756 elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11 757 { sprintf "\t.WORD\t0x%08x\t; %s", 758 (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; 759 } 760 else { "\t".$orig; } 761}; 762 763sub assemble { 764 my ($mnemonic,$mod,$args)=@_; 765 my $opcode = eval("\$$mnemonic"); 766 767 ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; 768} 769 770if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` 771 =~ /GNU assembler/) { 772 $gnuas = 1; 773} 774 775foreach (split("\n",$code)) { 776 s/\`([^\`]*)\`/eval $1/ge; 777 778 s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/ 779 $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32 780 : sprintf("shd\t%$1,%$2,%d",$3)/e or 781 # translate made up instructions: _ror, _shr, _align, _shl 782 s/_ror(\s+)(%r[0-9]+),/ 783 ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or 784 785 s/_shr(\s+%r[0-9]+),([0-9]+),/ 786 $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2) 787 : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or 788 789 s/_align(\s+%r[0-9]+,%r[0-9]+),/ 790 ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or 791 792 s/_shl(\s+%r[0-9]+),([0-9]+),/ 793 $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2) 794 : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e; 795 796 s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4); 797 798 s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); 799 s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); 800 s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); 801 s/cmpb,\*/comb,/ if ($SIZE_T==4); 802 s/\bbv\b/bve/ if ($SIZE_T==8); 803 804 print $_,"\n"; 805} 806 807close STDOUT or die "error closing STDOUT: $!"; 808