#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.
#
# Usage: perl <this-script> <flavour> [<output>]
#
#   <flavour>  a flavour string containing "64" selects the 64-bit
#              (.LEVEL 2.0W) build, anything else the 32-bit one;
#   <output>   file to write the generated assembly to; when omitted
#              the assembly goes to the inherited STDOUT.

use strict;
use warnings;

my $flavour = shift;
my $output  = shift;
$flavour = '' unless defined $flavour;

# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be created: silently carrying on would leave the
# build with a missing or stale assembly file.
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}

# ABI-dependent parameters: register width, frame layout and the
# store/load mnemonics used by the prologue and epilogue.
my ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP);
my ($PUSH, $PUSHMA, $POP, $POPMB);

if ($flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

my $FRAME = 14*$SIZE_T + $FRAME_MARKER;  # 14 saved regs + frame marker
                                         #                [+ argument transfer]
my $ctx = "%r26";   # arg0
my $inp = "%r25";   # arg1
my $num = "%r24";   # arg2

my $t0 = "%r28";
my $t1 = "%r29";
my $K  = "%r31";

# Sixteen registers hold the message schedule window. The 17th entry
# aliases $t0 and only receives the extra word loaded for unaligned input.
my @X = ("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
         "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated after every round so that each BODY_*
# call sees them in the order (a,b,c,d,e) for that round.
my ($A, $B, $C, $D, $E) = ("%r19","%r20","%r21","%r22","%r23");
my @V = ($A, $B, $C, $D, $E);

# Accumulates the generated assembly; every BODY_* sub appends to it.
my $code;

# BODY_00_19($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for round $i of the first 20 rounds, where
# the round function is (b AND c) OR (d AND NOT b). From round 15 on the
# sequence also updates schedule word ($i+1)%16 for the next round
# ("forward Xupdate"). The register names in $a..$e are the round's
# working variables; $b and $e are modified by the emitted code.
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    if ($i<15) {
        $code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	$X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
    } else {                            # with forward Xupdate
        $code.=<<___;
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]
	addl	$X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	add	$t0,$e,$e
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
___
    }
    return;
}

# BODY_20_39($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for a round whose function is b XOR c XOR d;
# the driver below uses it for rounds 20..39 and again for 60..79. For
# $i < 79 the schedule word for the next round is updated as well. Round
# 79 has no next round, so its free slots are used to start loading the
# five context words into $X[0..4] for the final accumulation.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    if ($i<79) {
        $code.=<<___;
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	addl	$X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
	addl	$t0,$e,$e
___
    } elsif ($i==79) {                  # with context load
        $code.=<<___;
	ldw	0($ctx),$X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),$X[1]
	addl	$X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),$X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),$X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),$X[4]
___
    }
    return;
}

# BODY_40_59($i, $a, $b, $c, $d, $e)
#
# Appends the instructions for round $i of rounds 40..59. The round
# function is accumulated into e in two pieces, (b AND (c XOR d)) and
# (c AND d), and the schedule word for the next round is updated.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    $code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]
	xor	$d,$c,$t0
	addl	$X[$i%16],$e,$e
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
	addl	$t1,$e,$e
___
    return;
}

# Prologue: save callee-saved registers, load the hash state and set the
# shift-amount register from the input pointer's misalignment.
$code=<<___;
	.LEVEL	$LEVEL
#if 0
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
#else
	.text
#endif

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___

# Load the first 15 words of the input block from the word-aligned base.
for my $w (0..14) {
    $code.="\tldw\t`4*$w`($t0),$X[$w]\n";
}

# The 16th word is loaded in the branch delay slot; the 17th is only
# fetched on the unaligned path.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),$X[15]
	ldw	64($t0),$X[16]
___

# Unaligned input: funnel-shift each adjacent word pair into place.
for my $w (0..15) {
    $code.="\tvshd\t$X[$w],$X[$w+1],$X[$w]\n";
}

$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
for my $round (0..19)  { BODY_00_19($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___
for my $round (20..39) { BODY_20_39($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___
for my $round (40..59) { BODY_40_59($round,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for my $round (60..79) { BODY_20_39($round,@V); unshift(@V,pop(@V)); }

# Fold the previous state (reloaded into X[0..4] during round 79) into
# the working variables, store it back, loop over blocks, then restore
# the saved registers and return.
$code.=<<___;
	addl	$X[0],$A,$A
	addl	$X[1],$B,$B
	addl	$X[2],$C,$C
	addl	$X[3],$D,$D
	addl	$X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<> -1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.data
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Post-processing of the generated text:
#  - evaluate the arithmetic placed between backticks (the variables were
#    already interpolated, so only numeric expressions reach eval);
#  - on 32-bit builds drop the ",*" 64-bit condition completers;
#  - on 64-bit builds turn "bv" into "bve".
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);

print $code;

# Buffered write errors (e.g. disk full) only surface at close time, so
# an unchecked close could leave a truncated assembly file unnoticed.
close STDOUT or die "error closing STDOUT: $!";