#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================
#
# Eternal question is what's wrong with compiler generated code? The
# trick is that it's possible to reduce the number of shifts required
# to perform rotations by maintaining copy of 32-bit value in upper
# bits of 64-bit register. Just follow mux2 and shrp instructions...
# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
# is >50% better than HP C and >2x better than gcc. As of this moment
# performance under little-endian OS such as Linux and Windows will be
# a bit lower, because data has to be picked in reverse byte-order.
# It's possible to resolve this issue by implementing third function,
# sha1_block_asm_data_order_aligned, which would temporarily flip
# BE field in User Mask register...

# $code accumulates the generated IA-64 assembler text; it is emitted
# by the final "print $code;" at the bottom of the script.
$code=<<___;
.ident  \"sha1-ia64.s, version 1.0\"
.ident  \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
.explicit

___

# Pointer-arithmetic mnemonic: HP-UX ILP32 mode needs addp4 to swizzle
# 32-bit pointers into 64-bit addresses; LP64 builds use plain add.
if ($^O eq "hpux") {
	$ADDP="addp4";
	# NOTE(review): [\+DD|\-mlp]64 is a character class, so it matches
	# any single one of '+','D','|','-','m','l','p' followed by "64";
	# it reads as if the alternation (+DD64|-mlp64) was intended --
	# confirm the intent before changing it.
	for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
} else { $ADDP="add"; }

# Endianness selection: honour explicit -DB_ENDIAN/-DL_ENDIAN flags,
# otherwise probe the build host (pack 'N' == big-endian layout test).
for (@ARGV) {	$big_endian=1 if (/\-DB_ENDIAN/);
		$big_endian=0 if (/\-DL_ENDIAN/);  }
if (!defined($big_endian))
	{ $big_endian=(unpack('L',pack('N',1))==1); }

#$human=1;
if ($human) {	# useful for visual code auditing...
	# Symbolic names instead of real registers -- output will not
	# assemble, but the data flow becomes easy to read.
	($A,$B,$C,$D,$E,$T) = ("A","B","C","D","E","T");
	($h0,$h1,$h2,$h3,$h4) = ("h0","h1","h2","h3","h4");
	($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
	    ( "K_00_19","K_20_39","K_40_59","K_60_79" );
	@X= ( "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
	      "X8", "X9","X10","X11","X12","X13","X14","X15" );
}
else {
	# Real register assignment: SHA-1 working variables a..e plus the
	# temporary T live in stacked locals, the chaining values h0-h4 in
	# further locals, round constants in r14/r15/loc11/loc12, and the
	# 16-word message schedule X[0..15] in r16-r31.
	($A,$B,$C,$D,$E,$T) = ("loc0","loc1","loc2","loc3","loc4","loc5");
	($h0,$h1,$h2,$h3,$h4) = ("loc6","loc7","loc8","loc9","loc10");
	($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
	    ( "r14", "r15", "loc11", "loc12" );
	@X= ( "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
	      "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" );
}

# BODY_00_15: emit one round for i in 0..15, where the schedule word
# X[i] comes straight from the input block.
#   *code      - ref to the string being appended to
#   $i         - round number
#   $a..$e     - names of the registers holding the rotated state
#   $f         - register receiving the new "a" value
#   $unaligned - truthy in the byte-by-byte (data-order) code path
# Rounds i<15 also prefetch X[i+1]; round 15 instead starts the
# Xupdate xor chain for round 16.
sub BODY_00_15 {
local *code=shift;
local ($i,$a,$b,$c,$d,$e,$f,$unaligned)=@_;

if ($unaligned) {
	# Pick the word one byte at a time from two interleaved pointers
	# (inp and tmp3=inp+1) and merge MSB-first with dep.
	$code.=<<___;
{ .mmi;	ld1	tmp0=[inp],2		// MSB
	ld1	tmp1=[tmp3],2 };;
{ .mmi;	ld1	tmp2=[inp],2
	ld1	$X[$i&0xf]=[tmp3],2	// LSB
	dep	tmp1=tmp0,tmp1,8,8 };;
{ .mii;	cmp.ne	p16,p0=r0,r0		// no misaligned prefetch
	dep	$X[$i&0xf]=tmp2,$X[$i&0xf],8,8;;
	dep	$X[$i&0xf]=tmp1,$X[$i&0xf],16,16 };;
{ .mmi;	nop.m	0
___
	}
elsif ($i<15) {
	$code.=<<___;
{ .mmi;	ld4	$X[($i+1)&0xf]=[inp],4	// prefetch
___
	}
else	{
	$code.=<<___;
{ .mmi;	nop.m	0
___
	}
if ($i<15) {
	# Plain round: f = ROTATE(a,5) + F_00_19(b,c,d) + e + K + X[i];
	# b is rotated via the mux2/shrp upper-copy trick.
	$code.=<<___;
	and	tmp0=$c,$b
	dep.z	tmp5=$a,5,27 }		// a<<5
{ .mmi;	andcm	tmp1=$d,$b
	add	tmp4=$e,$K_00_19 };;
{ .mmi;	or	tmp0=tmp0,tmp1		// F_00_19(b,c,d)=(b&c)|(~b&d)
	add	$f=tmp4,$X[$i&0xf]	// f=xi+e+K_00_19
	extr.u	tmp1=$a,27,5 };;	// a>>27
{ .mib;	add	$f=$f,tmp0		// f+=F_00_19(b,c,d)
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30)
{ .mib;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	mux2	tmp6=$a,0x44 };;	// see b in next iteration
{ .mii;	add	$f=$f,tmp1		// f+=ROTATE(a,5)
	mux2	$X[$i&0xf]=$X[$i&0xf],0x44
	nop.i	0 };;

___
	}
else {
	# Round 15 additionally computes the first Xupdate value
	# (the "+1" comments) so it is ready for round 16.
	$code.=<<___;
	and	tmp0=$c,$b
	dep.z	tmp5=$a,5,27 }		// a<<5 ;;?
{ .mmi;	andcm	tmp1=$d,$b
	add	tmp4=$e,$K_00_19 };;
{ .mmi;	or	tmp0=tmp0,tmp1		// F_00_19(b,c,d)=(b&c)|(~b&d)
	add	$f=tmp4,$X[$i&0xf]	// f=xi+e+K_00_19
	extr.u	tmp1=$a,27,5 }		// a>>27
{ .mmi;	xor	tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]	// +1
	xor	tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf]	// +1
	nop.i	0 };;
{ .mmi;	add	$f=$f,tmp0		// f+=F_00_19(b,c,d)
	xor	tmp2=tmp2,tmp3		// +1
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	mux2	tmp6=$a,0x44 };;	// see b in next iteration
{ .mii;	add	$f=$f,tmp1		// f+=ROTATE(a,5)
	shrp	$e=tmp2,tmp2,31		// f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
	mux2	$X[$i&0xf]=$X[$i&0xf],0x44 };;

___
	}
}

# BODY_16_19: rounds 16-19 -- same F_00_19 selector function as above,
# but X[i] now comes from the Xupdate value ($f computed one round ago),
# and the next round's Xupdate is interleaved ("+1" lines).
sub BODY_16_19 {
local *code=shift;
local ($i,$a,$b,$c,$d,$e,$f)=@_;

$code.=<<___;
{ .mmi;	mov	$X[$i&0xf]=$f		// Xupdate
	and	tmp0=$c,$b
	dep.z	tmp5=$a,5,27 }		// a<<5
{ .mmi;	andcm	tmp1=$d,$b
	add	tmp4=$e,$K_00_19 };;
{ .mmi;	or	tmp0=tmp0,tmp1		// F_00_19(b,c,d)=(b&c)|(~b&d)
	add	$f=$f,tmp4		// f+=e+K_00_19
	extr.u	tmp1=$a,27,5 }		// a>>27
{ .mmi;	xor	tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]	// +1
	xor	tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf]	// +1
	nop.i	0 };;
{ .mmi;	add	$f=$f,tmp0		// f+=F_00_19(b,c,d)
	xor	tmp2=tmp2,tmp3		// +1
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	mux2	tmp6=$a,0x44 };;	// see b in next iteration
{ .mii;	add	$f=$f,tmp1		// f+=ROTATE(a,5)
	shrp	$e=tmp2,tmp2,31		// f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0 };;

___
}

# BODY_20_39: rounds 20-39 (F = b^c^d). Also reused for rounds 60-79 by
# BODY_60_79, which passes $K_60_79 as the optional $Konst argument.
# The i==79 branch folds the "wrap up" additions h1+=a .. h4+=d and the
# next-block speculative prefetch into the final round.
sub BODY_20_39 {
local *code=shift;
local ($i,$a,$b,$c,$d,$e,$f,$Konst)=@_;
	$Konst = $K_20_39 if (!defined($Konst));

if ($i<79) {
$code.=<<___;
{ .mib;	mov	$X[$i&0xf]=$f		// Xupdate
	dep.z	tmp5=$a,5,27 }		// a<<5
{ .mib;	xor	tmp0=$c,$b
	add	tmp4=$e,$Konst };;
{ .mmi;	xor	tmp0=tmp0,$d		// F_20_39(b,c,d)=b^c^d
	add	$f=$f,tmp4		// f+=e+K_20_39
	extr.u	tmp1=$a,27,5 }		// a>>27
{ .mmi;	xor	tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]	// +1
	xor	tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf]	// +1
	nop.i	0 };;
{ .mmi;	add	$f=$f,tmp0		// f+=F_20_39(b,c,d)
	xor	tmp2=tmp2,tmp3		// +1
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30)
{ .mmi;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	mux2	tmp6=$a,0x44 };;	// see b in next iteration
{ .mii;	add	$f=$f,tmp1		// f+=ROTATE(a,5)
	shrp	$e=tmp2,tmp2,31		// f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
	nop.i	0 };;

___
}
else {
$code.=<<___;
{ .mib;	mov	$X[$i&0xf]=$f		// Xupdate
	dep.z	tmp5=$a,5,27 }		// a<<5
{ .mib;	xor	tmp0=$c,$b
	add	tmp4=$e,$Konst };;
{ .mib;	xor	tmp0=tmp0,$d		// F_20_39(b,c,d)=b^c^d
	extr.u	tmp1=$a,27,5 }		// a>>27
{ .mib;	add	$f=$f,tmp4		// f+=e+K_20_39
	add	$h1=$h1,$a };;		// wrap up
{ .mmi;
(p16)	ld4.s	$X[0]=[inp],4		// non-faulting prefetch
	add	$f=$f,tmp0		// f+=F_20_39(b,c,d)
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30) ;;?
{ .mmi;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	add	$h3=$h3,$c };;		// wrap up
{ .mib;	add	tmp3=1,inp		// used in unaligned codepath
	add	$f=$f,tmp1 }		// f+=ROTATE(a,5)
{ .mib;	add	$h2=$h2,$b		// wrap up
	add	$h4=$h4,$d };;		// wrap up

___
}
}

# BODY_40_59: rounds 40-59 with the majority selector
# F_40_59(b,c,d)=(b&c)|(b&d)|(c&d), again interleaving next round's
# Xupdate ("+1" lines).
sub BODY_40_59 {
local *code=shift;
local ($i,$a,$b,$c,$d,$e,$f)=@_;

$code.=<<___;
{ .mmi;	mov	$X[$i&0xf]=$f		// Xupdate
	and	tmp0=$c,$b
	dep.z	tmp5=$a,5,27 }		// a<<5
{ .mmi;	and	tmp1=$d,$b
	add	tmp4=$e,$K_40_59 };;
{ .mmi;	or	tmp0=tmp0,tmp1		// (b&c)|(b&d)
	add	$f=$f,tmp4		// f+=e+K_40_59
	extr.u	tmp1=$a,27,5 }		// a>>27
{ .mmi;	and	tmp4=$c,$d
	xor	tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]	// +1
	xor	tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf]	// +1
	};;
{ .mmi;	or	tmp1=tmp1,tmp5		// ROTATE(a,5)
	xor	tmp2=tmp2,tmp3		// +1
	shrp	$b=tmp6,tmp6,2 }	// b=ROTATE(b,30)
{ .mmi;	or	tmp0=tmp0,tmp4		// F_40_59(b,c,d)=(b&c)|(b&d)|(c&d)
	mux2	tmp6=$a,0x44 };;	// see b in next iteration
{ .mii;	add	$f=$f,tmp0		// f+=F_40_59(b,c,d)
	shrp	$e=tmp2,tmp2,31;;	// f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
	add	$f=$f,tmp1 };;		// f+=ROTATE(a,5)

___
}
# Rounds 60-79 reuse the 20-39 body with the K_60_79 constant.
sub BODY_60_79 { &BODY_20_39(@_,$K_60_79); }

# ---- sha1_block_asm_host_order: input words already in host order ----
# Prologue: map scratch names onto static registers, load the five
# chaining values from *ctx, materialize the four round constants, and
# set up ar.lc for the counted block loop (num-1 iterations).
$code.=<<___;
.text

tmp0=r8;
tmp1=r9;
tmp2=r10;
tmp3=r11;
ctx=r32;	// in0
inp=r33;	// in1

// void sha1_block_asm_host_order(SHA_CTX *c,const void *p,size_t num);
.global	sha1_block_asm_host_order#
.proc	sha1_block_asm_host_order#
.align	32
sha1_block_asm_host_order:
	.prologue
	.fframe	0
	.save	ar.pfs,r0
	.save	ar.lc,r3
{ .mmi;	alloc	tmp1=ar.pfs,3,15,0,0
	$ADDP	tmp0=4,ctx
	mov	r3=ar.lc }
{ .mmi;	$ADDP	ctx=0,ctx
	$ADDP	inp=0,inp
	mov	r2=pr };;
tmp4=in2;
tmp5=loc13;
tmp6=loc14;
	.body
{ .mlx;	ld4	$h0=[ctx],8
	movl	$K_00_19=0x5a827999 }
{ .mlx;	ld4	$h1=[tmp0],8
	movl	$K_20_39=0x6ed9eba1 };;
{ .mlx;	ld4	$h2=[ctx],8
	movl	$K_40_59=0x8f1bbcdc }
{ .mlx;	ld4	$h3=[tmp0]
	movl	$K_60_79=0xca62c1d6 };;
{ .mmi;	ld4	$h4=[ctx],-16
	add	in2=-1,in2		// adjust num for ar.lc
	mov	ar.ec=1 };;
{ .mmi;	ld4	$X[0]=[inp],4		// prefetch
	cmp.ne	p16,p0=r0,in2		// prefecth at loop end
	mov	ar.lc=in2 };;		// brp.loop.imp: too far

.Lhtop:
{ .mmi;	mov	$A=$h0
	mov	$B=$h1
	mux2	tmp6=$h1,0x44 }
{ .mmi;	mov	$C=$h2
	mov	$D=$h3
	mov	$E=$h4 };;

___

	# Unrolled rounds: the 6-tuple of state registers rotates right by
	# one position each round ($f of this round is $a of the next).
	&BODY_00_15(\$code, 0,$A,$B,$C,$D,$E,$T);
	&BODY_00_15(\$code, 1,$T,$A,$B,$C,$D,$E);
	&BODY_00_15(\$code, 2,$E,$T,$A,$B,$C,$D);
	&BODY_00_15(\$code, 3,$D,$E,$T,$A,$B,$C);
	&BODY_00_15(\$code, 4,$C,$D,$E,$T,$A,$B);
	&BODY_00_15(\$code, 5,$B,$C,$D,$E,$T,$A);
	&BODY_00_15(\$code, 6,$A,$B,$C,$D,$E,$T);
	&BODY_00_15(\$code, 7,$T,$A,$B,$C,$D,$E);
	&BODY_00_15(\$code, 8,$E,$T,$A,$B,$C,$D);
	&BODY_00_15(\$code, 9,$D,$E,$T,$A,$B,$C);
	&BODY_00_15(\$code,10,$C,$D,$E,$T,$A,$B);
	&BODY_00_15(\$code,11,$B,$C,$D,$E,$T,$A);
	&BODY_00_15(\$code,12,$A,$B,$C,$D,$E,$T);
&BODY_00_15(\$code,13,$T,$A,$B,$C,$D,$E); 306 &BODY_00_15(\$code,14,$E,$T,$A,$B,$C,$D); 307 &BODY_00_15(\$code,15,$D,$E,$T,$A,$B,$C); 308 309 &BODY_16_19(\$code,16,$C,$D,$E,$T,$A,$B); 310 &BODY_16_19(\$code,17,$B,$C,$D,$E,$T,$A); 311 &BODY_16_19(\$code,18,$A,$B,$C,$D,$E,$T); 312 &BODY_16_19(\$code,19,$T,$A,$B,$C,$D,$E); 313 314 &BODY_20_39(\$code,20,$E,$T,$A,$B,$C,$D); 315 &BODY_20_39(\$code,21,$D,$E,$T,$A,$B,$C); 316 &BODY_20_39(\$code,22,$C,$D,$E,$T,$A,$B); 317 &BODY_20_39(\$code,23,$B,$C,$D,$E,$T,$A); 318 &BODY_20_39(\$code,24,$A,$B,$C,$D,$E,$T); 319 &BODY_20_39(\$code,25,$T,$A,$B,$C,$D,$E); 320 &BODY_20_39(\$code,26,$E,$T,$A,$B,$C,$D); 321 &BODY_20_39(\$code,27,$D,$E,$T,$A,$B,$C); 322 &BODY_20_39(\$code,28,$C,$D,$E,$T,$A,$B); 323 &BODY_20_39(\$code,29,$B,$C,$D,$E,$T,$A); 324 &BODY_20_39(\$code,30,$A,$B,$C,$D,$E,$T); 325 &BODY_20_39(\$code,31,$T,$A,$B,$C,$D,$E); 326 &BODY_20_39(\$code,32,$E,$T,$A,$B,$C,$D); 327 &BODY_20_39(\$code,33,$D,$E,$T,$A,$B,$C); 328 &BODY_20_39(\$code,34,$C,$D,$E,$T,$A,$B); 329 &BODY_20_39(\$code,35,$B,$C,$D,$E,$T,$A); 330 &BODY_20_39(\$code,36,$A,$B,$C,$D,$E,$T); 331 &BODY_20_39(\$code,37,$T,$A,$B,$C,$D,$E); 332 &BODY_20_39(\$code,38,$E,$T,$A,$B,$C,$D); 333 &BODY_20_39(\$code,39,$D,$E,$T,$A,$B,$C); 334 335 &BODY_40_59(\$code,40,$C,$D,$E,$T,$A,$B); 336 &BODY_40_59(\$code,41,$B,$C,$D,$E,$T,$A); 337 &BODY_40_59(\$code,42,$A,$B,$C,$D,$E,$T); 338 &BODY_40_59(\$code,43,$T,$A,$B,$C,$D,$E); 339 &BODY_40_59(\$code,44,$E,$T,$A,$B,$C,$D); 340 &BODY_40_59(\$code,45,$D,$E,$T,$A,$B,$C); 341 &BODY_40_59(\$code,46,$C,$D,$E,$T,$A,$B); 342 &BODY_40_59(\$code,47,$B,$C,$D,$E,$T,$A); 343 &BODY_40_59(\$code,48,$A,$B,$C,$D,$E,$T); 344 &BODY_40_59(\$code,49,$T,$A,$B,$C,$D,$E); 345 &BODY_40_59(\$code,50,$E,$T,$A,$B,$C,$D); 346 &BODY_40_59(\$code,51,$D,$E,$T,$A,$B,$C); 347 &BODY_40_59(\$code,52,$C,$D,$E,$T,$A,$B); 348 &BODY_40_59(\$code,53,$B,$C,$D,$E,$T,$A); 349 &BODY_40_59(\$code,54,$A,$B,$C,$D,$E,$T); 350 &BODY_40_59(\$code,55,$T,$A,$B,$C,$D,$E); 351 
	&BODY_40_59(\$code,56,$E,$T,$A,$B,$C,$D);
	&BODY_40_59(\$code,57,$D,$E,$T,$A,$B,$C);
	&BODY_40_59(\$code,58,$C,$D,$E,$T,$A,$B);
	&BODY_40_59(\$code,59,$B,$C,$D,$E,$T,$A);

	&BODY_60_79(\$code,60,$A,$B,$C,$D,$E,$T);
	&BODY_60_79(\$code,61,$T,$A,$B,$C,$D,$E);
	&BODY_60_79(\$code,62,$E,$T,$A,$B,$C,$D);
	&BODY_60_79(\$code,63,$D,$E,$T,$A,$B,$C);
	&BODY_60_79(\$code,64,$C,$D,$E,$T,$A,$B);
	&BODY_60_79(\$code,65,$B,$C,$D,$E,$T,$A);
	&BODY_60_79(\$code,66,$A,$B,$C,$D,$E,$T);
	&BODY_60_79(\$code,67,$T,$A,$B,$C,$D,$E);
	&BODY_60_79(\$code,68,$E,$T,$A,$B,$C,$D);
	&BODY_60_79(\$code,69,$D,$E,$T,$A,$B,$C);
	&BODY_60_79(\$code,70,$C,$D,$E,$T,$A,$B);
	&BODY_60_79(\$code,71,$B,$C,$D,$E,$T,$A);
	&BODY_60_79(\$code,72,$A,$B,$C,$D,$E,$T);
	&BODY_60_79(\$code,73,$T,$A,$B,$C,$D,$E);
	&BODY_60_79(\$code,74,$E,$T,$A,$B,$C,$D);
	&BODY_60_79(\$code,75,$D,$E,$T,$A,$B,$C);
	&BODY_60_79(\$code,76,$C,$D,$E,$T,$A,$B);
	&BODY_60_79(\$code,77,$B,$C,$D,$E,$T,$A);
	&BODY_60_79(\$code,78,$A,$B,$C,$D,$E,$T);
	&BODY_60_79(\$code,79,$T,$A,$B,$C,$D,$E);

# Host-order epilogue: h0+=e (rounds 79's wrap-up handled h1..h4 inside
# BODY_20_39's i==79 branch), loop back while ar.lc counts down, then
# store the updated chaining values and return.
$code.=<<___;
{ .mmb;	add	$h0=$h0,$E
	nop.m	0
	br.ctop.dptk.many	.Lhtop };;
.Lhend:
{ .mmi;	add	tmp0=4,ctx
	mov	ar.lc=r3 };;
{ .mmi;	st4	[ctx]=$h0,8
	st4	[tmp0]=$h1,8 };;
{ .mmi;	st4	[ctx]=$h2,8
	st4	[tmp0]=$h3 };;
{ .mib;	st4	[ctx]=$h4,-16
	mov	pr=r2,0x1ffff
	br.ret.sptk.many	b0 };;
.endp	sha1_block_asm_host_order#
___


# ---- sha1_block_asm_data_order: input is big-endian byte stream ----
$code.=<<___;
// void sha1_block_asm_data_order(SHA_CTX *c,const void *p,size_t num);
.global	sha1_block_asm_data_order#
.proc	sha1_block_asm_data_order#
.align	32
sha1_block_asm_data_order:
___
# On big-endian systems a 4-byte-aligned input needs no byte shuffling,
# so tail-call straight into the host-order routine.
$code.=<<___ if ($big_endian);
{ .mmi;	and	r2=3,inp };;
{ .mib;	cmp.eq	p6,p0=r0,r2
(p6)	br.dptk.many	sha1_block_asm_host_order };;
___
# Same prologue as host order, except no word prefetch here: tmp3=inp+1
# feeds the byte-wise (unaligned) load path of BODY_00_15 instead.
$code.=<<___;
	.prologue
	.fframe	0
	.save	ar.pfs,r0
	.save	ar.lc,r3
{ .mmi;	alloc	tmp1=ar.pfs,3,15,0,0
	$ADDP	tmp0=4,ctx
	mov	r3=ar.lc }
{ .mmi;	$ADDP	ctx=0,ctx
	$ADDP	inp=0,inp
	mov	r2=pr };;
tmp4=in2;
tmp5=loc13;
tmp6=loc14;
	.body
{ .mlx;	ld4	$h0=[ctx],8
	movl	$K_00_19=0x5a827999 }
{ .mlx;	ld4	$h1=[tmp0],8
	movl	$K_20_39=0x6ed9eba1 };;
{ .mlx;	ld4	$h2=[ctx],8
	movl	$K_40_59=0x8f1bbcdc }
{ .mlx;	ld4	$h3=[tmp0]
	movl	$K_60_79=0xca62c1d6 };;
{ .mmi;	ld4	$h4=[ctx],-16
	add	in2=-1,in2		// adjust num for ar.lc
	mov	ar.ec=1 };;
{ .mmi;	nop.m	0
	add	tmp3=1,inp
	mov	ar.lc=in2 };;		// brp.loop.imp: too far

.Ldtop:
{ .mmi;	mov	$A=$h0
	mov	$B=$h1
	mux2	tmp6=$h1,0x44 }
{ .mmi;	mov	$C=$h2
	mov	$D=$h3
	mov	$E=$h4 };;

___

	# Same round schedule as host order, but the trailing "1" argument
	# selects BODY_00_15's byte-by-byte ($unaligned) load path.
	&BODY_00_15(\$code, 0,$A,$B,$C,$D,$E,$T,1);
	&BODY_00_15(\$code, 1,$T,$A,$B,$C,$D,$E,1);
	&BODY_00_15(\$code, 2,$E,$T,$A,$B,$C,$D,1);
	&BODY_00_15(\$code, 3,$D,$E,$T,$A,$B,$C,1);
	&BODY_00_15(\$code, 4,$C,$D,$E,$T,$A,$B,1);
	&BODY_00_15(\$code, 5,$B,$C,$D,$E,$T,$A,1);
	&BODY_00_15(\$code, 6,$A,$B,$C,$D,$E,$T,1);
	&BODY_00_15(\$code, 7,$T,$A,$B,$C,$D,$E,1);
	&BODY_00_15(\$code, 8,$E,$T,$A,$B,$C,$D,1);
	&BODY_00_15(\$code, 9,$D,$E,$T,$A,$B,$C,1);
	&BODY_00_15(\$code,10,$C,$D,$E,$T,$A,$B,1);
	&BODY_00_15(\$code,11,$B,$C,$D,$E,$T,$A,1);
	&BODY_00_15(\$code,12,$A,$B,$C,$D,$E,$T,1);
	&BODY_00_15(\$code,13,$T,$A,$B,$C,$D,$E,1);
	&BODY_00_15(\$code,14,$E,$T,$A,$B,$C,$D,1);
	&BODY_00_15(\$code,15,$D,$E,$T,$A,$B,$C,1);

	&BODY_16_19(\$code,16,$C,$D,$E,$T,$A,$B);
	&BODY_16_19(\$code,17,$B,$C,$D,$E,$T,$A);
	&BODY_16_19(\$code,18,$A,$B,$C,$D,$E,$T);
	&BODY_16_19(\$code,19,$T,$A,$B,$C,$D,$E);

	&BODY_20_39(\$code,20,$E,$T,$A,$B,$C,$D);
	&BODY_20_39(\$code,21,$D,$E,$T,$A,$B,$C);
	&BODY_20_39(\$code,22,$C,$D,$E,$T,$A,$B);
	&BODY_20_39(\$code,23,$B,$C,$D,$E,$T,$A);
	&BODY_20_39(\$code,24,$A,$B,$C,$D,$E,$T);
	&BODY_20_39(\$code,25,$T,$A,$B,$C,$D,$E);
	&BODY_20_39(\$code,26,$E,$T,$A,$B,$C,$D);
&BODY_20_39(\$code,27,$D,$E,$T,$A,$B,$C); 477 &BODY_20_39(\$code,28,$C,$D,$E,$T,$A,$B); 478 &BODY_20_39(\$code,29,$B,$C,$D,$E,$T,$A); 479 &BODY_20_39(\$code,30,$A,$B,$C,$D,$E,$T); 480 &BODY_20_39(\$code,31,$T,$A,$B,$C,$D,$E); 481 &BODY_20_39(\$code,32,$E,$T,$A,$B,$C,$D); 482 &BODY_20_39(\$code,33,$D,$E,$T,$A,$B,$C); 483 &BODY_20_39(\$code,34,$C,$D,$E,$T,$A,$B); 484 &BODY_20_39(\$code,35,$B,$C,$D,$E,$T,$A); 485 &BODY_20_39(\$code,36,$A,$B,$C,$D,$E,$T); 486 &BODY_20_39(\$code,37,$T,$A,$B,$C,$D,$E); 487 &BODY_20_39(\$code,38,$E,$T,$A,$B,$C,$D); 488 &BODY_20_39(\$code,39,$D,$E,$T,$A,$B,$C); 489 490 &BODY_40_59(\$code,40,$C,$D,$E,$T,$A,$B); 491 &BODY_40_59(\$code,41,$B,$C,$D,$E,$T,$A); 492 &BODY_40_59(\$code,42,$A,$B,$C,$D,$E,$T); 493 &BODY_40_59(\$code,43,$T,$A,$B,$C,$D,$E); 494 &BODY_40_59(\$code,44,$E,$T,$A,$B,$C,$D); 495 &BODY_40_59(\$code,45,$D,$E,$T,$A,$B,$C); 496 &BODY_40_59(\$code,46,$C,$D,$E,$T,$A,$B); 497 &BODY_40_59(\$code,47,$B,$C,$D,$E,$T,$A); 498 &BODY_40_59(\$code,48,$A,$B,$C,$D,$E,$T); 499 &BODY_40_59(\$code,49,$T,$A,$B,$C,$D,$E); 500 &BODY_40_59(\$code,50,$E,$T,$A,$B,$C,$D); 501 &BODY_40_59(\$code,51,$D,$E,$T,$A,$B,$C); 502 &BODY_40_59(\$code,52,$C,$D,$E,$T,$A,$B); 503 &BODY_40_59(\$code,53,$B,$C,$D,$E,$T,$A); 504 &BODY_40_59(\$code,54,$A,$B,$C,$D,$E,$T); 505 &BODY_40_59(\$code,55,$T,$A,$B,$C,$D,$E); 506 &BODY_40_59(\$code,56,$E,$T,$A,$B,$C,$D); 507 &BODY_40_59(\$code,57,$D,$E,$T,$A,$B,$C); 508 &BODY_40_59(\$code,58,$C,$D,$E,$T,$A,$B); 509 &BODY_40_59(\$code,59,$B,$C,$D,$E,$T,$A); 510 511 &BODY_60_79(\$code,60,$A,$B,$C,$D,$E,$T); 512 &BODY_60_79(\$code,61,$T,$A,$B,$C,$D,$E); 513 &BODY_60_79(\$code,62,$E,$T,$A,$B,$C,$D); 514 &BODY_60_79(\$code,63,$D,$E,$T,$A,$B,$C); 515 &BODY_60_79(\$code,64,$C,$D,$E,$T,$A,$B); 516 &BODY_60_79(\$code,65,$B,$C,$D,$E,$T,$A); 517 &BODY_60_79(\$code,66,$A,$B,$C,$D,$E,$T); 518 &BODY_60_79(\$code,67,$T,$A,$B,$C,$D,$E); 519 &BODY_60_79(\$code,68,$E,$T,$A,$B,$C,$D); 520 &BODY_60_79(\$code,69,$D,$E,$T,$A,$B,$C); 521 
&BODY_60_79(\$code,70,$C,$D,$E,$T,$A,$B); 522 &BODY_60_79(\$code,71,$B,$C,$D,$E,$T,$A); 523 &BODY_60_79(\$code,72,$A,$B,$C,$D,$E,$T); 524 &BODY_60_79(\$code,73,$T,$A,$B,$C,$D,$E); 525 &BODY_60_79(\$code,74,$E,$T,$A,$B,$C,$D); 526 &BODY_60_79(\$code,75,$D,$E,$T,$A,$B,$C); 527 &BODY_60_79(\$code,76,$C,$D,$E,$T,$A,$B); 528 &BODY_60_79(\$code,77,$B,$C,$D,$E,$T,$A); 529 &BODY_60_79(\$code,78,$A,$B,$C,$D,$E,$T); 530 &BODY_60_79(\$code,79,$T,$A,$B,$C,$D,$E); 531 532$code.=<<___; 533{ .mmb; add $h0=$h0,$E 534 nop.m 0 535 br.ctop.dptk.many .Ldtop };; 536.Ldend: 537{ .mmi; add tmp0=4,ctx 538 mov ar.lc=r3 };; 539{ .mmi; st4 [ctx]=$h0,8 540 st4 [tmp0]=$h1,8 };; 541{ .mmi; st4 [ctx]=$h2,8 542 st4 [tmp0]=$h3 };; 543{ .mib; st4 [ctx]=$h4,-16 544 mov pr=r2,0x1ffff 545 br.ret.sptk.many b0 };; 546.endp sha1_block_asm_data_order# 547___ 548 549print $code; 550