#!/usr/local/bin/perl
#
# Generator for the 32-bit x86 bn_* word routines.  Each bn_* sub below
# emits one assembly function through the perlasm helpers that are loaded
# from x86asm.pl (&mov, &add, &label, &function_begin, ...).
#
# The perlasm helpers are deliberately called with a leading '&': several of
# them (&sub, &and, &xor, &cmp, &push, &pop, ...) share their names with Perl
# keywords or builtins and cannot be called any other way.
#
# All register assignments are lexical to the sub that uses them; nothing in
# this file is read through package globals.

use strict;

push(@INC, "perlasm", "../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# Emit the five-instruction carry/borrow step shared by the add and subtract
# routines:
#
#     acc    = acc OP carry
#     carry  = CF
#     acc    = acc OP addend
#     carry += CF
#
# $op is a code reference to the perlasm emitter used for OP (\&add or \&sub);
# the remaining arguments are register names.
sub _emit_carry_step {
    my ($op, $acc, $addend, $carry) = @_;

    &$op($acc, $carry);
    &mov($carry, 0);            # mov does not touch the flags ...
    &adc($carry, $carry);       # ... so this yields carry = CF
    &$op($acc, $addend);
    &adc($carry, 0);            # fold in the second carry/borrow
}

# Emit $name: for each of num words, t = a[i] * w + r[i] + c, r[i] = low(t),
# c = high(t).  The final c is returned in eax.
#
# Arguments of the generated function, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w
#
# Words are handled eight at a time in an unrolled loop, followed by up to
# seven unrolled tail rounds for num % 8.
sub bn_mul_add_words {
    my ($name) = @_;

    my $ap    = "ebx";
    my $w     = "ebp";
    my $rp    = "edi";
    my $carry = "esi";

    &function_begin($name, "");

    &comment("");

    &xor($carry, $carry);               # clear carry
    &mov($rp, &wparam(0));

    &mov("ecx", &wparam(2));
    &mov($ap, &wparam(1));

    &and("ecx", 0xfffffff8);            # num rounded down to a multiple of 8
    &mov($w, &wparam(3));

    # Reserve one stack slot; the loop counter is spilled there because
    # every other register is in use.  mov/push leave the flags from the
    # "and" above intact for the jz below.
    &push("ecx");

    &jz(&label("maw_finish"));

    &set_label("maw_loop", 0);

    &mov(&swtmp(0), "ecx");             # spill the loop counter

    for (my $off = 0; $off < 32; $off += 4) {
        &comment("Round $off");

        &mov("eax", &DWP($off, $ap, "", 0));    # *a
        &mul($w);                               # edx:eax = *a * w
        &add("eax", $carry);                    # L(t) += c
        &mov($carry, &DWP($off, $rp, "", 0));   # load *r (c is free now)
        &adc("edx", 0);                         # H(t) += carry
        &add("eax", $carry);                    # L(t) += *r
        &adc("edx", 0);                         # H(t) += carry
        &mov(&DWP($off, $rp, "", 0), "eax");    # *r = L(t)
        &mov($carry, "edx");                    # c = H(t)
    }

    &comment("");
    &mov("ecx", &swtmp(0));             # reload the loop counter
    &add($ap, 32);
    &add($rp, 32);
    &sub("ecx", 8);
    &jnz(&label("maw_loop"));

    &set_label("maw_finish", 0);
    &mov("ecx", &wparam(2));            # get num
    &and("ecx", 7);
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2", 1);
    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $ap, "", 0));  # *a
        &mul($w);                               # edx:eax = *a * w
        &add("eax", $carry);                    # L(t) += c
        &mov($carry, &DWP($i * 4, $rp, "", 0)); # load *r
        &adc("edx", 0);                         # H(t) += carry
        &add("eax", $carry);                    # L(t) += *r
        &adc("edx", 0);                         # H(t) += carry
        &dec("ecx") unless $last;               # sets ZF for the jz below
        &mov(&DWP($i * 4, $rp, "", 0), "eax");  # *r = L(t)
        &mov($carry, "edx");                    # c = H(t)
        &jz(&label("maw_end")) unless $last;
    }
    &set_label("maw_end", 0);
    &mov("eax", $carry);

    &pop("ecx");                        # release the temporary stack slot

    &function_end($name);
}

# Emit $name: for each of num words, t = a[i] * w + c, r[i] = low(t),
# c = high(t).  The final c is returned in eax.
#
# Arguments of the generated function, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num, wparam(3) = w
sub bn_mul_words {
    my ($name) = @_;

    my $ap    = "ebx";
    my $w     = "ecx";
    my $rp    = "edi";
    my $carry = "esi";
    my $num   = "ebp";

    &function_begin($name, "");

    &comment("");

    &xor($carry, $carry);               # clear carry
    &mov($rp,  &wparam(0));
    &mov($ap,  &wparam(1));
    &mov($num, &wparam(2));
    &mov($w,   &wparam(3));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop", 0);
    for (my $off = 0; $off < 32; $off += 4) {
        &comment("Round $off");

        &mov("eax", &DWP($off, $ap, "", 0));    # *a
        &mul($w);                               # edx:eax = *a * w
        &add("eax", $carry);                    # L(t) += c
        &adc("edx", 0);                         # H(t) += carry
        &mov(&DWP($off, $rp, "", 0), "eax");    # *r = L(t)
        &mov($carry, "edx");                    # c = H(t)
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish", 0);
    &mov($num, &wparam(2));             # get num
    &and($num, 7);
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2", 1);
    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $ap, "", 0));  # *a
        &mul($w);                               # edx:eax = *a * w
        &add("eax", $carry);                    # L(t) += c
        &adc("edx", 0);                         # H(t) += carry
        &mov(&DWP($i * 4, $rp, "", 0), "eax");  # *r = L(t)
        &mov($carry, "edx");                    # c = H(t)
        &dec($num) unless $last;
        &jz(&label("mw_end")) unless $last;
    }
    &set_label("mw_end", 0);
    &mov("eax", $carry);

    &function_end($name);
}

# Emit $name: for each of num words, store the double-word square of a[i]
# into r[2*i] (low half) and r[2*i+1] (high half).
#
# Arguments of the generated function, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = num
sub bn_sqr_words {
    my ($name) = @_;

    my $rp  = "esi";
    my $ap  = "edi";
    my $num = "ebx";

    &function_begin($name, "");

    &comment("");

    &mov($rp,  &wparam(0));
    &mov($ap,  &wparam(1));
    &mov($num, &wparam(2));

    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop", 0);
    for (my $off = 0; $off < 32; $off += 4) {
        &comment("Round $off");
        &mov("eax", &DWP($off, $ap, "", 0));            # *a
        &mul("eax");                                    # edx:eax = *a * *a
        &mov(&DWP($off * 2, $rp, "", 0), "eax");        # low half
        &mov(&DWP($off * 2 + 4, $rp, "", 0), "edx");    # high half
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 64);                      # r advances two words per input word
    &sub($num, 8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish", 0);
    &mov($num, &wparam(2));             # get num
    &and($num, 7);
    &jz(&label("sw_end"));

    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("Tail Round $i");
        &mov("eax", &DWP($i * 4, $ap, "", 0));          # *a
        &mul("eax");                                    # edx:eax = *a * *a
        &mov(&DWP($i * 8, $rp, "", 0), "eax");          # low half
        &dec($num) unless $last;                        # sets ZF for the jz
        &mov(&DWP($i * 8 + 4, $rp, "", 0), "edx");      # high half
        &jz(&label("sw_end")) unless $last;
    }
    &set_label("sw_end", 0);

    &function_end($name);
}

# Emit $name: load wparam(0) into edx and wparam(1) into eax, then divide
# edx:eax by wparam(2) with a single "div".  Whatever "div" leaves in eax is
# the generated function's result.
sub bn_div_words {
    my ($name) = @_;

    &function_begin($name, "");
    &mov("edx", &wparam(0));
    &mov("eax", &wparam(1));
    &mov("ebx", &wparam(2));
    &div("ebx");
    &function_end($name);
}

# Shared generator behind bn_add_words and bn_sub_words.  The two routines
# are identical except for the arithmetic instruction, which is supplied as
# the code reference $op (\&add or \&sub).
#
# Arguments of the generated function, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num
# The running carry/borrow lives in eax and is therefore the return value.
sub _bn_addsub_words {
    my ($name, $op) = @_;

    my $ap    = "esi";
    my $bp    = "edi";
    my $carry = "eax";
    my $rp    = "ebx";
    my $tmp1  = "ecx";
    my $tmp2  = "edx";
    my $num   = "ebp";

    &function_begin($name, "");

    &comment("");

    &mov($rp,  &wparam(0));             # get r
    &mov($ap,  &wparam(1));             # get a
    &mov($bp,  &wparam(2));             # get b
    &mov($num, &wparam(3));             # get num
    &xor($carry, $carry);               # clear carry
    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    for my $i (0 .. 7) {
        &comment("Round $i");

        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &mov($tmp2, &DWP($i * 4, $bp, "", 0));  # *b
        &_emit_carry_step($op, $tmp1, $tmp2, $carry);
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
    }

    &comment("");
    &add($ap, 32);
    &add($bp, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));             # get num
    &and($num, 7);
    &jz(&label("aw_end"));

    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("Tail Round $i");
        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &mov($tmp2, &DWP($i * 4, $bp, "", 0));  # *b
        &_emit_carry_step($op, $tmp1, $tmp2, $carry);
        &dec($num) unless $last;                # sets ZF for the jz below
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &jz(&label("aw_end")) unless $last;
    }
    &set_label("aw_end", 0);

    # No explicit "mov eax, carry" is needed: the carry register is eax.

    &function_end($name);
}

# Emit $name: r[i] = a[i] + b[i] + carry for num words; carry out in eax.
sub bn_add_words {
    my ($name) = @_;

    &_bn_addsub_words($name, \&add);
}

# Emit $name: r[i] = a[i] - b[i] - borrow for num words; borrow out in eax.
sub bn_sub_words {
    my ($name) = @_;

    &_bn_addsub_words($name, \&sub);
}

# Emit $name: subtract with a length difference.
#
# Arguments of the generated function, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = b, wparam(3) = num,
#   wparam(4) = dl
#
# Phase 1 computes r[i] = a[i] - b[i] - borrow for num words, advancing the
# r/a/b pointers past them.  Phase 2 then depends on the sign of dl:
#   dl == 0: nothing more to do.
#   dl <  0: r[i] = 0 - b[i] - borrow for -dl further words.
#   dl >  0: r[i] = a[i] - borrow for dl further words; as soon as the borrow
#            clears, control jumps into a parallel plain-copy sequence
#            (pw_nc*/pw_tail_nc* labels) for the remaining words.
# The borrow lives in eax and is therefore the return value.
sub bn_sub_part_words {
    my ($name) = @_;

    my $ap     = "esi";
    my $bp     = "edi";
    my $borrow = "eax";
    my $rp     = "ebx";
    my $tmp1   = "ecx";
    my $tmp2   = "edx";
    my $num    = "ebp";

    &function_begin($name, "");

    &comment("");

    &mov($rp,  &wparam(0));             # get r
    &mov($ap,  &wparam(1));             # get a
    &mov($bp,  &wparam(2));             # get b
    &mov($num, &wparam(3));             # get num
    &xor($borrow, $borrow);             # clear carry
    &and($num, 0xfffffff8);             # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    # ---- Phase 1: common part, a[i] - b[i] ------------------------------

    &set_label("aw_loop", 0);
    for my $i (0 .. 7) {
        &comment("Round $i");

        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &mov($tmp2, &DWP($i * 4, $bp, "", 0));  # *b
        &_emit_carry_step(\&sub, $tmp1, $tmp2, $borrow);
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
    }

    &comment("");
    &add($ap, 32);
    &add($bp, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($num, &wparam(3));             # get num
    &and($num, 7);
    &jz(&label("aw_end"));

    # Unlike bn_sub_words, the tail bumps the pointers one word per round so
    # that they point just past the common part when phase 2 starts.
    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("Tail Round $i");
        &mov($tmp1, &DWP(0, $ap, "", 0));       # *a
        &mov($tmp2, &DWP(0, $bp, "", 0));       # *b
        &_emit_carry_step(\&sub, $tmp1, $tmp2, $borrow);
        &mov(&DWP(0, $rp, "", 0), $tmp1);       # *r
        &add($ap, 4);
        &add($bp, 4);
        &add($rp, 4);
        &dec($num) unless $last;
        &jz(&label("aw_end")) unless $last;
    }
    &set_label("aw_end", 0);

    # ---- Phase 2: dispatch on dl ----------------------------------------

    # NOTE(review): dl is tested against zero twice (once in memory, once in
    # the register).  The first test is redundant but is kept so that the
    # emitted code stays unchanged.
    &cmp(&wparam(4), 0);
    &je(&label("pw_end"));

    &mov($num, &wparam(4));             # get dl
    &cmp($num, 0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    # ---- dl < 0: r[i] = 0 - b[i] - borrow -------------------------------

    &comment("pw_neg");
    &mov($tmp2, 0);
    &sub($tmp2, $num);
    &mov($num, $tmp2);                  # num = -dl
    &and($num, 0xfffffff8);             # rounded down to a multiple of 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop", 0);
    for my $i (0 .. 7) {
        &comment("dl<0 Round $i");

        &mov($tmp1, 0);
        &mov($tmp2, &DWP($i * 4, $bp, "", 0));  # *b
        &_emit_carry_step(\&sub, $tmp1, $tmp2, $borrow);
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
    }

    &comment("");
    &add($bp, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish", 0);
    &mov($tmp2, &wparam(4));            # get dl
    &mov($num, 0);
    &sub($num, $tmp2);                  # num = -dl
    &and($num, 7);
    &jz(&label("pw_end"));

    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("dl<0 Tail Round $i");
        &mov($tmp1, 0);
        &mov($tmp2, &DWP($i * 4, $bp, "", 0));  # *b
        &_emit_carry_step(\&sub, $tmp1, $tmp2, $borrow);
        &dec($num) unless $last;                # sets ZF for the jz below
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &jz(&label("pw_end")) unless $last;
    }

    &jmp(&label("pw_end"));

    # ---- dl > 0, borrow possibly pending: r[i] = a[i] - borrow ----------

    &set_label("pw_pos", 0);

    &and($num, 0xfffffff8);             # dl rounded down to a multiple of 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop", 0);

    for my $i (0 .. 7) {
        &comment("dl>0 Round $i");

        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &sub($tmp1, $borrow);
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &jnc(&label("pw_nc" . $i));             # borrow cleared: plain copy
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish", 0);
    &mov($num, &wparam(4));             # get dl
    &and($num, 7);
    &jz(&label("pw_end"));

    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &comment("dl>0 Tail Round $i");
        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &sub($tmp1, $borrow);
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &jnc(&label("pw_tail_nc" . $i));        # borrow cleared: plain copy
        &dec($num) unless $last;
        &jz(&label("pw_end")) unless $last;
    }
    &mov($borrow, 1);                   # borrow survived every word
    &jmp(&label("pw_end"));

    # ---- dl > 0, no borrow left: r[i] = a[i] ----------------------------
    # Each pw_nc<i> / pw_tail_nc<i> label sits just after the copy of word
    # i, i.e. at the point the borrow loop jumps to once word i is stored.

    &set_label("pw_nc_loop", 0);
    for my $i (0 .. 7) {
        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &set_label("pw_nc" . $i, 0);
    }

    &comment("");
    &add($ap, 32);
    &add($rp, 32);
    &sub($num, 8);
    &jnz(&label("pw_nc_loop"));

    &mov($num, &wparam(4));             # get dl
    &and($num, 7);
    &jz(&label("pw_nc_end"));

    for my $i (0 .. 6) {
        my $last = ($i == 6);

        &mov($tmp1, &DWP($i * 4, $ap, "", 0));  # *a
        &mov(&DWP($i * 4, $rp, "", 0), $tmp1);  # *r
        &set_label("pw_tail_nc" . $i, 0);
        &dec($num) unless $last;
        &jz(&label("pw_nc_end")) unless $last;
    }

    &set_label("pw_nc_end", 0);
    &mov($borrow, 0);

    &set_label("pw_end", 0);

    # No explicit "mov eax, borrow" is needed: the borrow register is eax.

    &function_end($name);
}