#!/usr/bin/perl
#
# bn-586.pl - emits x86 assembler for five word-array helpers
# (bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div64, bn_add_words)
# by driving the emitter routines loaded from x86asm.pl.
#
# Every emitter is called with a leading '&' on purpose: several of them
# (xor, and, sub, push, pop) share a name with a Perl operator, keyword or
# builtin, so the sigil is what makes Perl treat them as user subroutines.

#!/usr/local/bin/perl

push(@INC, "perlasm", "../../perlasm");
require "x86asm.pl";

# The first command-line argument is handed to asm_init unchanged
# (presumably it selects the output flavour -- confirm in x86asm.pl).
&asm_init($ARGV[0], "bn-586.pl");

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div64("bn_div64");
&bn_add_words("bn_add_words");

&asm_finish();

# Emit the routine called $name.
#
# Arguments read by the generated code: wparam(0) -> r, wparam(1) -> a,
# wparam(2) -> num, wparam(3) -> w.  For each word the code computes
# a[i]*w, adds the running carry and the current r[i], stores the low half
# back to r[i] and keeps the high half as the next carry.  The final carry
# is moved to eax.  Words are processed eight per loop pass, then up to
# seven more in an unrolled tail.
sub bn_mul_add_words {
    my ($name) = @_;

    my ($lo, $hi) = ("eax", "edx");    # mul leaves the product in edx:eax
    my $src   = "ebx";                 # a
    my $word  = "ebp";                 # w
    my $dst   = "edi";                 # r
    my $carry = "esi";                 # c; also borrowed as scratch for *r
    my $count = "ecx";

    &function_begin($name, "");
    &comment("");

    &xor($carry, $carry);              # carry = 0
    &mov($dst, &wparam(0));
    &mov($count, &wparam(2));
    &mov($src, &wparam(1));
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &mov($word, &wparam(3));
    &push($count);                     # reserve one stack word as a temporary

    # mov and push leave the flags alone, so this still tests the and above.
    &jz(&label("maw_finish"));

    &set_label("maw_loop", 0);
    &mov(&swtmp(0), $count);           # park the counter in the temporary

    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");

        &mov($lo, &DWP($off, $src, "", 0));      # *a
        &mul($word);                             # edx:eax = *a * w
        &add($lo, $carry);                       # low += carry-in
        &mov($carry, &DWP($off, $dst, "", 0));   # scratch = *r
        &adc($hi, 0);                            # propagate into high
        &add($lo, $carry);                       # low += *r
        &adc($hi, 0);                            # propagate into high
        &mov(&DWP($off, $dst, "", 0), $lo);      # *r = low
        &mov($carry, $hi);                       # carry-out = high
    }

    &comment("");
    &mov($count, &swtmp(0));           # fetch the counter back
    &add($src, 32);
    &add($dst, 32);
    &sub($count, 8);
    &jnz(&label("maw_loop"));

    &set_label("maw_finish", 0);
    &mov($count, &wparam(2));          # reload num
    &and($count, 7);                   # words left over for the tail
    &jnz(&label("maw_finish2"));       # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2", 1);
    foreach my $k (0 .. 6) {
        my $off  = 4 * $k;
        my $more = ($k != 6);          # the last round needs no count/exit

        &comment("Tail Round $k");
        &mov($lo, &DWP($off, $src, "", 0));      # *a
        &mul($word);                             # edx:eax = *a * w
        &add($lo, $carry);                       # low += carry-in
        &mov($carry, &DWP($off, $dst, "", 0));   # scratch = *r
        &adc($hi, 0);
        &add($lo, $carry);                       # low += *r
        &adc($hi, 0);
        &dec($count) if $more;                   # sets ZF for the jz below
        &mov(&DWP($off, $dst, "", 0), $lo);      # *r = low
        &mov($carry, $hi);                       # carry-out = high
        &jz(&label("maw_end")) if $more;
    }

    &set_label("maw_end", 0);
    &mov("eax", $carry);               # hand the final carry back in eax

    &pop($count);                      # drop the stack temporary

    &function_end($name);
}

# Emit the routine called $name.
#
# Arguments read by the generated code: wparam(0) -> r, wparam(1) -> a,
# wparam(2) -> num, wparam(3) -> w.  For each word the code computes
# a[i]*w plus the running carry, stores the low half to r[i] and keeps the
# high half as the next carry.  The final carry is moved to eax.
sub bn_mul_words {
    my ($name) = @_;

    my ($lo, $hi) = ("eax", "edx");    # mul leaves the product in edx:eax
    my $src   = "ebx";                 # a
    my $word  = "ecx";                 # w
    my $dst   = "edi";                 # r
    my $carry = "esi";                 # c
    my $count = "ebp";                 # num

    &function_begin($name, "");
    &comment("");

    &xor($carry, $carry);              # carry = 0
    &mov($dst, &wparam(0));
    &mov($src, &wparam(1));
    &mov($count, &wparam(2));
    &mov($word, &wparam(3));

    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop", 0);
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");

        &mov($lo, &DWP($off, $src, "", 0));      # *a
        &mul($word);                             # edx:eax = *a * w
        &add($lo, $carry);                       # low += carry-in
        &adc($hi, 0);                            # propagate into high
        &mov(&DWP($off, $dst, "", 0), $lo);      # *r = low
        &mov($carry, $hi);                       # carry-out = high
    }

    &comment("");
    &add($src, 32);
    &add($dst, 32);
    &sub($count, 8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish", 0);
    &mov($count, &wparam(2));          # reload num
    &and($count, 7);                   # words left over for the tail
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2", 1);
    foreach my $k (0 .. 6) {
        my $off  = 4 * $k;
        my $more = ($k != 6);          # the last round needs no count/exit

        &comment("Tail Round $k");
        &mov($lo, &DWP($off, $src, "", 0));      # *a
        &mul($word);                             # edx:eax = *a * w
        &add($lo, $carry);                       # low += carry-in
        &adc($hi, 0);
        &mov(&DWP($off, $dst, "", 0), $lo);      # *r = low
        &mov($carry, $hi);                       # carry-out = high
        &dec($count) if $more;
        &jz(&label("mw_end")) if $more;
    }

    &set_label("mw_end", 0);
    &mov("eax", $carry);               # hand the final carry back in eax

    &function_end($name);
}

# Emit the routine called $name.
#
# Arguments read by the generated code: wparam(0) -> r, wparam(1) -> a,
# wparam(2) -> num.  Each input word is multiplied by itself and the
# two-word product is stored to consecutive words of r, so r advances twice
# as fast as a.
sub bn_sqr_words {
    my ($name) = @_;

    my $dst   = "esi";                 # r
    my $src   = "edi";                 # a
    my $count = "ebx";                 # num

    &function_begin($name, "");
    &comment("");

    &mov($dst, &wparam(0));
    &mov($src, &wparam(1));
    &mov($count, &wparam(2));

    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop", 0);
    foreach my $k (0 .. 7) {
        my $in  = 4 * $k;              # byte offset into a
        my $out = 8 * $k;              # byte offset into r

        &comment("Round $in");
        &mov("eax", &DWP($in, $src, "", 0));         # *a
        &mul("eax");                                 # edx:eax = *a * *a
        &mov(&DWP($out, $dst, "", 0), "eax");        # low half
        &mov(&DWP($out + 4, $dst, "", 0), "edx");    # high half
    }

    &comment("");
    &add($src, 32);
    &add($dst, 64);
    &sub($count, 8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish", 0);
    &mov($count, &wparam(2));          # reload num
    &and($count, 7);                   # words left over for the tail
    &jz(&label("sw_end"));

    foreach my $k (0 .. 6) {
        my $in   = 4 * $k;
        my $out  = 8 * $k;
        my $more = ($k != 6);          # the last round needs no count/exit

        &comment("Tail Round $k");
        &mov("eax", &DWP($in, $src, "", 0));         # *a
        &mul("eax");                                 # edx:eax = *a * *a
        &mov(&DWP($out, $dst, "", 0), "eax");        # low half
        &dec($count) if $more;                       # sets ZF for the jz below
        &mov(&DWP($out + 4, $dst, "", 0), "edx");    # high half
        &jz(&label("sw_end")) if $more;
    }

    &set_label("sw_end", 0);

    &function_end($name);
}

# Emit the routine called $name.
#
# The generated code loads wparam(0) into edx, wparam(1) into eax and
# wparam(2) into ebx, then issues a single div by ebx (edx:eax / ebx).
# NOTE(review): the quotient is expected to come back in eax; that relies on
# the function_end epilogue leaving eax untouched -- confirm in x86asm.pl.
sub bn_div64 {
    my ($name) = @_;

    &function_begin($name, "");
    &mov("edx", &wparam(0));
    &mov("eax", &wparam(1));
    &mov("ebx", &wparam(2));
    &div("ebx");
    &function_end($name);
}

# Emit the routine called $name.
#
# Arguments read by the generated code: wparam(0) -> r, wparam(1) -> a,
# wparam(2) -> b, wparam(3) -> num.  Each round stores a[i] + b[i] + carry
# to r[i]; the carry is kept as 0/1 in a register (not in the CPU flag)
# between rounds and is what ends up in eax.
sub bn_add_words {
    my ($name) = @_;

    my $lhs   = "esi";                 # a
    my $rhs   = "edi";                 # b
    my $carry = "eax";                 # c
    my $dst   = "ebx";                 # r
    my $sum   = "ecx";                 # working word, starts as *a
    my $addend = "edx";                # holds *b
    my $count = "ebp";                 # num

    &function_begin($name, "");
    &comment("");

    &mov($dst, &wparam(0));
    &mov($lhs, &wparam(1));
    &mov($rhs, &wparam(2));
    &mov($count, &wparam(3));
    &xor($carry, $carry);              # carry = 0
    &and($count, 0xfffffff8);          # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop", 0);
    foreach my $k (0 .. 7) {
        my $off = 4 * $k;

        &comment("Round $k");

        &mov($sum, &DWP($off, $lhs, "", 0));     # *a
        &mov($addend, &DWP($off, $rhs, "", 0));  # *b
        &add($sum, $carry);                      # *a + carry-in
        &mov($carry, 0);                         # mov keeps CF from the add
        &adc($carry, $carry);                    # carry = CF
        &add($sum, $addend);                     # + *b
        &adc($carry, 0);                         # carry += CF
        &mov(&DWP($off, $dst, "", 0), $sum);     # *r
    }

    &comment("");
    &add($lhs, 32);
    &add($rhs, 32);
    &add($dst, 32);
    &sub($count, 8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish", 0);
    &mov($count, &wparam(3));          # reload num
    &and($count, 7);                   # words left over for the tail
    &jz(&label("aw_end"));

    foreach my $k (0 .. 6) {
        my $off  = 4 * $k;
        my $more = ($k != 6);          # the last round needs no count/exit

        &comment("Tail Round $k");
        &mov($sum, &DWP($off, $lhs, "", 0));     # *a
        &mov($addend, &DWP($off, $rhs, "", 0));  # *b
        &add($sum, $carry);                      # *a + carry-in
        &mov($carry, 0);                         # mov keeps CF from the add
        &adc($carry, $carry);                    # carry = CF
        &add($sum, $addend);                     # + *b
        &adc($carry, 0);                         # carry += CF
        &dec($count) if $more;                   # sets ZF for the jz below
        &mov(&DWP($off, $dst, "", 0), $sum);     # *r
        &jz(&label("aw_end")) if $more;
    }

    &set_label("aw_end", 0);

    &mov("eax", $carry);

    &function_end($name);
}
