#!/usr/local/bin/perl
#
# bn-586.pl -- perlasm generator for the x86 BIGNUM word primitives.
# Source path: /openbsd/lib/libcrypto/bn/asm/bn-586.pl (revision da347917)
#
# The first command-line argument and the script name are handed unchanged
# to asm_init() from x86asm.pl; presumably the argument selects the
# assembler output flavour -- confirm against x86asm.pl.
#
# The instruction helpers used throughout this file are always called as
# "&name(...)".  That form is required, not cosmetic: several helpers
# (and, xor, sub, cmp, push, pop) share their names with Perl built-ins,
# and the leading "&" is what forces a call to the user-defined sub.

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

# One generator call per routine.  The string argument is the name each
# generator passes on to function_begin()/function_end().
&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# bn_mul_add_words($name)
#
# Emits the routine $name.  For each of num words the generated code does
#     t    = a[i] * w + carry + r[i]      (64-bit, in edx:eax)
#     r[i] = low 32 bits of t
#     carry = high 32 bits of t
# and finally moves the carry into eax.
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: num   3: w
#
# The word count is split into an 8-way unrolled loop (num & ~7 words)
# followed by up to 7 individually emitted tail rounds (num & 7 words).
# The order of the helper calls is the order of the emitted instructions,
# so statements here must not be reordered.
sub bn_mul_add_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	# Register assignment.  eax/edx are written literally below because
	# "mul" always delivers its product in edx:eax.
	my $a_ptr = "ebx";
	my $mult  = "ebp";
	my $r_ptr = "edi";
	my $carry = "esi";

	&xor($carry,$carry);			# carry = 0
	&mov($r_ptr,&wparam(0));		# r

	&mov("ecx",&wparam(2));			# num
	&mov($a_ptr,&wparam(1));		# a

	&and("ecx",0xfffffff8);			# words handled by the unrolled loop
	&mov($mult,&wparam(3));			# w

	&push("ecx");				# reserve a stack slot for the loop counter

	# Branches on the flags left by the "and" above; the mov and push
	# emitted in between do not modify the flags.
	&jz(&label("maw_finish"));

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");			# park the counter in the stack slot

	for (my $off=0; $off<32; $off+=4)
		{
		&comment("Round $off");

		&mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($mult);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t) += c
		&mov($carry,&DWP($off,$r_ptr,"",0));	# fetch *r (carry register reused as scratch)
		&adc("edx",0);				# H(t) += carry flag
		&add("eax",$carry);			# L(t) += *r
		&adc("edx",0);				# H(t) += carry flag
		&mov(&DWP($off,$r_ptr,"",0),"eax");	# *r = L(t)
		&mov($carry,"edx");			# c  = H(t)
		}

	&comment("");
	&mov("ecx",&swtmp(0));			# reload the counter
	&add($a_ptr,32);
	&add($r_ptr,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));			# get num
	&and("ecx",7);				# leftover words
	&jnz(&label("maw_finish2"));		# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a_ptr,"",0));	# *a
		&mul($mult);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t) += c
		&mov($carry,&DWP($i*4,$r_ptr,"",0));	# fetch *r
		&adc("edx",0);				# H(t) += carry flag
		&add("eax",$carry);			# L(t) += *r
		&adc("edx",0);				# H(t) += carry flag
		&dec("ecx") if ($more);			# sets ZF for the jz below
		&mov(&DWP($i*4,$r_ptr,"",0),"eax");	# *r = L(t)
		&mov($carry,"edx");			# c  = H(t)
		&jz(&label("maw_end")) if ($more);
		}
	&set_label("maw_end",0);
	&mov("eax",$carry);			# final carry goes out in eax

	&pop("ecx");				# release the counter slot

	&function_end($name);
	}

# bn_mul_words($name)
#
# Emits the routine $name.  For each of num words the generated code does
#     t    = a[i] * w + carry             (64-bit, in edx:eax)
#     r[i] = low 32 bits of t
#     carry = high 32 bits of t
# and finally moves the carry into eax.
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: num   3: w
#
# Layout: an 8-way unrolled loop over num & ~7 words, then up to 7 tail
# rounds for num & 7 words.  Helper-call order is instruction order.
sub bn_mul_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	# Register assignment; eax/edx are implied by "mul".
	my $a_ptr = "ebx";
	my $mult  = "ecx";
	my $r_ptr = "edi";
	my $carry = "esi";
	my $count = "ebp";

	&xor($carry,$carry);			# carry = 0
	&mov($r_ptr,&wparam(0));		# r
	&mov($a_ptr,&wparam(1));		# a
	&mov($count,&wparam(2));		# num
	&mov($mult,&wparam(3));			# w

	&and($count,0xfffffff8);		# words handled by the unrolled loop
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for (my $off=0; $off<32; $off+=4)
		{
		&comment("Round $off");

		&mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($mult);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t) += c
		&adc("edx",0);				# H(t) += carry flag
		&mov(&DWP($off,$r_ptr,"",0),"eax");	# *r = L(t)
		&mov($carry,"edx");			# c  = H(t)
		}

	&comment("");
	&add($a_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($count,&wparam(2));		# get num
	&and($count,7);				# leftover words
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a_ptr,"",0));	# *a
		&mul($mult);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t) += c
		&adc("edx",0);				# H(t) += carry flag
		&mov(&DWP($i*4,$r_ptr,"",0),"eax");	# *r = L(t)
		&mov($carry,"edx");			# c  = H(t)
		&dec($count) if ($more);
		&jz(&label("mw_end")) if ($more);
		}
	&set_label("mw_end",0);
	&mov("eax",$carry);			# final carry goes out in eax

	&function_end($name);
	}

# bn_sqr_words($name)
#
# Emits the routine $name.  For each of num input words the generated code
# squares a[i] and stores the 64-bit product as two words:
#     r[2*i]   = low 32 bits of a[i]*a[i]
#     r[2*i+1] = high 32 bits of a[i]*a[i]
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: num
#
# Layout: an 8-way unrolled loop over num & ~7 words (a advances 32 bytes,
# r advances 64), then up to 7 tail rounds for num & 7 words.
sub bn_sqr_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	my $r_ptr = "esi";
	my $a_ptr = "edi";
	my $count = "ebx";

	&mov($r_ptr,&wparam(0));		# r
	&mov($a_ptr,&wparam(1));		# a
	&mov($count,&wparam(2));		# num

	&and($count,0xfffffff8);		# words handled by the unrolled loop
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	for (my $off=0; $off<32; $off+=4)
		{
		&comment("Round $off");
		&mov("eax",&DWP($off,$a_ptr,"",0));		# *a
		&mul("eax");					# edx:eax = *a * *a
		&mov(&DWP($off*2,$r_ptr,"",0),"eax");		# low word
		&mov(&DWP($off*2+4,$r_ptr,"",0),"edx");		# high word
		}

	&comment("");
	&add($a_ptr,32);
	&add($r_ptr,64);			# two result words per input word
	&sub($count,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($count,&wparam(2));		# get num
	&and($count,7);				# leftover words
	&jz(&label("sw_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a_ptr,"",0));		# *a
		&mul("eax");					# edx:eax = *a * *a
		&mov(&DWP($i*8,$r_ptr,"",0),"eax");		# low word
		&dec($count) if ($more);			# sets ZF for the jz below
		&mov(&DWP($i*8+4,$r_ptr,"",0),"edx");		# high word
		&jz(&label("sw_end")) if ($more);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words($name)
#
# Emits the routine $name: it loads its first stack argument into edx, the
# second into eax and the third into ebx, then issues "div ebx", i.e. an
# unsigned divide of the 64-bit value edx:eax by ebx.  The x86 div
# instruction leaves the quotient in eax and the remainder in edx; nothing
# is emitted after it, so whatever is in eax at that point is what the
# routine hands back.
sub bn_div_words
	{
	my ($name) = @_;

	&function_begin($name,"");
	&mov("edx",&wparam(0));		# high word of the dividend
	&mov("eax",&wparam(1));		# low word of the dividend
	&mov("ebx",&wparam(2));		# divisor
	&div("ebx");
	&function_end($name);
	}

# bn_add_words($name)
#
# Emits the routine $name.  For each of num words the generated code does
#     r[i] = a[i] + b[i] + carry
# keeping the carry-out (0 or 1) in eax, which is therefore also the value
# left in eax when the routine finishes.
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: b   3: num
#
# Layout: an 8-way unrolled loop over num & ~7 words, then up to 7 tail
# rounds for num & 7 words.  Helper-call order is instruction order.
sub bn_add_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	my $a_ptr = "esi";
	my $b_ptr = "edi";
	my $carry = "eax";
	my $r_ptr = "ebx";
	my $wa    = "ecx";		# scratch: word from a, then the sum
	my $wb    = "edx";		# scratch: word from b
	my $count = "ebp";

	&mov($r_ptr,&wparam(0));		# get r
	&mov($a_ptr,&wparam(1));		# get a
	&mov($b_ptr,&wparam(2));		# get b
	&mov($count,&wparam(3));		# get num
	&xor($carry,$carry);			# clear carry
	&and($count,0xfffffff8);		# words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for (my $i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&add($wa,$carry);			# add incoming carry
		&mov($carry,0);				# mov leaves the carry flag intact
		&adc($carry,$carry);			# carry = CF of the first add
		&add($wa,$wb);
		&adc($carry,0);				# carry += CF of the second add
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		}

	&comment("");
	&add($a_ptr,32);
	&add($b_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($count,&wparam(3));		# get num
	&and($count,7);				# leftover words
	&jz(&label("aw_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&add($wa,$carry);
		&mov($carry,0);
		&adc($carry,$carry);
		&add($wa,$wb);
		&adc($carry,0);
		&dec($count) if ($more);		# sets ZF for the jz below
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&jz(&label("aw_end")) if ($more);
		}
	&set_label("aw_end",0);

	# No "mov eax,carry" is needed here: the carry already lives in eax.

	&function_end($name);
	}

# bn_sub_words($name)
#
# Emits the routine $name.  For each of num words the generated code does
#     r[i] = a[i] - b[i] - borrow
# keeping the borrow-out (0 or 1) in eax, which is therefore also the
# value left in eax when the routine finishes.
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: b   3: num
#
# Layout: an 8-way unrolled loop over num & ~7 words, then up to 7 tail
# rounds for num & 7 words.  The labels keep the "aw_" prefix used by
# bn_add_words so the generated output is unchanged.
sub bn_sub_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	my $a_ptr  = "esi";
	my $b_ptr  = "edi";
	my $borrow = "eax";
	my $r_ptr  = "ebx";
	my $wa     = "ecx";		# scratch: word from a, then the difference
	my $wb     = "edx";		# scratch: word from b
	my $count  = "ebp";

	&mov($r_ptr,&wparam(0));		# get r
	&mov($a_ptr,&wparam(1));		# get a
	&mov($b_ptr,&wparam(2));		# get b
	&mov($count,&wparam(3));		# get num
	&xor($borrow,$borrow);			# clear borrow
	&and($count,0xfffffff8);		# words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for (my $i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&sub($wa,$borrow);			# take off incoming borrow
		&mov($borrow,0);			# mov leaves the carry flag intact
		&adc($borrow,$borrow);			# borrow = CF of the first sub
		&sub($wa,$wb);
		&adc($borrow,0);			# borrow += CF of the second sub
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		}

	&comment("");
	&add($a_ptr,32);
	&add($b_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($count,&wparam(3));		# get num
	&and($count,7);				# leftover words
	&jz(&label("aw_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&sub($wa,$borrow);
		&mov($borrow,0);
		&adc($borrow,$borrow);
		&sub($wa,$wb);
		&adc($borrow,0);
		&dec($count) if ($more);		# sets ZF for the jz below
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&jz(&label("aw_end")) if ($more);
		}
	&set_label("aw_end",0);

	# No "mov eax,borrow" is needed here: the borrow already lives in eax.

	&function_end($name);
	}

# bn_sub_part_words($name)
#
# Emits the routine $name, a subtraction over operands whose lengths
# differ by dl words.  The borrow (0 or 1) is kept in eax throughout and is
# what is left in eax at the end.
#
# Stack arguments of the generated routine, in wparam() order:
#     0: r   1: a   2: b   3: num   4: dl
#
# The generated code has three phases:
#
#   1. Common part, num words:  r[i] = a[i] - b[i] - borrow.
#      Unlike bn_sub_words, the tail rounds advance a, b and r by one word
#      each, so all three pointers sit just past the common part afterwards.
#
#   2. dl < 0, continuing for -dl words:  r[i] = 0 - b[i] - borrow
#      (a is not read at all in this phase).
#
#   3. dl > 0, continuing for dl words:  r[i] = a[i] - borrow.
#      As soon as one of these subtractions produces no borrow, control
#      jumps into a parallel "no carry" copy path (labels pw_nc<i> /
#      pw_tail_nc<i>) that just copies the remaining words from a to r and
#      finishes with borrow = 0.  If the borrow survives all tail rounds
#      the routine finishes with borrow = 1.
#
# dl == 0 skips phases 2 and 3.  Helper-call order is instruction order,
# and label numbering depends on first reference, so nothing here may be
# reordered.
sub bn_sub_part_words
	{
	my ($name) = @_;

	&function_begin($name,"");

	&comment("");

	my $a_ptr  = "esi";
	my $b_ptr  = "edi";
	my $borrow = "eax";
	my $r_ptr  = "ebx";
	my $wa     = "ecx";		# scratch: minuend word, then the result
	my $wb     = "edx";		# scratch: subtrahend word
	my $count  = "ebp";

	&mov($r_ptr,&wparam(0));		# get r
	&mov($a_ptr,&wparam(1));		# get a
	&mov($b_ptr,&wparam(2));		# get b
	&mov($count,&wparam(3));		# get num
	&xor($borrow,$borrow);			# clear borrow
	&and($count,0xfffffff8);		# words handled by the unrolled loop

	&jz(&label("aw_finish"));

	# ---- phase 1: common part, unrolled by 8 ----
	&set_label("aw_loop",0);
	for (my $i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&sub($wa,$borrow);			# take off incoming borrow
		&mov($borrow,0);			# mov leaves the carry flag intact
		&adc($borrow,$borrow);			# borrow = CF of the first sub
		&sub($wa,$wb);
		&adc($borrow,0);			# borrow += CF of the second sub
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		}

	&comment("");
	&add($a_ptr,32);
	&add($b_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($count,&wparam(3));		# get num
	&and($count,7);				# leftover words
	&jz(&label("aw_end"));

	# ---- phase 1: common part, tail (pointers advance every round) ----
	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);		# the last tail round needs no countdown

		&comment("Tail Round $i");
		&mov($wa,&DWP(0,$a_ptr,"",0));		# *a
		&mov($wb,&DWP(0,$b_ptr,"",0));		# *b
		&sub($wa,$borrow);
		&mov($borrow,0);
		&adc($borrow,$borrow);
		&sub($wa,$wb);
		&adc($borrow,0);
		&mov(&DWP(0,$r_ptr,"",0),$wa);		# *r
		&add($a_ptr, 4);
		&add($b_ptr, 4);
		&add($r_ptr, 4);
		&dec($count) if ($more);
		&jz(&label("aw_end")) if ($more);
		}
	&set_label("aw_end",0);

	# ---- dispatch on dl ----
	&cmp(&wparam(4),0);
	&je(&label("pw_end"));

	&mov($count,&wparam(4));		# get dl
	&cmp($count,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	# ---- phase 2: dl < 0, r = 0 - b - borrow ----
	&comment("pw_neg");
	&mov($wb,0);
	&sub($wb,$count);
	&mov($count,$wb);			# count = -dl
	&and($count,0xfffffff8);		# words handled by the unrolled loop
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for (my $i=0; $i<8; $i++)
		{
		&comment("dl<0 Round $i");

		&mov($wa,0);
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&sub($wa,$borrow);
		&mov($borrow,0);
		&adc($borrow,$borrow);
		&sub($wa,$wb);
		&adc($borrow,0);
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		}

	&comment("");
	&add($b_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($wb,&wparam(4));			# get dl
	&mov($count,0);
	&sub($count,$wb);			# count = -dl
	&and($count,7);				# leftover words
	&jz(&label("pw_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);

		&comment("dl<0 Tail Round $i");
		&mov($wa,0);
		&mov($wb,&DWP($i*4,$b_ptr,"",0));	# *b
		&sub($wa,$borrow);
		&mov($borrow,0);
		&adc($borrow,$borrow);
		&sub($wa,$wb);
		&adc($borrow,0);
		&dec($count) if ($more);		# sets ZF for the jz below
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&jz(&label("pw_end")) if ($more);
		}

	&jmp(&label("pw_end"));

	# ---- phase 3: dl > 0, r = a - borrow while the borrow persists ----
	&set_label("pw_pos",0);

	&and($count,0xfffffff8);		# words handled by the unrolled loop
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for (my $i=0; $i<8; $i++)
		{
		&comment("dl>0 Round $i");

		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&sub($wa,$borrow);
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&jnc(&label("pw_nc".$i));		# borrow gone: switch to plain copy
		}

	&comment("");
	&add($a_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($count,&wparam(4));		# get dl
	&and($count,7);				# leftover words
	&jz(&label("pw_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);

		&comment("dl>0 Tail Round $i");
		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&sub($wa,$borrow);
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&jnc(&label("pw_tail_nc".$i));		# borrow gone: switch to plain copy
		&dec($count) if ($more);
		&jz(&label("pw_end")) if ($more);
		}
	&mov($borrow,1);			# borrow survived every tail round
	&jmp(&label("pw_end"));

	# ---- phase 3, no-borrow path: copy the rest of a into r ----
	# Each pw_nc<i> label sits AFTER the copy of word i, because word i has
	# already been stored by the round that jumped here.
	&set_label("pw_nc_loop",0);
	for (my $i=0; $i<8; $i++)
		{
		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&set_label("pw_nc".$i,0);
		}

	&comment("");
	&add($a_ptr,32);
	&add($r_ptr,32);
	&sub($count,8);
	&jnz(&label("pw_nc_loop"));

	&mov($count,&wparam(4));		# get dl
	&and($count,7);				# leftover words
	&jz(&label("pw_nc_end"));

	for (my $i=0; $i<7; $i++)
		{
		my $more = ($i != 6);

		&mov($wa,&DWP($i*4,$a_ptr,"",0));	# *a
		&mov(&DWP($i*4,$r_ptr,"",0),$wa);	# *r
		&set_label("pw_tail_nc".$i,0);
		&dec($count) if ($more);
		&jz(&label("pw_nc_end")) if ($more);
		}

	&set_label("pw_nc_end",0);
	&mov($borrow,0);

	&set_label("pw_end",0);

	# No "mov eax,borrow" is needed here: the borrow already lives in eax.

	&function_end($name);
	}
