xref: /openbsd/lib/libcrypto/bn/asm/bn-586.pl (revision 5b37fcf3)
1*5b37fcf3Sryker#!/usr/bin/perl
2*5b37fcf3Sryker#
3*5b37fcf3Sryker
4*5b37fcf3Sryker#!/usr/local/bin/perl
5*5b37fcf3Sryker
6*5b37fcf3Srykerpush(@INC,"perlasm","../../perlasm");
7*5b37fcf3Srykerrequire "x86asm.pl";
8*5b37fcf3Sryker
# Driver: hand the first command-line argument and this script's name to
# asm_init (provided by x86asm.pl), emit the routines in a fixed order,
# then call asm_finish.
&asm_init($ARGV[0],"bn-586.pl");

# Each generator sub below has the same name as the symbol it emits, so a
# single string both selects the sub (symbolic call; this file does not
# "use strict") and is passed to it as the function name.
foreach $routine ("bn_mul_add_words",
		  "bn_mul_words",
		  "bn_sqr_words",
		  "bn_div64",
		  "bn_add_words")
	{
	&$routine($routine);
	}

&asm_finish();
18*5b37fcf3Sryker
sub bn_mul_add_words
	{
	# Emit the x86 routine called $name.
	#
	# The generated code reads its arguments through wparam(0..3) as
	# r, a, num, w.  For each word it computes a[i]*w + r[i] + carry,
	# stores the low half back to r[i] and keeps the high half as the
	# next carry.  The final carry is moved to eax before function_end.
	#
	# Layout: an 8-way unrolled loop handles num & ~7 words, then an
	# unrolled tail of up to 7 rounds handles the remaining num & 7.
	local($name)=@_;
	local($lo,$hi)=("eax","edx");	# mul leaves the product in edx:eax
	local($ap,$rp)=("ebx","edi");	# a pointer, r pointer
	local($word)="ebp";		# multiplier w
	local($carry)="esi";		# carry word; doubles as scratch for *r
	local($cnt)="ecx";		# words still to process
	local($k,$off);

	&function_begin($name,"");

	&comment("");

	&xor($carry,$carry);		# carry = 0
	&mov($rp,&wparam(0));

	&mov($cnt,&wparam(2));
	&mov($ap,&wparam(1));

	&and($cnt,0xfffffff8);		# num rounded down to a multiple of 8
	&mov($word,&wparam(3));

	&push($cnt);			# make one stack word, used as swtmp(0)

	# mov and push do not touch the flags, so this jz still tests the
	# result of the and above.
	&jz(&label("maw_finish"));

	&set_label("maw_loop",0);

	&mov(&swtmp(0),$cnt);		# park the counter in the stack word

	foreach $k (0 .. 7)
		{
		$off=$k*4;		# byte offset of word $k
		&comment("Round $off");

		&mov($lo,&DWP($off,$ap,"",0));		# *a
		&mul($word);				# t = *a * w
		&add($lo,$carry);			# L(t) += c
		&mov($carry,&DWP($off,$rp,"",0));	# fetch *r (mov keeps CF)
		&adc($hi,0);				# H(t) += carry flag
		&add($lo,$carry);			# L(t) += *r
		&adc($hi,0);				# H(t) += carry flag
		&mov(&DWP($off,$rp,"",0),$lo);		# *r = L(t)
		&mov($carry,$hi);			# c = H(t)
		}

	&comment("");
	&mov($cnt,&swtmp(0));		# reload the counter
	&add($ap,32);			# advance both pointers by 8 words
	&add($rp,32);
	&sub($cnt,8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov($cnt,&wparam(2));		# num again
	&and($cnt,7);			# words left for the tail
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	foreach $k (0 .. 6)
		{
		$off=$k*4;
		&comment("Tail Round $k");

		&mov($lo,&DWP($off,$ap,"",0));		# *a
		&mul($word);				# t = *a * w
		&add($lo,$carry);			# L(t) += c
		&mov($carry,&DWP($off,$rp,"",0));	# fetch *r
		&adc($hi,0);				# H(t) += carry flag
		&add($lo,$carry);			# L(t) += *r
		&adc($hi,0);				# H(t) += carry flag
		# The last round needs no count/branch: it falls into maw_end.
		# The two movs between dec and jz leave ZF untouched.
		&dec($cnt) if ($k < 6);
		&mov(&DWP($off,$rp,"",0),$lo);		# *r = L(t)
		&mov($carry,$hi);			# c = H(t)
		&jz(&label("maw_end")) if ($k < 6);
		}
	&set_label("maw_end",0);
	&mov("eax",$carry);		# result: final carry word

	&pop($cnt);			# drop the temporary stack word

	&function_end($name);
	}
101*5b37fcf3Sryker
sub bn_mul_words
	{
	# Emit the x86 routine called $name.
	#
	# The generated code reads its arguments through wparam(0..3) as
	# r, a, num, w.  For each word it computes a[i]*w + carry, stores
	# the low half to r[i] and keeps the high half as the next carry.
	# The final carry is moved to eax before function_end.
	#
	# Layout: an 8-way unrolled loop for num & ~7 words, followed by
	# an unrolled tail of up to 7 rounds for the remaining num & 7.
	local($name)=@_;
	local($lo,$hi)=("eax","edx");	# mul leaves the product in edx:eax
	local($ap,$rp)=("ebx","edi");	# a pointer, r pointer
	local($word)="ecx";		# multiplier w
	local($carry)="esi";		# carry word between rounds
	local($cnt)="ebp";		# words still to process
	local($k,$off);

	&function_begin($name,"");

	&comment("");

	&xor($carry,$carry);		# carry = 0
	&mov($rp,&wparam(0));
	&mov($ap,&wparam(1));
	&mov($cnt,&wparam(2));
	&mov($word,&wparam(3));

	&and($cnt,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	foreach $k (0 .. 7)
		{
		$off=$k*4;		# byte offset of word $k
		&comment("Round $off");

		&mov($lo,&DWP($off,$ap,"",0));	# *a
		&mul($word);			# t = *a * w
		&add($lo,$carry);		# L(t) += c
		&adc($hi,0);			# H(t) += carry flag
		&mov(&DWP($off,$rp,"",0),$lo);	# *r = L(t)
		&mov($carry,$hi);		# c = H(t)
		}

	&comment("");
	&add($ap,32);			# advance both pointers by 8 words
	&add($rp,32);
	&sub($cnt,8);
	&jz(&label("mw_finish"));	# done with the full blocks
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($cnt,&wparam(2));		# num again
	&and($cnt,7);			# words left for the tail
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	foreach $k (0 .. 6)
		{
		$off=$k*4;
		&comment("Tail Round $k");

		&mov($lo,&DWP($off,$ap,"",0));	# *a
		&mul($word);			# t = *a * w
		&add($lo,$carry);		# L(t) += c
		&adc($hi,0);			# H(t) += carry flag
		&mov(&DWP($off,$rp,"",0),$lo);	# *r = L(t)
		&mov($carry,$hi);		# c = H(t)
		# The last round needs no count/branch: it falls into mw_end.
		if ($k < 6)
			{
			&dec($cnt);
			&jz(&label("mw_end"));
			}
		}
	&set_label("mw_end",0);
	&mov("eax",$carry);		# result: final carry word

	&function_end($name);
	}
174*5b37fcf3Sryker
sub bn_sqr_words
	{
	# Emit the x86 routine called $name.
	#
	# The generated code reads its arguments through wparam(0..2) as
	# r, a, num.  Each input word a[i] is squared and the 64-bit
	# result is stored as two words: low half at r[2i], high half at
	# r[2i+1].  No value is placed in eax for the caller.
	#
	# Layout: an 8-way unrolled loop for num & ~7 words, followed by
	# an unrolled tail of up to 7 rounds for the remaining num & 7.
	local($name)=@_;
	local($lo,$hi)=("eax","edx");	# mul leaves the product in edx:eax
	local($rp,$ap)=("esi","edi");	# r pointer, a pointer
	local($cnt)="ebx";		# words still to process
	local($k,$src,$dst);

	&function_begin($name,"");

	&comment("");

	&mov($rp,&wparam(0));
	&mov($ap,&wparam(1));
	&mov($cnt,&wparam(2));

	&and($cnt,0xfffffff8);		# num rounded down to a multiple of 8
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	foreach $k (0 .. 7)
		{
		$src=$k*4;		# byte offset into a
		$dst=$k*8;		# byte offset into r (two words per input)
		&comment("Round $src");
		&mov($lo,&DWP($src,$ap,"",0));		# *a
		&mul($lo);				# *a * *a
		&mov(&DWP($dst,$rp,"",0),$lo);		# low word of the square
		&mov(&DWP($dst+4,$rp,"",0),$hi);	# high word of the square
		}

	&comment("");
	&add($ap,32);			# 8 input words consumed
	&add($rp,64);			# 16 output words produced
	&sub($cnt,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($cnt,&wparam(2));		# num again
	&and($cnt,7);			# words left for the tail
	&jz(&label("sw_end"));

	foreach $k (0 .. 6)
		{
		$src=$k*4;
		$dst=$k*8;
		&comment("Tail Round $k");
		&mov($lo,&DWP($src,$ap,"",0));		# *a
		&mul($lo);				# *a * *a
		&mov(&DWP($dst,$rp,"",0),$lo);		# low word of the square
		# The last round needs no count/branch: it falls into sw_end.
		# The store between dec and jz leaves ZF untouched.
		&dec($cnt) if ($k < 6);
		&mov(&DWP($dst+4,$rp,"",0),$hi);	# high word of the square
		&jz(&label("sw_end")) if ($k < 6);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}
230*5b37fcf3Sryker
sub bn_div64
	{
	# Emit the x86 routine called $name.
	#
	# The generated code loads wparam(0) into edx and wparam(1) into
	# eax, forming the 64-bit dividend edx:eax, and divides it by
	# wparam(2) with a single unsigned div.  div leaves the quotient
	# in eax and the remainder in edx.
	#
	# NOTE(review): div faults when the quotient does not fit in 32
	# bits (including a zero divisor); nothing here guards against
	# that, so callers presumably guarantee wparam(0) < wparam(2) --
	# confirm against the C callers.
	local($name)=@_;
	local($high,$low)=("edx","eax");	# dividend halves
	local($divisor)="ebx";

	&function_begin($name,"");

	&mov($high,&wparam(0));
	&mov($low,&wparam(1));
	&mov($divisor,&wparam(2));
	&div($divisor);

	&function_end($name);
	}
242*5b37fcf3Sryker
sub bn_add_words
	{
	# Emit the x86 routine called $name.
	#
	# The generated code reads its arguments through wparam(0..3) as
	# r, a, b, num.  For each word it stores a[i] + b[i] + carry to
	# r[i] and records the carry out (0 or 1) for the next word.  The
	# carry is held in eax throughout, so it is also the value left in
	# eax at function_end.
	#
	# Layout: an 8-way unrolled loop for num & ~7 words, followed by
	# an unrolled tail of up to 7 rounds for the remaining num & 7.
	local($name)=@_;
	local($ap,$bp,$rp)=("esi","edi","ebx");	# a, b and r pointers
	local($carry)="eax";			# carry between words
	local($sum,$addend)=("ecx","edx");	# scratch: a[i] / b[i]
	local($cnt)="ebp";			# words still to process
	local($k,$off);

	&function_begin($name,"");

	&comment("");

	&mov($rp,&wparam(0));		# get r
	&mov($ap,&wparam(1));		# get a
	&mov($bp,&wparam(2));		# get b
	&mov($cnt,&wparam(3));		# get num
	&xor($carry,$carry);		# carry = 0
	&and($cnt,0xfffffff8);		# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach $k (0 .. 7)
		{
		$off=$k*4;		# byte offset of word $k
		&comment("Round $k");

		&mov($sum,&DWP($off,$ap,"",0));		# *a
		&mov($addend,&DWP($off,$bp,"",0));	# *b
		&add($sum,$carry);			# *a + carry in
		# Zero the carry register with mov, not xor: the flags from
		# the add above must survive for the adc that follows.
		&mov($carry,0);
		&adc($carry,$carry);			# carry = CF of first add
		&add($sum,$addend);			# ... + *b
		&adc($carry,0);				# carry += CF of second add
		&mov(&DWP($off,$rp,"",0),$sum);		# *r
		}

	&comment("");
	&add($ap,32);			# advance all three pointers by 8 words
	&add($bp,32);
	&add($rp,32);
	&sub($cnt,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($cnt,&wparam(3));		# num again
	&and($cnt,7);			# words left for the tail
	&jz(&label("aw_end"));

	foreach $k (0 .. 6)
		{
		$off=$k*4;
		&comment("Tail Round $k");

		&mov($sum,&DWP($off,$ap,"",0));		# *a
		&mov($addend,&DWP($off,$bp,"",0));	# *b
		&add($sum,$carry);			# *a + carry in
		&mov($carry,0);				# mov keeps CF intact
		&adc($carry,$carry);			# carry = CF of first add
		&add($sum,$addend);			# ... + *b
		&adc($carry,0);				# carry += CF of second add
		# The last round needs no count/branch: it falls into aw_end.
		# The store between dec and jz leaves ZF untouched.
		&dec($cnt) if ($k < 6);
		&mov(&DWP($off,$rp,"",0),$sum);		# *r
		&jz(&label("aw_end")) if ($k < 6);
		}
	&set_label("aw_end",0);

	# The carry already lives in eax, so this emits a register-to-self
	# move; it marks eax as the result.
	&mov("eax",$carry);

	&function_end($name);
	}
314*5b37fcf3Sryker
315