xref: /freebsd/crypto/openssl/crypto/des/asm/des-586.pl (revision 10ff414c)
1#! /usr/bin/env perl
2# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9# The inner loop instruction sequence and the IP/FP modifications are from
10# Svend Olaf Mikkelsen.
11
12$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
13push(@INC,"${dir}","${dir}../../perlasm");
14require "x86asm.pl";
15require "cbc.pl";
16require "desboth.pl";
17
# The code below is written in Microsoft (Intel-style) operand order,
# i.e. the destination comes first:
#	op dest, source
#
22
# The last command-line argument names the output file.  It is removed from
# @ARGV here, so the remaining arguments are what &asm_init and the
# OPENSSL_SMALL_FOOTPRINT check below get to see.
$output=pop;
if (defined($output) && $output ne "")
	{
	# Three-argument open: the file name is never interpreted as a mode,
	# and a failure is reported right away instead of only showing up
	# when STDOUT is closed at the end of the script.
	open(STDOUT,">",$output) or die "can't open $output: $!";
	}

&asm_init($ARGV[0]);

# Register assignment shared by all generator subs in this file.
$L="edi";
$R="esi";
$trans="ebp";
$small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV));
# one can discuss setting this variable to 1 unconditionally, as
# the folded loop is only 3% slower than unrolled, but >7 times smaller

&public_label("DES_SPtrans");
&static_label("des_sptrans");

# Emit the routines.  The order of these calls is the order in which the
# code appears in the generated assembler file.
&DES_encrypt_internal();
&DES_decrypt_internal();
&DES_encrypt("DES_encrypt1",1);
&DES_encrypt("DES_encrypt2",0);
&DES_encrypt3("DES_encrypt3",1);
&DES_encrypt3("DES_decrypt3",0);
&cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1);
&cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5);
&DES_SPtrans();

&asm_finish();

# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";
51
# Emit the internal routine "_x86_DES_encrypt": sixteen D_ENCRYPT rounds
# with ascending key offsets.  The generated code takes the key pointer in
# ecx and keeps a copy of it on the stack, from where D_ENCRYPT reloads it
# at the end of every round.
#
# With $small_footprint set, a two-round loop body is emitted and ecx is
# advanced by 16 bytes per iteration until it reaches the limit pushed
# first (start + 128).  Otherwise all sixteen rounds are unrolled.
sub DES_encrypt_internal()
{
    # One round; &swtmp(0) is evaluated at each call, as in a direct call.
    my $emit_round = sub {
        my ($round, $dst, $src, $key_index) = @_;
        &D_ENCRYPT($round, $dst, $src, $key_index, $trans,
                   "eax", "ebx", "ecx", "edx", &swtmp(0));
    };

    &function_begin_B("_x86_DES_encrypt");

    if (!$small_footprint) {
        # Fully unrolled: rounds come in pairs with L and R swapped.
        &push("ecx");
        for my $pair (0 .. 7) {
            my $even = 2 * $pair;
            my $odd  = $even + 1;

            &comment("Round $even");
            $emit_round->($even, $L, $R, $even * 2);
            &comment("Round $odd");
            $emit_round->($odd, $R, $L, $odd * 2);
        }
        &add("esp", 4);
    }
    else {
        # Folded loop: two stack slots are pushed, first the end-of-key
        # limit, then the current key pointer.
        &lea("edx", &DWP(128, "ecx"));
        &push("edx");
        &push("ecx");

        &set_label("eloop");
        $emit_round->(0, $L, $R, 0);
        &comment("");
        $emit_round->(1, $R, $L, 2);
        &comment("");
        &add("ecx", 16);
        &cmp("ecx", &swtmp(1));
        &mov(&swtmp(0), "ecx");
        &jb(&label("eloop"));

        &add("esp", 8);
    }

    &ret();

    &function_end_B("_x86_DES_encrypt");
}
88
# Emit the internal routine "_x86_DES_decrypt": the same sixteen D_ENCRYPT
# rounds as the encrypt routine, but with the key offsets walked in
# descending order.
#
# With $small_footprint set, the loop starts with ecx at start + 128 and
# steps it down by 16 bytes per iteration (negative key offsets are used
# inside the loop body) until it reaches the original pointer pushed first.
# Otherwise all sixteen rounds are unrolled.
sub DES_decrypt_internal()
{
    # One round; &swtmp(0) is evaluated at each call, as in a direct call.
    my $emit_round = sub {
        my ($round, $dst, $src, $key_index) = @_;
        &D_ENCRYPT($round, $dst, $src, $key_index, $trans,
                   "eax", "ebx", "ecx", "edx", &swtmp(0));
    };

    &function_begin_B("_x86_DES_decrypt");

    if (!$small_footprint) {
        # Fully unrolled: key rounds 15,14,...,0 with L and R alternating.
        &push("ecx");
        for my $pair (0 .. 7) {
            my $high = 15 - 2 * $pair;
            my $low  = $high - 1;

            &comment("Round $high");
            $emit_round->(2 * $pair, $L, $R, $high * 2);
            &comment("Round $low");
            $emit_round->(2 * $pair + 1, $R, $L, $low * 2);
        }
        &add("esp", 4);
    }
    else {
        # Folded loop: the lower bound (original ecx) is pushed first, then
        # the current pointer, which starts 128 bytes past it.
        &push("ecx");
        &lea("ecx", &DWP(128, "ecx"));
        &push("ecx");

        &set_label("dloop");
        $emit_round->(0, $L, $R, -2);
        &comment("");
        $emit_round->(1, $R, $L, -4);
        &comment("");
        &sub("ecx", 16);
        &cmp("ecx", &swtmp(1));
        &mov(&swtmp(0), "ecx");
        &ja(&label("dloop"));

        &add("esp", 8);
    }

    &ret();

    &function_end_B("_x86_DES_decrypt");
}
125
# Emit a public single-block routine called $name.
#
#   $name   - label of the generated function
#   $do_ip  - true:  wrap the rounds in IP_new / FP_new
#             false: only rotate both words left by 3 before the rounds and
#                    right by 3 afterwards
#
# The generated code loads two 32-bit words through its first argument,
# takes the key pointer from the second, and uses the third as an encrypt
# flag: zero selects _x86_DES_decrypt, anything else _x86_DES_encrypt.  The
# two result words are stored back through the first argument.
sub DES_encrypt
{
    my ($name, $do_ip) = @_;

    # Register that receives the data pointer while the words are loaded.
    my $data_ptr = $do_ip ? $R : "eax";

    &function_begin_B($name);

    for my $saved ("esi", "edi") {
        &push($saved);
    }

    &comment("");
    &comment("Load the 2 words");

    # Both variants start the same way.  &wparam is evaluated exactly where
    # the straight-line version evaluates it, relative to the pushes.
    &mov($data_ptr, &wparam(0));
    &xor("ecx", "ecx");
    &push("ebx");
    &push("ebp");

    if ($do_ip) {
        &mov("eax", &DWP(0, $data_ptr, "", 0));
        &mov("ebx", &wparam(2));    # get encrypt flag
        &mov($L, &DWP(4, $data_ptr, "", 0));
        &comment("");
        &comment("IP");
        &IP_new("eax", $L, $R, 3);
    }
    else {
        &mov($R, &DWP(0, $data_ptr, "", 0));
        &mov("ebx", &wparam(2));    # get encrypt flag
        &rotl($R, 3);
        &mov($L, &DWP(4, $data_ptr, "", 0));
        &rotl($L, 3);
    }

    # PIC-ification: call/pop yields the address of pic_point, from which
    # the address of the des_sptrans table is computed into $trans.
    &call(&label("pic_point"));
    &set_label("pic_point");
    &blindpop($trans);
    &lea($trans,
         &DWP(&label("des_sptrans") . "-" . &label("pic_point"), $trans));

    &mov("ecx", &wparam(1));

    # Dispatch on the encrypt flag loaded into ebx above.
    &cmp("ebx", "0");
    &je(&label("decrypt"));
    &call("_x86_DES_encrypt");
    &jmp(&label("done"));
    &set_label("decrypt");
    &call("_x86_DES_decrypt");
    &set_label("done");

    &comment("");
    if ($do_ip) {
        &comment("FP");
        &mov("edx", &wparam(0));
        &FP_new($L, $R, "eax", 3);

        &mov(&DWP(0, "edx", "", 0), "eax");
        &mov(&DWP(4, "edx", "", 0), $R);
    }
    else {
        &comment("Fixup");
        &rotr($L, 3);               # r
        &mov("eax", &wparam(0));
        &rotr($R, 3);               # l
        &mov(&DWP(0, "eax", "", 0), $L);
        &mov(&DWP(4, "eax", "", 0), $R);
    }

    # Restore the callee-saved registers in reverse push order.
    for my $saved ("ebp", "ebx", "edi", "esi") {
        &pop($saved);
    }
    &ret();

    &function_end_B($name);
}
213
# Emit the instruction sequence for one round.
#
#   $r      - round number (not used by the emitted code)
#   $L, $R  - registers: $L is XORed with eight table words selected by
#             bytes derived from $R and the round key
#   $S      - index of the first of the two 32-bit round-key words,
#             addressed relative to $tmp2
#   $trans  - register holding the base address of the lookup table
#   $u, $t  - scratch registers receiving the two key words
#   $tmp1, $tmp2 - scratch index registers; $tmp2 must hold the key pointer
#             on entry and is reloaded from $wp1 before the round ends
#   $wp1    - memory operand from which the key pointer is reloaded
#
# The statement order below is the instruction schedule of the generated
# code and must not be changed.
sub D_ENCRYPT
{
    my ($r, $L, $R, $S, $trans, $u, $tmp1, $tmp2, $t, $wp1) = @_;

    # Byte offsets of the two round-key words, as decimal strings.
    my $key_lo = sprintf("%d", $S * 4);
    my $key_hi = sprintf("%d", ($S + 1) * 4);

    &mov($u, &DWP($key_lo, $tmp2, "", 0));
    &xor($tmp1, $tmp1);
    &mov($t, &DWP($key_hi, $tmp2, "", 0));
    &xor($u, $R);
    &xor($tmp2, $tmp2);
    &xor($t, $R);
    &and($u, "0xfcfcfcfc");
    &and($t, "0xcfcfcfcf");
    &movb(&LB($tmp1), &LB($u));
    &movb(&LB($tmp2), &HB($u));
    &rotr($t, 4);
    &xor($L, &DWP("     ", $trans, $tmp1, 0));
    &movb(&LB($tmp1), &LB($t));
    &xor($L, &DWP("0x200", $trans, $tmp2, 0));
    &movb(&LB($tmp2), &HB($t));
    &shr($u, 16);
    &xor($L, &DWP("0x100", $trans, $tmp1, 0));
    &movb(&LB($tmp1), &HB($u));
    &shr($t, 16);
    &xor($L, &DWP("0x300", $trans, $tmp2, 0));
    &movb(&LB($tmp2), &HB($t));
    &and($u, "0xff");
    &and($t, "0xff");
    &xor($L, &DWP("0x600", $trans, $tmp1, 0));
    &xor($L, &DWP("0x700", $trans, $tmp2, 0));
    &mov($tmp2, $wp1);              # restore the key pointer
    &xor($L, &DWP("0x400", $trans, $u, 0));
    &xor($L, &DWP("0x500", $trans, $t, 0));
}
247
# Format a number as a decimal integer string ("%d").
sub n2a
{
    my $text = sprintf("%d", $_[0]);
    return $text;
}
252
# Emit one permutation step on the register pair ($a,$b), using $tt as a
# temporary.  Has the side effect of rotating $a left by $shift first.
#
#   $a, $b  - registers being permuted against each other
#   $tt     - register that receives a copy of the (rotated) $a and is then
#             XORed with the masked difference
#   $shift  - left-rotate count applied to $a; 0 emits no rotate
#   $mask   - mask applied to $a ^ $b
#   $last   - accepted for compatibility with existing callers; not used
#
# After the emitted sequence $a holds ($a ^ $b) & $mask, and both $tt and
# $b have been XORed with that value.
#
# The old code chose between two orders of the final XORs with
# "if (!$last eq $b)".  That parses as "(!$last) eq $b", which is never true
# for register names, so only the order below was ever emitted.  The dead
# alternative has been dropped; the generated code is unchanged.
sub R_PERM_OP
	{
	# Lexicals with distinct names: the sort globals $a/$b are left alone.
	my($reg_a,$reg_b,$reg_tt,$shift,$mask,$last)=@_;

	&rotl(	$reg_a,		$shift		) if ($shift != 0);
	&mov(	$reg_tt,	$reg_a		);
	&xor(	$reg_a,		$reg_b		);
	&and(	$reg_a,		$mask		);
	&xor(	$reg_tt,	$reg_a		);
	&xor(	$reg_b,		$reg_a		);
	&comment("");
	}
276
# Emit the "IP" sequence (presumably the DES initial permutation - the
# caller labels it "IP"): five R_PERM_OP steps over the registers $l, $r
# and $tt, followed by fix-up rotations of $tt and $r.
#
#   $l, $r, $tt - register names; their roles rotate from step to step
#   $lr         - rotation selector: $tt ends up rotated left by $lr-3 and
#                 $r by $lr-2 (a negative amount means rotate right, zero
#                 means no instruction is emitted)
sub IP_new
{
    my ($left, $right, $scratch, $lr) = @_;

    # Argument lists for the five permutation steps, in emission order.
    my @steps = (
        [ $left,    $right,   $scratch,  4, "0xf0f0f0f0", $left  ],
        [ $right,   $scratch, $left,    20, "0xfff0000f", $left  ],
        [ $left,    $scratch, $right,   14, "0x33333333", $right ],
        [ $scratch, $right,   $left,    22, "0x03fc03fc", $right ],
        [ $left,    $right,   $scratch,  9, "0xaaaaaaaa", $right ],
    );
    for my $step (@steps) {
        &R_PERM_OP(@$step);
    }

    # Fix-up rotation of $tt by ($lr - 3).
    if ($lr < 3) {
        &rotr($scratch, 3 - $lr);
    }
    elsif ($lr > 3) {
        &rotl($scratch, $lr - 3);
    }

    # Fix-up rotation of $r by ($lr - 2).
    if ($lr < 2) {
        &rotr($right, 2 - $lr);
    }
    elsif ($lr > 2) {
        &rotl($right, $lr - 2);
    }
}
300
# Emit the "FP" sequence (presumably the DES final permutation - the caller
# labels it "FP"): it first undoes the rotations selected by $lr, then runs
# five R_PERM_OP steps with the IP_new masks in reverse order, and finally
# rotates $tt right by 4.
#
#   $l, $r, $tt - register names; their roles rotate from step to step
#   $lr         - rotation selector: $r is first rotated right by $lr-2 and
#                 $l by $lr-3 (a negative amount means rotate left, zero
#                 means no instruction is emitted)
sub FP_new
{
    my ($left, $right, $scratch, $lr) = @_;

    # Undo the rotation of $r by ($lr - 2).
    if ($lr < 2) {
        &rotl($right, 2 - $lr);
    }
    elsif ($lr > 2) {
        &rotr($right, $lr - 2);
    }

    # Undo the rotation of $l by ($lr - 3).
    if ($lr < 3) {
        &rotl($left, 3 - $lr);
    }
    elsif ($lr > 3) {
        &rotr($left, $lr - 3);
    }

    # Argument lists for the five permutation steps, in emission order.
    my @steps = (
        [ $left,    $right,   $scratch,  0, "0xaaaaaaaa", $right ],
        [ $scratch, $right,   $left,    23, "0x03fc03fc", $right ],
        [ $left,    $right,   $scratch, 10, "0x33333333", $left  ],
        [ $right,   $scratch, $left,    18, "0xfff0000f", $left  ],
        [ $left,    $scratch, $right,   12, "0xf0f0f0f0", $right ],
    );
    for my $step (@steps) {
        &R_PERM_OP(@$step);
    }

    &rotr($scratch, 4);
}
325
# Emit the lookup table used by D_ENCRYPT.  The table is labelled both
# "DES_SPtrans" (requested with an alignment argument of 64) and
# "des_sptrans", and consists of eight groups ("nibbles") of 64 32-bit
# words.  The words are handed to &data_word four at a time, as numbers.
sub DES_SPtrans
{
    my @rows = (
        # nibble 0
        [0x02080800, 0x00080000, 0x02000002, 0x02080802],
        [0x02000000, 0x00080802, 0x00080002, 0x02000002],
        [0x00080802, 0x02080800, 0x02080000, 0x00000802],
        [0x02000802, 0x02000000, 0x00000000, 0x00080002],
        [0x00080000, 0x00000002, 0x02000800, 0x00080800],
        [0x02080802, 0x02080000, 0x00000802, 0x02000800],
        [0x00000002, 0x00000800, 0x00080800, 0x02080002],
        [0x00000800, 0x02000802, 0x02080002, 0x00000000],
        [0x00000000, 0x02080802, 0x02000800, 0x00080002],
        [0x02080800, 0x00080000, 0x00000802, 0x02000800],
        [0x02080002, 0x00000800, 0x00080800, 0x02000002],
        [0x00080802, 0x00000002, 0x02000002, 0x02080000],
        [0x02080802, 0x00080800, 0x02080000, 0x02000802],
        [0x02000000, 0x00000802, 0x00080002, 0x00000000],
        [0x00080000, 0x02000000, 0x02000802, 0x02080800],
        [0x00000002, 0x02080002, 0x00000800, 0x00080802],
        # nibble 1
        [0x40108010, 0x00000000, 0x00108000, 0x40100000],
        [0x40000010, 0x00008010, 0x40008000, 0x00108000],
        [0x00008000, 0x40100010, 0x00000010, 0x40008000],
        [0x00100010, 0x40108000, 0x40100000, 0x00000010],
        [0x00100000, 0x40008010, 0x40100010, 0x00008000],
        [0x00108010, 0x40000000, 0x00000000, 0x00100010],
        [0x40008010, 0x00108010, 0x40108000, 0x40000010],
        [0x40000000, 0x00100000, 0x00008010, 0x40108010],
        [0x00100010, 0x40108000, 0x40008000, 0x00108010],
        [0x40108010, 0x00100010, 0x40000010, 0x00000000],
        [0x40000000, 0x00008010, 0x00100000, 0x40100010],
        [0x00008000, 0x40000000, 0x00108010, 0x40008010],
        [0x40108000, 0x00008000, 0x00000000, 0x40000010],
        [0x00000010, 0x40108010, 0x00108000, 0x40100000],
        [0x40100010, 0x00100000, 0x00008010, 0x40008000],
        [0x40008010, 0x00000010, 0x40100000, 0x00108000],
        # nibble 2
        [0x04000001, 0x04040100, 0x00000100, 0x04000101],
        [0x00040001, 0x04000000, 0x04000101, 0x00040100],
        [0x04000100, 0x00040000, 0x04040000, 0x00000001],
        [0x04040101, 0x00000101, 0x00000001, 0x04040001],
        [0x00000000, 0x00040001, 0x04040100, 0x00000100],
        [0x00000101, 0x04040101, 0x00040000, 0x04000001],
        [0x04040001, 0x04000100, 0x00040101, 0x04040000],
        [0x00040100, 0x00000000, 0x04000000, 0x00040101],
        [0x04040100, 0x00000100, 0x00000001, 0x00040000],
        [0x00000101, 0x00040001, 0x04040000, 0x04000101],
        [0x00000000, 0x04040100, 0x00040100, 0x04040001],
        [0x00040001, 0x04000000, 0x04040101, 0x00000001],
        [0x00040101, 0x04000001, 0x04000000, 0x04040101],
        [0x00040000, 0x04000100, 0x04000101, 0x00040100],
        [0x04000100, 0x00000000, 0x04040001, 0x00000101],
        [0x04000001, 0x00040101, 0x00000100, 0x04040000],
        # nibble 3
        [0x00401008, 0x10001000, 0x00000008, 0x10401008],
        [0x00000000, 0x10400000, 0x10001008, 0x00400008],
        [0x10401000, 0x10000008, 0x10000000, 0x00001008],
        [0x10000008, 0x00401008, 0x00400000, 0x10000000],
        [0x10400008, 0x00401000, 0x00001000, 0x00000008],
        [0x00401000, 0x10001008, 0x10400000, 0x00001000],
        [0x00001008, 0x00000000, 0x00400008, 0x10401000],
        [0x10001000, 0x10400008, 0x10401008, 0x00400000],
        [0x10400008, 0x00001008, 0x00400000, 0x10000008],
        [0x00401000, 0x10001000, 0x00000008, 0x10400000],
        [0x10001008, 0x00000000, 0x00001000, 0x00400008],
        [0x00000000, 0x10400008, 0x10401000, 0x00001000],
        [0x10000000, 0x10401008, 0x00401008, 0x00400000],
        [0x10401008, 0x00000008, 0x10001000, 0x00401008],
        [0x00400008, 0x00401000, 0x10400000, 0x10001008],
        [0x00001008, 0x10000000, 0x10000008, 0x10401000],
        # nibble 4
        [0x08000000, 0x00010000, 0x00000400, 0x08010420],
        [0x08010020, 0x08000400, 0x00010420, 0x08010000],
        [0x00010000, 0x00000020, 0x08000020, 0x00010400],
        [0x08000420, 0x08010020, 0x08010400, 0x00000000],
        [0x00010400, 0x08000000, 0x00010020, 0x00000420],
        [0x08000400, 0x00010420, 0x00000000, 0x08000020],
        [0x00000020, 0x08000420, 0x08010420, 0x00010020],
        [0x08010000, 0x00000400, 0x00000420, 0x08010400],
        [0x08010400, 0x08000420, 0x00010020, 0x08010000],
        [0x00010000, 0x00000020, 0x08000020, 0x08000400],
        [0x08000000, 0x00010400, 0x08010420, 0x00000000],
        [0x00010420, 0x08000000, 0x00000400, 0x00010020],
        [0x08000420, 0x00000400, 0x00000000, 0x08010420],
        [0x08010020, 0x08010400, 0x00000420, 0x00010000],
        [0x00010400, 0x08010020, 0x08000400, 0x00000420],
        [0x00000020, 0x00010420, 0x08010000, 0x08000020],
        # nibble 5
        [0x80000040, 0x00200040, 0x00000000, 0x80202000],
        [0x00200040, 0x00002000, 0x80002040, 0x00200000],
        [0x00002040, 0x80202040, 0x00202000, 0x80000000],
        [0x80002000, 0x80000040, 0x80200000, 0x00202040],
        [0x00200000, 0x80002040, 0x80200040, 0x00000000],
        [0x00002000, 0x00000040, 0x80202000, 0x80200040],
        [0x80202040, 0x80200000, 0x80000000, 0x00002040],
        [0x00000040, 0x00202000, 0x00202040, 0x80002000],
        [0x00002040, 0x80000000, 0x80002000, 0x00202040],
        [0x80202000, 0x00200040, 0x00000000, 0x80002000],
        [0x80000000, 0x00002000, 0x80200040, 0x00200000],
        [0x00200040, 0x80202040, 0x00202000, 0x00000040],
        [0x80202040, 0x00202000, 0x00200000, 0x80002040],
        [0x80000040, 0x80200000, 0x00202040, 0x00000000],
        [0x00002000, 0x80000040, 0x80002040, 0x80202000],
        [0x80200000, 0x00002040, 0x00000040, 0x80200040],
        # nibble 6
        [0x00004000, 0x00000200, 0x01000200, 0x01000004],
        [0x01004204, 0x00004004, 0x00004200, 0x00000000],
        [0x01000000, 0x01000204, 0x00000204, 0x01004000],
        [0x00000004, 0x01004200, 0x01004000, 0x00000204],
        [0x01000204, 0x00004000, 0x00004004, 0x01004204],
        [0x00000000, 0x01000200, 0x01000004, 0x00004200],
        [0x01004004, 0x00004204, 0x01004200, 0x00000004],
        [0x00004204, 0x01004004, 0x00000200, 0x01000000],
        [0x00004204, 0x01004000, 0x01004004, 0x00000204],
        [0x00004000, 0x00000200, 0x01000000, 0x01004004],
        [0x01000204, 0x00004204, 0x00004200, 0x00000000],
        [0x00000200, 0x01000004, 0x00000004, 0x01000200],
        [0x00000000, 0x01000204, 0x01000200, 0x00004200],
        [0x00000204, 0x00004000, 0x01004204, 0x01000000],
        [0x01004200, 0x00000004, 0x00004004, 0x01004204],
        [0x01000004, 0x01004200, 0x01004000, 0x00004004],
        # nibble 7
        [0x20800080, 0x20820000, 0x00020080, 0x00000000],
        [0x20020000, 0x00800080, 0x20800000, 0x20820080],
        [0x00000080, 0x20000000, 0x00820000, 0x00020080],
        [0x00820080, 0x20020080, 0x20000080, 0x20800000],
        [0x00020000, 0x00820080, 0x00800080, 0x20020000],
        [0x20820080, 0x20000080, 0x00000000, 0x00820000],
        [0x20000000, 0x00800000, 0x20020080, 0x20800080],
        [0x00800000, 0x00020000, 0x20820000, 0x00000080],
        [0x00800000, 0x00020000, 0x20000080, 0x20820080],
        [0x00020080, 0x20000000, 0x00000000, 0x00820000],
        [0x20800080, 0x20020080, 0x20020000, 0x00800080],
        [0x20820000, 0x00000080, 0x00800080, 0x20020000],
        [0x20820080, 0x00800000, 0x20800000, 0x20000080],
        [0x00820000, 0x00020080, 0x20020080, 0x20800000],
        [0x00000080, 0x20820000, 0x00820080, 0x00000000],
        [0x20000000, 0x20800080, 0x00020000, 0x00820080],
    );

    &set_label("DES_SPtrans", 64);
    &set_label("des_sptrans");

    # One &data_word call per row of four words, in table order.
    for my $row (@rows) {
        &data_word(@$row);
    }
}
466