xref: /freebsd/crypto/openssl/crypto/des/asm/des_enc.m4 (revision c697fb7f)
1! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
2!
3! Licensed under the OpenSSL license (the "License").  You may not use
4! this file except in compliance with the License.  You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
7!
8!  To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
9!
10!  Global registers 1 to 5 are used. This is the same as done by the
11!  cc compiler. The UltraSPARC load/store little endian feature is used.
12!
13!  Instruction grouping often refers to one CPU cycle.
14!
15!  Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
16!
17!  Assemble through cc:  cc -c -xarch=v8plusa -o des_enc.o des_enc.S
18!
19!  Performance improvement according to './apps/openssl speed des'
20!
21!	32-bit build:
22!		23%  faster than cc-5.2 -xarch=v8plus -xO5
23!		115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
24!	64-bit build:
25!		50%  faster than cc-5.2 -xarch=v9 -xO5
26!		100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
27!
28
29.ident "des_enc.m4 2.1"
30.file  "des_enc-sparc.S"
31
32#include <openssl/opensslconf.h>
33! Pick the 64-bit ABI when the compiler runs in 64-bit mode.
34#if defined(__SUNPRO_C) && defined(__sparcv9)
35# define ABI64  /* They've said -xarch=v9 at command line */
36#elif defined(__GNUC__) && defined(__arch64__)
37# define ABI64  /* They've said -m64 at command line */
38#endif
39! ABI dependent frame size, stack bias, pointer load/store and argument slots.
40#ifdef ABI64
41  .register	%g2,#scratch
42  .register	%g3,#scratch
43# define	FRAME	-192
44# define	BIAS	2047
45# define	LDPTR	ldx
46# define	STPTR	stx
47# define	ARG0	128
48# define	ARGSZ	8
49#else
50# define	FRAME	-96
51# define	BIAS	0
52# define	LDPTR	ld
53# define	STPTR	st
54# define	ARG0	68
55# define	ARGSZ	4
56#endif
57! LOOPS: passes of the two-round loop; two more rounds are done separately.
58#define LOOPS 7
59! Symbolic names for the SPARC registers used below.
60#define global0 %g0
61#define global1 %g1
62#define global2 %g2
63#define global3 %g3
64#define global4 %g4
65#define global5 %g5
66! NOTE(review): the names for %l6 and %l7 below are crossed - presumably intended, confirm before changing.
67#define local0 %l0
68#define local1 %l1
69#define local2 %l2
70#define local3 %l3
71#define local4 %l4
72#define local5 %l5
73#define local7 %l6
74#define local6 %l7
75
76#define in0 %i0
77#define in1 %i1
78#define in2 %i2
79#define in3 %i3
80#define in4 %i4
81#define in5 %i5
82#define in6 %i6
83#define in7 %i7
84
85#define out0 %o0
86#define out1 %o1
87#define out2 %o2
88#define out3 %o3
89#define out4 %o4
90#define out5 %o5
91#define out6 %o6
92#define out7 %o7
93! Byte stores are written with the alias below, which maps to stb.
94#define stub stb
95! From here on m4 quotes are written with braces.
96changequote({,})
97
98
99! Macro definitions:
100
101
102! {ip_macro}
103!
104! The logic used in initial and final permutations is the same as in
105! the C code. The permutations are done with a clever shift/xor/and
106! technique.
107!
108! The macro also loads address sbox 1 to 5 to global 1 to 5, address
109! sbox 6 to local6, and address sbox 8 to out3.
110!
111! Rotates the halves 3 left to bring the sbox bits in convenient positions.
112!
113! Loads key first round from address in parameter 5 to out0, out1.
114!
115! After the original LibDES initial permutation, the resulting left
116! is in the variable initially used for right and vice versa. The macro
117! implements the possibility to keep the halves in the original registers.
118!
119! parameter 1  left
120! parameter 2  right
121! parameter 3  result left (modify in first round)
122! parameter 4  result right (use in first round)
123! parameter 5  key address
124! parameter 6  1/2 for include encryption/decryption
125! parameter 7  1 for move in1 to in3
126! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
127! parameter 9  1 for load ks3 and ks2 to in4 and in3
128
129define(ip_macro, {
130! Initial permutation of $1 and $2. global1 must already hold the address of sbox 1 and out2 the constant table.
131! {ip_macro}
132! $1 $2 $4 $3 $5 $6 $7 $8 $9
133
134	ld	[out2+256], local1	! mask for the 4 bit step
135	srl	$2, 4, local4
136
137	xor	local4, $1, local4
138	ifelse($7,1,{mov in1, in3},{nop})
139
140	ld	[out2+260], local2	! mask for the 16 bit step
141	and	local4, local1, local4
142	ifelse($8,1,{mov in3, in4},{})
143	ifelse($8,2,{mov in4, in3},{})
144
145	ld	[out2+280], out4          ! loop counter
146	sll	local4, 4, local1
147	xor	$1, local4, $1
148
149	ld	[out2+264], local3	! mask for the 2 bit step
150	srl	$1, 16, local4
151	xor	$2, local1, $2
152
153	ifelse($9,1,{LDPTR	KS3, in4},{})
154	xor	local4, $2, local4
155	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr
156
157	ifelse($9,1,{LDPTR	KS2, in3},{})
158	and	local4, local2, local4
159	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr
160
161	sll	local4, 16, local1
162	xor	$2, local4, $2
163
164	srl	$2, 2, local4
165	xor	$1, local1, $1
166
167	sethi	%hi(16711680), local5	! with the or below: 0x00ff00ff, mask for the 8 bit step
168	xor	local4, $1, local4
169
170	and	local4, local3, local4
171	or	local5, 255, local5
172
173	sll	local4, 2, local2
174	xor	$1, local4, $1
175
176	srl	$1, 8, local4
177	xor	$2, local2, $2
178
179	xor	local4, $2, local4
180	add	global1, 768, global4	! address sbox 4
181
182	and	local4, local5, local4
183	add	global1, 1024, global5	! address sbox 5
184
185	ld	[out2+272], local7	! mask for the 1 bit step
186	sll	local4, 8, local1
187	xor	$2, local4, $2
188
189	srl	$2, 1, local4
190	xor	$1, local1, $1
191
192	ld	[$5], out0                ! key 7531
193	xor	local4, $1, local4
194	add	global1, 256, global2	! address sbox 2
195
196	ld	[$5+4], out1              ! key 8642
197	and	local4, local7, local4
198	add	global1, 512, global3	! address sbox 3
199
200	sll	local4, 1, local1
201	xor	$1, local4, $1
202
203	sll	$1, 3, local3	! rotate both halves 3 left, result in $4 and $3
204	xor	$2, local1, $2
205
206	sll	$2, 3, local2
207	add	global1, 1280, local6     ! address sbox 6
208
209	srl	$1, 29, local4
210	add	global1, 1792, out3       ! address sbox 8
211
212	srl	$2, 29, local1
213	or	local4, local3, $4
214
215	or	local2, local1, $3
216
217	ifelse($6, 1, {
218
219		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
220		or	local2, local1, $3	! same value as the or above
221		xor	$4, out0, local1	! start sbox 1 lookup of the first round
222
223		call .des_enc.1	! enter the encryption rounds at the loop label
224		and	local1, 252, local1	! delay slot
225
226	},{})
227
228	ifelse($6, 2, {
229
230		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
231		or	local2, local1, $3	! same value as the or above
232		xor	$4, out0, local1	! start sbox 1 lookup of the first round
233
234		call .des_dec.1	! enter the decryption rounds at the loop label
235		and	local1, 252, local1	! delay slot
236
237	},{})
238})
239
240
241! {rounds_macro}
242!
243! The logic used in the DES rounds is the same as in the C code,
244! except that calculations for sbox 1 and sbox 5 begin before
245! the previous round is finished.
246!
247! In each round one half (work) is modified based on key and the
248! other half (use).
249!
250! In this version we do two rounds in a loop repeated 7 times
251! and two rounds separately.
252!
253! One half has the bits for the sboxes in the following positions:
254!
255!	777777xx555555xx333333xx111111xx
256!
257!	88xx666666xx444444xx222222xx8888
258!
259! The bits for each sbox are xor-ed with the key bits for that box.
260! The above xx bits are cleared, and the result used for lookup in
261! the sbox table. Each sbox entry contains the 4 output bits permuted
262! into 32 bits according to the P permutation.
263!
264! In the description of DES, left and right are switched after
265! each round, except after last round. In this code the original
266! left and right are kept in the same register in all rounds, meaning
267! that after the 16 rounds the result for right is in the register
268! originally used for left.
269!
270! parameter 1  first work (left in first round)
271! parameter 2  first use (right in first round)
272! parameter 3  enc/dec  1/-1
273! parameter 4  loop label
274! parameter 5  key address register
275! parameter 6  optional address for key next encryption/decryption
276! parameter 7  not empty for include retl
277!
278! also compares in2 to 8
279
280define(rounds_macro, {
281! The 16 rounds: a two-round loop counted by out4, then two more rounds. Work half $1, use half $2, key pointer $5.
282! {rounds_macro}
283! $1 $2 $3 $4 $5 $6 $7 $8 $9
284
285	xor	$2, out0, local1
286
287	ld	[out2+284], local5        ! 0x0000FC00
288	ba	$4
289	and	local1, 252, local1	! delay slot: sbox 1 table offset
290
291	.align 32
292
293$4:
294	! local6 is address sbox 6
295	! out3   is address sbox 8
296	! out4   is loop counter
297
298	ld	[global1+local1], local1
299	xor	$2, out1, out1            ! 8642
300	xor	$2, out0, out0            ! 7531
301	! fmovs	%f0, %f0                  ! fxor used for alignment
302
303	srl	out1, 4, local0           ! rotate 4 right
304	and	out0, local5, local3      ! 3
305	! fmovs	%f0, %f0
306
307	ld	[$5+$3*8], local7         ! key 7531 next round
308	srl	local3, 8, local3         ! 3
309	and	local0, 252, local2       ! 2
310	! fmovs	%f0, %f0
311
312	ld	[global3+local3],local3   ! 3
313	sll	out1, 28, out1            ! rotate
314	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
315
316	ld	[global2+local2], local2  ! 2
317	srl	out0, 24, local1          ! 7
318	or	out1, local0, out1        ! rotate
319
320	ldub	[out2+local1], local1     ! 7 (and 0xFC)
321	srl	out1, 24, local0          ! 8
322	and	out1, local5, local4      ! 4
323
324	ldub	[out2+local0], local0     ! 8 (and 0xFC)
325	srl	local4, 8, local4         ! 4
326	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
327
328	ld	[global4+local4],local4   ! 4
329	srl	out1, 16, local2          ! 6
330	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
331
332	ld	[out3+local0],local0      ! 8
333	and	local2, 252, local2       ! 6
334	add	global1, 1536, local5     ! address sbox 7
335
336	ld	[local6+local2], local2   ! 6
337	srl	out0, 16, local3          ! 5
338	xor	$1, local4, $1            ! 4 finished
339
340	ld	[local5+local1],local1    ! 7
341	and	local3, 252, local3       ! 5
342	xor	$1, local0, $1            ! 8 finished
343
344	ld	[global5+local3],local3   ! 5
345	xor	$1, local2, $1            ! 6 finished
346	subcc	out4, 1, out4	! count down, tested by the bne at the loop end
347
348	ld	[$5+$3*8+4], out0         ! key 8642 next round
349	xor	$1, local7, local2        ! sbox 5 next round
350	xor	$1, local1, $1            ! 7 finished
351
352	srl	local2, 16, local2        ! sbox 5 next round
353	xor	$1, local3, $1            ! 5 finished
354
355	ld	[$5+$3*16+4], out1        ! key 8642 next round again
356	and	local2, 252, local2       ! sbox5 next round
357! next round
358	xor	$1, local7, local7        ! 7531
359
360	ld	[global5+local2], local2  ! 5
361	srl	local7, 24, local3        ! 7
362	xor	$1, out0, out0            ! 8642
363
364	ldub	[out2+local3], local3     ! 7 (and 0xFC)
365	srl	out0, 4, local0           ! rotate 4 right
366	and	local7, 252, local1       ! 1
367
368	sll	out0, 28, out0            ! rotate
369	xor	$2, local2, $2            ! 5 finished local2 used
370
371	srl	local0, 8, local4         ! 4
372	and	local0, 252, local2       ! 2
373	ld	[local5+local3], local3   ! 7
374
375	srl	local0, 16, local5        ! 6
376	or	out0, local0, out0        ! rotate
377	ld	[global2+local2], local2  ! 2
378
379	srl	out0, 24, local0
380	ld	[$5+$3*16], out0          ! key 7531 next round
381	and	local4, 252, local4	  ! 4
382
383	and	local5, 252, local5       ! 6
384	ld	[global4+local4], local4  ! 4
385	xor	$2, local3, $2            ! 7 finished local3 used
386
387	and	local0, 252, local0       ! 8
388	ld	[local6+local5], local5   ! 6
389	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
390
391	srl	local7, 8, local2         ! 3 start
392	ld	[out3+local0], local0     ! 8
393	xor	$2, local4, $2            ! 4 finished
394
395	and	local2, 252, local2       ! 3
396	ld	[global1+local1], local1  ! 1
397	xor	$2, local5, $2            ! 6 finished local5 used
398
399	ld	[global3+local2], local2  ! 3
400	xor	$2, local0, $2            ! 8 finished
401	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer (two round keys)
402
403	ld	[out2+284], local5        ! 0x0000FC00
404	xor	$2, out0, local4          ! sbox 1 next round
405	xor	$2, local1, $2            ! 1 finished
406
407	xor	$2, local2, $2            ! 3 finished
408	bne	$4
409	and	local4, 252, local1       ! sbox 1 next round
410
411! two rounds more:
412
413	ld	[global1+local1], local1
414	xor	$2, out1, out1
415	xor	$2, out0, out0
416
417	srl	out1, 4, local0           ! rotate
418	and	out0, local5, local3
419
420	ld	[$5+$3*8], local7         ! key 7531
421	srl	local3, 8, local3
422	and	local0, 252, local2
423
424	ld	[global3+local3],local3
425	sll	out1, 28, out1            ! rotate
426	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
427
428	ld	[global2+local2], local2
429	srl	out0, 24, local1
430	or	out1, local0, out1        ! rotate
431
432	ldub	[out2+local1], local1
433	srl	out1, 24, local0
434	and	out1, local5, local4
435
436	ldub	[out2+local0], local0
437	srl	local4, 8, local4
438	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
439
440	ld	[global4+local4],local4
441	srl	out1, 16, local2
442	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
443
444	ld	[out3+local0],local0
445	and	local2, 252, local2
446	add	global1, 1536, local5     ! address sbox 7
447
448	ld	[local6+local2], local2
449	srl	out0, 16, local3
450	xor	$1, local4, $1            ! 4 finished
451
452	ld	[local5+local1],local1
453	and	local3, 252, local3
454	xor	$1, local0, $1	! 8 finished
455
456	ld	[global5+local3],local3
457	xor	$1, local2, $1            ! 6 finished
458	cmp	in2, 8	! condition codes left for the caller
459
460	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
461	xor	$1, local7, local2        ! sbox 5 next round
462	xor	$1, local1, $1            ! 7 finished
463
464	ld	[$5+$3*8+4], out0
465	srl	local2, 16, local2        ! sbox 5 next round
466	xor	$1, local3, $1            ! 5 finished
467
468	and	local2, 252, local2
469! next round (two rounds more)
470	xor	$1, local7, local7        ! 7531
471
472	ld	[global5+local2], local2
473	srl	local7, 24, local3
474	xor	$1, out0, out0            ! 8642
475
476	ldub	[out2+local3], local3
477	srl	out0, 4, local0           ! rotate
478	and	local7, 252, local1
479
480	sll	out0, 28, out0            ! rotate
481	xor	$2, local2, $2            ! 5 finished local2 used
482
483	srl	local0, 8, local4
484	and	local0, 252, local2
485	ld	[local5+local3], local3
486
487	srl	local0, 16, local5
488	or	out0, local0, out0        ! rotate
489	ld	[global2+local2], local2
490
491	srl	out0, 24, local0
492	ifelse($6,{}, {}, {ld	[$6], out0})   ! key next encryption/decryption
493	and	local4, 252, local4
494
495	and	local5, 252, local5
496	ld	[global4+local4], local4
497	xor	$2, local3, $2            ! 7 finished local3 used
498
499	and	local0, 252, local0
500	ld	[local6+local5], local5
501	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
502
503	srl	local7, 8, local2         ! 3 start
504	ld	[out3+local0], local0
505	xor	$2, local4, $2	! 4 finished
506
507	and	local2, 252, local2
508	ld	[global1+local1], local1
509	xor	$2, local5, $2            ! 6 finished local5 used
510
511	ld	[global3+local2], local2
512	srl	$1, 3, local3	! $1 moved 3 right, for the rotate back done by the caller
513	xor	$2, local0, $2	! 8 finished
514
515	ifelse($6,{}, {}, {ld	[$6+4], out1}) ! key next encryption/decryption
516	sll	$1, 29, local4	! $1 moved 29 left, combined with local3 by the caller
517	xor	$2, local1, $2	! 1 finished
518
519	ifelse($7,{}, {}, {retl})
520	xor	$2, local2, $2	! 3 finished (delay slot when retl is emitted)
521})
522
523
524! {fp_macro}
525!
526!  parameter 1   right (original left)
527!  parameter 2   left (original right)
528!  parameter 3   1 for optional store to [in0]
529!  parameter 4   1 for load input/output address to local5/7
530!
531!  The final permutation logic switches the halves, meaning that
532!  left and right end up in the registers originally used.
533
534define(fp_macro, {
535! Final permutation of $1 and $2; expects the two parts of $1 in local3 and local4.
536! {fp_macro}
537! $1 $2 $3 $4 $5 $6 $7 $8 $9
538
539	! initially undo the rotate 3 left done after initial permutation
540	! original left is received shifted 3 right and 29 left in local3/4
541
542	sll	$2, 29, local1
543	or	local3, local4, $1
544
545	srl	$2, 3, $2
546	sethi	%hi(0x55555555), local2	! mask for the 1 bit step
547
548	or	$2, local1, $2
549	or	local2, %lo(0x55555555), local2
550
551	srl	$2, 1, local3
552	sethi	%hi(0x00ff00ff), local1	! mask for the 8 bit step
553	xor	local3, $1, local3
554	or	local1, %lo(0x00ff00ff), local1
555	and	local3, local2, local3
556	sethi	%hi(0x33333333), local4	! mask for the 2 bit step
557	sll	local3, 1, local2
558
559	xor	$1, local3, $1
560
561	srl	$1, 8, local3
562	xor	$2, local2, $2
563	xor	local3, $2, local3
564	or	local4, %lo(0x33333333), local4
565	and	local3, local1, local3
566	sethi	%hi(0x0000ffff), local1	! mask for the 16 bit step
567	sll	local3, 8, local2
568
569	xor	$2, local3, $2
570
571	srl	$2, 2, local3
572	xor	$1, local2, $1
573	xor	local3, $1, local3
574	or	local1, %lo(0x0000ffff), local1
575	and	local3, local4, local3
576	sethi	%hi(0x0f0f0f0f), local4	! mask for the 4 bit step
577	sll	local3, 2, local2
578
579	ifelse($4,1, {LDPTR INPUT, local5})
580	xor	$1, local3, $1
581
582	ifelse($4,1, {LDPTR OUTPUT, local7})
583	srl	$1, 16, local3
584	xor	$2, local2, $2
585	xor	local3, $2, local3
586	or	local4, %lo(0x0f0f0f0f), local4
587	and	local3, local1, local3
588	sll	local3, 16, local2
589
590	xor	$2, local3, local1	! second half continues in local1 until the last xor
591
592	srl	local1, 4, local3
593	xor	$1, local2, $1
594	xor	local3, $1, local3
595	and	local3, local4, local3
596	sll	local3, 4, local2
597
598	xor	$1, local3, $1
599
600	! optional store:
601
602	ifelse($3,1, {st $1, [in0]})
603
604	xor	local1, local2, $2	! final value of the second half
605
606	ifelse($3,1, {st $2, [in0+4]})
607
608})
609
610
611! {fp_ip_macro}
612!
613! Does initial permutation for next block mixed with
614! final permutation for current block.
615!
616! parameter 1   original left
617! parameter 2   original right
618! parameter 3   left ip
619! parameter 4   right ip
620! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
621!                2: mov in4 to in3
622!
623! also adds -8 to length in2 and loads loop counter to out4
624
625define(fp_ip_macro, {
626! Final permutation of the current block ($1 and $2) interleaved with the initial permutation of the next block ($3 and $4).
627! {fp_ip_macro}
628! $1 $2 $3 $4 $5 $6 $7 $8 $9
629
630	define({temp1},{out4})
631	define({temp2},{local3})
632
633	define({ip1},{local1})
634	define({ip2},{local2})
635	define({ip4},{local4})
636	define({ip5},{local5})
637
638	! $1 in local3, local4
639
640	ld	[out2+256], ip1
641	sll	out5, 29, temp1
642	or	local3, local4, $1
643
644	srl	out5, 3, $2
645	ifelse($5,2,{mov in4, in3})
646
647	ld	[out2+272], ip5
648	srl	$4, 4, local0
649	or	$2, temp1, $2
650
651	srl	$2, 1, temp1
652	xor	temp1, $1, temp1
653
654	and	temp1, ip5, temp1
655	xor	local0, $3, local0
656
657	sll	temp1, 1, temp2
658	xor	$1, temp1, $1
659
660	and	local0, ip1, local0
661	add	in2, -8, in2	! one block less remaining
662
663	sll	local0, 4, local7
664	xor	$3, local0, $3
665
666	ld	[out2+268], ip4
667	srl	$1, 8, temp1
668	xor	$2, temp2, $2
669	ld	[out2+260], ip2
670	srl	$3, 16, local0
671	xor	$4, local7, $4
672	xor	temp1, $2, temp1
673	xor	local0, $4, local0
674	and	temp1, ip4, temp1
675	and	local0, ip2, local0
676	sll	temp1, 8, temp2
677	xor	$2, temp1, $2
678	sll	local0, 16, local7
679	xor	$4, local0, $4
680
681	srl	$2, 2, temp1
682	xor	$1, temp2, $1
683
684	ld	[out2+264], temp2         ! ip3
685	srl	$4, 2, local0
686	xor	$3, local7, $3
687	xor	temp1, $1, temp1
688	xor	local0, $3, local0
689	and	temp1, temp2, temp1
690	and	local0, temp2, local0
691	sll	temp1, 2, temp2
692	xor	$1, temp1, $1
693	sll	local0, 2, local7
694	xor	$3, local0, $3
695
696	srl	$1, 16, temp1
697	xor	$2, temp2, $2
698	srl	$3, 8, local0
699	xor	$4, local7, $4
700	xor	temp1, $2, temp1
701	xor	local0, $4, local0
702	and	temp1, ip2, temp1
703	and	local0, ip4, local0
704	sll	temp1, 16, temp2
705	xor	$2, temp1, local4	! current block half continues in local4
706	sll	local0, 8, local7
707	xor	$4, local0, $4
708
709	srl	$4, 1, local0
710	xor	$3, local7, $3
711
712	srl	local4, 4, temp1
713	xor	local0, $3, local0
714
715	xor	$1, temp2, $1
716	and	local0, ip5, local0
717
718	sll	local0, 1, local7
719	xor	temp1, $1, temp1
720
721	xor	$3, local0, $3
722	xor	$4, local7, $4
723
724	sll	$3, 3, local5	! next block: rotate 3 left starts here
725	and	temp1, ip1, temp1
726
727	sll	temp1, 4, temp2
728	xor	$1, temp1, $1
729
730	ifelse($5,1,{LDPTR	KS2, in4})
731	sll	$4, 3, local2
732	xor	local4, temp2, $2	! current block: final value of the second half
733
734	! reload since used as temporary:
735
736	ld	[out2+280], out4          ! loop counter
737
738	srl	$3, 29, local0
739	ifelse($5,1,{add in4, 120, in4})
740
741	ifelse($5,1,{LDPTR	KS1, in3})
742	srl	$4, 29, local7
743
744	or	local0, local5, $4	! next block halves, rotated 3 left, exchanged
745	or	local2, local7, $3
746
747})
748
749
750
751! {load_little_endian}
752!
753! parameter 1  address
754! parameter 2  destination left
755! parameter 3  destination right
756! parameter 4  temporary
757! parameter 5  label
758
759define(load_little_endian, {
760! Loads 8 bytes at $1 byte by byte: bytes 0 to 3 into $2 and bytes 4 to 7 into $3, temporary $4.
761! {load_little_endian}
762! $1 $2 $3 $4 $5 $6 $7 $8 $9
763
764	! first in memory to rightmost in register
765
766$5:
767	ldub	[$1+3], $2	! most significant byte first
768
769	ldub	[$1+2], $4
770	sll	$2, 8, $2
771	or	$2, $4, $2
772
773	ldub	[$1+1], $4
774	sll	$2, 8, $2
775	or	$2, $4, $2
776
777	ldub	[$1+0], $4
778	sll	$2, 8, $2
779	or	$2, $4, $2
780
781
782	ldub	[$1+3+4], $3	! second word, same pattern
783
784	ldub	[$1+2+4], $4
785	sll	$3, 8, $3
786	or	$3, $4, $3
787
788	ldub	[$1+1+4], $4
789	sll	$3, 8, $3
790	or	$3, $4, $3
791
792	ldub	[$1+0+4], $4
793	sll	$3, 8, $3
794	or	$3, $4, $3
795$5a:
796
797})
798
799
800! {load_little_endian_inc}
801!
802! parameter 1  address
803! parameter 2  destination left
804! parameter 3  destination right
805! parameter 4  temporary
806! parameter 5  label
807!
808! adds 8 to address
809
810define(load_little_endian_inc, {
811! Same byte by byte load as the macro without increment, and $1 is advanced by 8 part way through.
812! {load_little_endian_inc}
813! $1 $2 $3 $4 $5 $6 $7 $8 $9
814
815	! first in memory to rightmost in register
816
817$5:
818	ldub	[$1+3], $2
819
820	ldub	[$1+2], $4
821	sll	$2, 8, $2
822	or	$2, $4, $2
823
824	ldub	[$1+1], $4
825	sll	$2, 8, $2
826	or	$2, $4, $2
827
828	ldub	[$1+0], $4
829	sll	$2, 8, $2
830	or	$2, $4, $2
831
832	ldub	[$1+3+4], $3
833	add	$1, 8, $1	! address now points at the next block
834
835	ldub	[$1+2+4-8], $4	! offsets below compensate for the add above
836	sll	$3, 8, $3
837	or	$3, $4, $3
838
839	ldub	[$1+1+4-8], $4
840	sll	$3, 8, $3
841	or	$3, $4, $3
842
843	ldub	[$1+0+4-8], $4
844	sll	$3, 8, $3
845	or	$3, $4, $3
846$5a:
847
848})
849
850
851! {load_n_bytes}
852!
853! Loads 1 to 7 bytes little endian
854! Remaining bytes are zeroed.
855!
856! parameter 1  address
857! parameter 2  length
858! parameter 3  destination register right (bytes 4 to 6)
859! parameter 4  destination register left (bytes 0 to 3)
860! parameter 5  temp
861! parameter 6  temp2
862! parameter 7  label
863! parameter 8  return label
864
865define(load_n_bytes, {
866! Jump table dispatch on the byte count $2; $4 collects bytes 0 to 3 and $3 bytes 4 to 6. Clobbers %o7.
867! {load_n_bytes}
868! $1 $2 $5 $6 $7 $8 $7 $8 $9
869
870$7.0:	call	.+8	! only used to get the address of this label into %o7
871	sll	$2, 2, $6	! delay slot: count times 4 selects the table word
872
873	add	%o7,$7.jmp.table-$7.0,$5	! address of the jump table
874
875	add	$5, $6, $5
876	mov	0, $4
877
878	ld	[$5], $5	! offset of the entry label relative to the start label
879
880	jmp	%o7+$5
881	mov	0, $3	! delay slot
882
883$7.7:
884	ldub	[$1+6], $5	! each entry falls through to the entries for smaller counts
885	sll	$5, 16, $5
886	or	$3, $5, $3
887$7.6:
888	ldub	[$1+5], $5
889	sll	$5, 8, $5
890	or	$3, $5, $3
891$7.5:
892	ldub	[$1+4], $5
893	or	$3, $5, $3
894$7.4:
895	ldub	[$1+3], $5
896	sll	$5, 24, $5
897	or	$4, $5, $4
898$7.3:
899	ldub	[$1+2], $5
900	sll	$5, 16, $5
901	or	$4, $5, $4
902$7.2:
903	ldub	[$1+1], $5
904	sll	$5, 8, $5
905	or	$4, $5, $4
906$7.1:
907	ldub	[$1+0], $5
908	ba	$8
909	or	$4, $5, $4	! delay slot
910
911	.align 4
912
913$7.jmp.table:
914	.word	0	! count 0: offset 0 is the start label itself, so the count must be 1 to 7
915	.word	$7.1-$7.0
916	.word	$7.2-$7.0
917	.word	$7.3-$7.0
918	.word	$7.4-$7.0
919	.word	$7.5-$7.0
920	.word	$7.6-$7.0
921	.word	$7.7-$7.0
922})
923
924
925! {store_little_endian}
926!
927! parameter 1  address
928! parameter 2  source left
929! parameter 3  source right
930! parameter 4  temporary
931! parameter 5  label
932define(store_little_endian, {
933! Writes $2 to bytes 0 to 3 and $3 to bytes 4 to 7 at $1, one byte at a time, temporary $4.
934! {store_little_endian}
935! $1 $2 $3 $4 $5 $6 $7 $8 $9
936
937	! least significant byte of each word goes to the lowest address
938
939$5:
940	and	$2, 0xff, $4
941	stub	$4, [$1+0]
942
943	srl	$2, 8, $4
944	and	$4, 0xff, $4
945	stub	$4, [$1+1]
946
947	srl	$2, 16, $4
948	and	$4, 0xff, $4
949	stub	$4, [$1+2]
950
951	srl	$2, 24, $4
952	stub	$4, [$1+3]
953
954	! second word
955	and	$3, 0xff, $4
956	stub	$4, [$1+4]
957
958	srl	$3, 8, $4
959	and	$4, 0xff, $4
960	stub	$4, [$1+5]
961
962	srl	$3, 16, $4
963	and	$4, 0xff, $4
964	stub	$4, [$1+6]
965
966	srl	$3, 24, $4
967	stub	$4, [$1+7]
968
969$5a:
970
971})
972
973
974! {store_n_bytes}
975!
976! Stores 1 to 7 bytes little endian
977!
978! parameter 1  address
979! parameter 2  length
980! parameter 3  source register right (bytes 4 to 6)
981! parameter 4  source register left (bytes 0 to 3)
982! parameter 5  temp
983! parameter 6  temp2
984! parameter 7  label
985! parameter 8  return label
986
987define(store_n_bytes, {
988! Jump table dispatch on the byte count $2; $4 supplies bytes 0 to 3 and $3 bytes 4 to 6. Clobbers %o7.
989! {store_n_bytes}
990! $1 $2 $5 $6 $7 $8 $7 $8 $9
991
992$7.0:	call	.+8	! only used to get the address of this label into %o7
993	sll	$2, 2, $6	! delay slot: count times 4 selects the table word
994
995	add	%o7,$7.jmp.table-$7.0,$5	! address of the jump table
996
997	add	$5, $6, $5
998
999	ld	[$5], $5	! offset of the entry label relative to the start label
1000
1001	jmp	%o7+$5
1002	nop
1003
1004$7.7:
1005	srl	$3, 16, $5	! each entry falls through to the entries for smaller counts
1006	and	$5, 0xff, $5
1007	stub	$5, [$1+6]
1008$7.6:
1009	srl	$3, 8, $5
1010	and	$5, 0xff, $5
1011	stub	$5, [$1+5]
1012$7.5:
1013	and	$3, 0xff, $5
1014	stub	$5, [$1+4]
1015$7.4:
1016	srl	$4, 24, $5
1017	stub	$5, [$1+3]
1018$7.3:
1019	srl	$4, 16, $5
1020	and	$5, 0xff, $5
1021	stub	$5, [$1+2]
1022$7.2:
1023	srl	$4, 8, $5
1024	and	$5, 0xff, $5
1025	stub	$5, [$1+1]
1026$7.1:
1027	and	$4, 0xff, $5
1028
1029
1030	ba	$8
1031	stub	$5, [$1]	! delay slot: byte 0 is stored while branching
1032
1033	.align 4
1034
1035$7.jmp.table:
1036
1037	.word	0	! count 0: offset 0 is the start label itself, so the count must be 1 to 7
1038	.word	$7.1-$7.0
1039	.word	$7.2-$7.0
1040	.word	$7.3-$7.0
1041	.word	$7.4-$7.0
1042	.word	$7.5-$7.0
1043	.word	$7.6-$7.0
1044	.word	$7.7-$7.0
1045})
1046
1047
1048define(testvalue,{1})
1049
1050define(register_init, {
1051
1052! Test aid: seed the working registers with one known value.
1053
1054	sethi	%hi(testvalue), local0
1055	or	local0, %lo(testvalue), local0
1056
1057	mov	local0, global1
1058	mov	local0, global2
1059	mov	local0, global3
1060	mov	local0, global4
1061	mov	local0, global5
1062	mov	local0, out0
1063	mov	local0, out1
1064	mov	local0, out2
1065	mov	local0, out3
1066	mov	local0, out4
1067	mov	local0, out5
1068	mov	local0, local1
1069	mov	local0, local2
1070	mov	local0, local3
1071	mov	local0, local4
1072	mov	local0, local5
1073	mov	local0, local6
1074	mov	local0, local7
1075
1076	ifelse($1,{},{}, {mov	local0, $1})
1077	ifelse($2,{},{}, {mov	local0, $2})
1078	ifelse($3,{},{}, {mov	local0, $3})
1079	ifelse($4,{},{}, {mov	local0, $4})
1080	ifelse($5,{},{}, {mov	local0, $5})
1081	ifelse($6,{},{}, {mov	local0, $6})
1082	ifelse($7,{},{}, {mov	local0, $7})
1083	ifelse($8,{},{}, {mov	local0, $8})
1084
1085})
1086
1087.section	".text"
1088
1089	.align 32
1090
1091.des_enc:
1092! Internal subroutine: all encryption rounds on in5/out5, entered with call and left through retl.
1093	! key address in3
1094	! loads key next encryption/decryption first round from [in4]
1095	! expects out0/out1 = first round key, out4 = loop counter, sbox addresses and out2 set by the caller
1096	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1097
1098
1099	.align 32
1100
1101.des_dec:
1102! Internal subroutine: all decryption rounds, entered with call and left through retl.
1103	! implemented with out5 as first parameter to avoid
1104	! register exchange in ede modes
1105
1106	! key address in4
1107	! loads key next encryption/decryption first round from [in3]
1108	! the key pointer steps downwards here (third parameter is -1)
1109	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1110
1111
1112
1113! void DES_encrypt1(data, ks, enc)
1114! *******************************
1115
1116	.align 32
1117	.global DES_encrypt1
1118	.type	 DES_encrypt1,#function
1119
1120DES_encrypt1:
1121! in0 = data (two 32-bit words, updated in place), in1 = key schedule, in2 = enc flag (0 selects decryption)
1122	save	%sp, FRAME, %sp
1123! position independent address computation: global1 = sbox 1 table, out2 = .des_and table
1124	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1125	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
11261:	call	.+8
1127	add	%o7,global1,global1
1128	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1129
1130	ld	[in0], in5                ! left
1131	cmp	in2, 0                    ! enc
1132
1133	be	.encrypt.dec
1134	ld	[in0+4], out5             ! right
1135
1136	! parameter 6  1/2 for include encryption/decryption
1137	! parameter 7  1 for move in1 to in3
1138	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1139
1140	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1141
1142	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1143
1144	fp_macro(in5, out5, 1)            ! 1 for store to [in0]
1145
1146	ret
1147	restore
1148
1149.encrypt.dec:
1150
1151	add	in1, 120, in3             ! use last subkey for first round
1152
1153	! parameter 6  1/2 for include encryption/decryption
1154	! parameter 7  1 for move in1 to in3
1155	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1156
1157	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4
1158
1159	fp_macro(out5, in5, 1)            ! 1 for store to [in0]
1160
1161	ret
1162	restore
1163
1164.DES_encrypt1.end:
1165	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1166
1167
1168! void DES_encrypt2(data, ks, enc)
1169!*********************************
1170
1171	! encrypts/decrypts without initial/final permutation
1172
1173	.align 32
1174	.global DES_encrypt2
1175	.type	 DES_encrypt2,#function
1176
1177DES_encrypt2:
1178! in0 = data (two 32-bit words, updated in place), in1 = key schedule, in2 = enc flag (0 selects decryption)
1179	save	%sp, FRAME, %sp
1180! position independent address computation: global1 = sbox 1 table, out2 = .des_and table
1181	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1182	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
11831:	call	.+8
1184	add	%o7,global1,global1
1185	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1186
1187	! Set sbox addresses 2 to 6 and 8 (sbox 1 is global1) and rotate halves 3 left
1188	! Errors caught by destest? Yes. Still? *NO*
1189
1190	!sethi	%hi(DES_SPtrans), global1 ! address sbox 1
1191
1192	!or	global1, %lo(DES_SPtrans), global1  ! sbox 1
1193
1194	add	global1, 256, global2     ! sbox 2
1195	add	global1, 512, global3     ! sbox 3
1196
1197	ld	[in0], out5               ! right
1198	add	global1, 768, global4     ! sbox 4
1199	add	global1, 1024, global5    ! sbox 5
1200
1201	ld	[in0+4], in5              ! left
1202	add	global1, 1280, local6     ! sbox 6
1203	add	global1, 1792, out3       ! sbox 8
1204
1205	! rotate
1206
1207	sll	in5, 3, local5
1208	mov	in1, in3                  ! key address to in3
1209
1210	sll	out5, 3, local7
1211	srl	in5, 29, in5
1212
1213	srl	out5, 29, out5
1214	add	in5, local5, in5
1215
1216	add	out5, local7, out5
1217	cmp	in2, 0
1218
1219	! we use our own stackframe
1220
1221	be	.encrypt2.dec
1222	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]	! delay slot: data pointer saved, in0 is reused as a temporary below
1223
1224	ld	[in3], out0               ! key 7531 first round
1225	mov	LOOPS, out4               ! loop counter
1226
1227	ld	[in3+4], out1             ! key 8642 first round
1228	sethi	%hi(0x0000FC00), local5
1229
1230	call .des_enc
1231	mov	in3, in4	! delay slot
1232
1233	! rotate
1234	sll	in5, 29, in0
1235	srl	in5, 3, in5
1236	sll	out5, 29, in1
1237	add	in5, in0, in5
1238	srl	out5, 3, out5
1239	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! data pointer back
1240	add	out5, in1, out5
1241	st	in5, [in0]
1242	st	out5, [in0+4]
1243
1244	ret
1245	restore
1246
1247
1248.encrypt2.dec:
1249
1250	add in3, 120, in4	! decryption starts at the last subkey
1251
1252	ld	[in4], out0               ! key 7531 first round
1253	mov	LOOPS, out4               ! loop counter
1254
1255	ld	[in4+4], out1             ! key 8642 first round
1256	sethi	%hi(0x0000FC00), local5
1257
1258	mov	in5, local1               ! left expected in out5
1259	mov	out5, in5
1260
1261	call .des_dec
1262	mov	local1, out5	! delay slot: completes the exchange of the halves
1263
1264.encrypt2.finish:
1265
1266	! rotate
1267	sll	in5, 29, in0
1268	srl	in5, 3, in5
1269	sll	out5, 29, in1
1270	add	in5, in0, in5
1271	srl	out5, 3, out5
1272	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! data pointer back
1273	add	out5, in1, out5
1274	st	out5, [in0]
1275	st	in5, [in0+4]
1276
1277	ret
1278	restore
1279
1280.DES_encrypt2.end:
1281	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1282
1283
1284! void DES_encrypt3(data, ks1, ks2, ks3)
1285! **************************************
1286
1287	.align 32
1288	.global DES_encrypt3
1289	.type	 DES_encrypt3,#function
1290
1291DES_encrypt3:
1292! in0 = data (updated in place), in1 = ks1, in2 = ks2, in3 = ks3; encrypt with ks1, decrypt with ks2, encrypt with ks3
1293	save	%sp, FRAME, %sp
1294! position independent address computation: global1 = sbox 1 table, out2 = .des_and table
1295	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1296	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
12971:	call	.+8
1298	add	%o7,global1,global1
1299	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1300
1301	ld	[in0], in5                ! left
1302	add	in2, 120, in4             ! ks2
1303
1304	ld	[in0+4], out5             ! right
1305	mov	in3, in2                  ! save ks3
1306
1307	! parameter 6  1/2 for include encryption/decryption
1308	! parameter 7  1 for mov in1 to in3
1309	! parameter 8  1 for mov in3 to in4
1310	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1311
1312	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1313
1314	call	.des_dec
1315	mov	in2, in3                  ! preload ks3
1316
1317	call	.des_enc
1318	nop
1319
1320	fp_macro(in5, out5, 1)
1321
1322	ret
1323	restore
1324
1325.DES_encrypt3.end:
1326	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1327
1328
1329! void DES_decrypt3(data, ks1, ks2, ks3)
1330! **************************************
1331
1332	.align 32
1333	.global DES_decrypt3
1334	.type	 DES_decrypt3,#function
1335
1336DES_decrypt3:
1337! in0 = data (updated in place), in1 = ks1, in2 = ks2, in3 = ks3; decrypt with ks3, encrypt with ks2, decrypt with ks1
1338	save	%sp, FRAME, %sp
1339! position independent address computation: global1 = sbox 1 table, out2 = .des_and table
1340	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1341	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
13421:	call	.+8
1343	add	%o7,global1,global1
1344	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1345
1346	ld	[in0], in5                ! left
1347	add	in3, 120, in4             ! ks3
1348
1349	ld	[in0+4], out5             ! right
1350	mov	in2, in3                  ! ks2
1351
1352	! parameter 6  1/2 for include encryption/decryption
1353	! parameter 7  1 for mov in1 to in3
1354	! parameter 8  1 for mov in3 to in4
1355	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1356
1357	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1358
1359	call	.des_enc
1360	add	in1, 120, in4             ! preload ks1
1361
1362	call	.des_dec
1363	nop
1364
1365	fp_macro(out5, in5, 1)
1366
1367	ret
1368	restore
1369
1370.DES_decrypt3.end:
1371	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1372
1373! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1374! *****************************************************************
!
! Single DES in CBC mode.
!
!   in0  input
!   in1  output
!   in2  length in bytes
!   in3  schedule
!   in4  ivec, spilled to its argument stack slot on entry
!   in5  enc, zero selects the decryption path at .ncbc.dec
!
! Encryption keeps the chaining value in in5/out5. While at least 8 more
! bytes follow, the final permutation of one block is combined with the
! initial permutation of the next one. A trailing 1 to 7 bytes are loaded
! separately and sent through the same rounds as one more block. The last
! chaining value is written back through ivec.
!
! Decryption keeps the chaining value in in0/in1, so the input and output
! pointers are held in local5/local7 and in their argument stack slots.
! NOTE(review): a final short block presumably stores only the in2 bytes
! that are left; confirm against the partial store macro.
1375
1376
1377	.align 32
1378	.global DES_ncbc_encrypt
1379	.type	 DES_ncbc_encrypt,#function
1380
1381DES_ncbc_encrypt:
1382
1383	save	%sp, FRAME, %sp
1384
	! Names for the argument stack slots of arguments 0, 1 and 4. These
	! m4 names stay visible to everything that follows in the file.
1385	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
1386	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1387	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })
1388
	! Position independent addressing: call .+8 leaves the address of
	! local label 1 in %o7. Adding the link time offset gives the address
	! of DES_SPtrans in global1, and out2 is then set to .des_and.
1389	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1390	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
13911:	call	.+8
1392	add	%o7,global1,global1
1393	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1394
1395	cmp	in5, 0                    ! enc
1396
	! The store in the delay slot runs on both paths, so the ivec pointer
	! is in its stack slot for encryption and decryption alike.
1397	be	.ncbc.dec
1398	STPTR	in4, IVEC
1399
1400	! addr  left  right  temp  label
1401	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv
1402
1403	addcc	in2, -8, in2              ! bytes missing when first block done
1404
	! Negative result: fewer than 8 bytes in total.
1405	bl	.ncbc.enc.seven.or.less
1406	mov	in3, in4                  ! schedule
1407
1408.ncbc.enc.next.block:
1409
1410	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block
1411
	! Entry point for a trailing partial block, which arrives here already
	! loaded into out4/global4.
1412.ncbc.enc.next.block_1:
1413
1414	xor	in5, out4, in5            ! iv xor
1415	xor	out5, global4, out5       ! iv xor
1416
1417	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1418	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1419
1420.ncbc.enc.next.block_2:
1421
1422!//	call .des_enc                     ! compares in2 to 8
1423!	rounds inlined for alignment purposes
1424
1425	add	global1, 768, global4     ! address sbox 4 since register used below
1426
1427	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption  ks in3
1428
	! Condition codes come from the compare of in2 to 8 done in the rounds:
	! fewer than 8 bytes follow, so finish this block on its own.
1429	bl	.ncbc.enc.next.block_fp
1430	add	in0, 8, in0               ! input address
1431
1432	! If 8 or more bytes are to be encrypted after this block,
1433	! we combine final permutation for this block with initial
1434	! permutation for next block. Load next block:
1435
1436	load_little_endian(in0, global3, global4, local5, .LLE12)
1437
1438	!  parameter 1   original left
1439	!  parameter 2   original right
1440	!  parameter 3   left ip
1441	!  parameter 4   right ip
1442	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1443	!                2: mov in4 to in3
1444	!
1445	! also adds -8 to length in2 and loads loop counter to out4
1446
1447	fp_ip_macro(out0, out1, global3, global4, 2)
1448
1449	store_little_endian(in1, out0, out1, local3, .SLE10)  ! block
1450
	! in5 is copied to local1 first because the xor below overwrites in5
	! while the second xor still needs the old value.
1451	ld	[in3], out0               ! key 7531 first round next block
1452	mov 	in5, local1
1453	xor	global3, out5, in5        ! iv xor next block
1454
1455	ld	[in3+4], out1             ! key 8642
1456	add	global1, 512, global3     ! address sbox 3 since register used
1457	xor	global4, local1, out5     ! iv xor next block
1458
1459	ba	.ncbc.enc.next.block_2
1460	add	in1, 8, in1               ! output address
1461
1462.ncbc.enc.next.block_fp:
1463
1464	fp_macro(in5, out5)
1465
1466	store_little_endian(in1, in5, out5, local3, .SLE1)  ! block
1467
1468	addcc   in2, -8, in2              ! bytes missing when next block done
1469
1470	bpos	.ncbc.enc.next.block
1471	add	in1, 8, in1
1472
1473.ncbc.enc.seven.or.less:
1474
	! in2 holds the remaining byte count minus 8 here, so -8 or less
	! means nothing is left to encrypt.
1475	cmp	in2, -8
1476
1477	ble	.ncbc.enc.finish
1478	nop
1479
1480	add	in2, 8, local1            ! bytes to load
1481
1482	! addr, length, dest left, dest right, temp, temp2, label, ret label
1483	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1484
1485	! Loads 1 to 7 bytes little endian to global4, out4
1486
1487
1488.ncbc.enc.finish:
1489
	! Write the final chaining value back through the saved ivec pointer.
1490	LDPTR	IVEC, local4
1491	store_little_endian(local4, in5, out5, local5, .SLE2)  ! ivec
1492
1493	ret
1494	restore
1495
1496
1497.ncbc.dec:
1498
1499	STPTR	in0, INPUT
1500	cmp	in2, 0                    ! length
	! Point at the last subkey; decryption uses the last subkey first.
1501	add	in3, 120, in3
1502
1503	LDPTR	IVEC, local7              ! ivec
	! Nothing to do for a length of zero or less; ivec is left untouched.
1504	ble	.ncbc.dec.finish
1505	mov	in3, in4                  ! schedule
1506
1507	STPTR	in1, OUTPUT
1508	mov	in0, local5               ! input
1509
1510	load_little_endian(local7, in0, in1, local3, .LLE3)   ! ivec
1511
1512.ncbc.dec.next.block:
1513
1514	load_little_endian(local5, in5, out5, local3, .LLE4)  ! block
1515
1516	! parameter 6  1/2 for include encryption/decryption
1517	! parameter 7  1 for mov in1 to in3
1518	! parameter 8  1 for mov in3 to in4
1519
1520	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption  ks in4
1521
1522	fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1523
1524	! in2 is bytes left to be stored
1525	! in2 is compared to 8 in the rounds
1526
	! The xor instructions do not change the condition codes, so the
	! branch between them still tests the compare done in the rounds.
1527	xor	out5, in0, out4           ! iv xor
1528	bl	.ncbc.dec.seven.or.less
1529	xor	in5, in1, global4         ! iv xor
1530
1531	! Load ivec next block now, since input and output address might be the same.
1532
1533	load_little_endian_inc(local5, in0, in1, local3, .LLE5)  ! iv
1534
1535	store_little_endian(local7, out4, global4, local3, .SLE3)
1536
	! Keep the advanced input and output pointers in their stack slots
	! for the next pass through the loop.
1537	STPTR	local5, INPUT
1538	add	local7, 8, local7
1539	addcc   in2, -8, in2
1540
1541	bg	.ncbc.dec.next.block
1542	STPTR	local7, OUTPUT
1543
1544
1545.ncbc.dec.store.iv:
1546
1547	LDPTR	IVEC, local4              ! ivec
1548	store_little_endian(local4, in0, in1, local5, .SLE4)
1549
1550.ncbc.dec.finish:
1551
1552	ret
1553	restore
1554
1555.ncbc.dec.seven.or.less:
1556
	! The chaining value for the caller is read from the input block
	! before the partial store, then control continues at the iv store.
1557	load_little_endian_inc(local5, in0, in1, local3, .LLE13)     ! ivec
1558
1559	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1560
1561
1562.DES_ncbc_encrypt.end:
1563	.size	 DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1564
1565
1566! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1567! **************************************************************************
!
! Triple DES in CBC mode.
!
!   in0  input
!   in1  output
!   in2  length in bytes
!   in3  ks1
!   in4  ks2
!   in5  ks3
!   ivec and enc are arguments 6 and 7 (counting from zero) and are read
!   from the argument area of the caller through %fp.
!
! The three schedule pointers are spilled to their argument stack slots
! because in3, in4 and in5 are reused while a block is processed.
!
! The decryption path also uses the input and output stack slot names
! that are set up inside DES_ncbc_encrypt, so that routine has to stay
! ahead of this one in the file.
1568
1569
1570	.align 32
1571	.global DES_ede3_cbc_encrypt
1572	.type	 DES_ede3_cbc_encrypt,#function
1573
1574DES_ede3_cbc_encrypt:
1575
1576	save	%sp, FRAME, %sp
1577
	! Names for the argument stack slots of arguments 3, 4 and 5.
1578	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1579	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1580	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1581
	! Position independent addressing: call .+8 leaves the address of
	! local label 1 in %o7. Adding the link time offset gives the address
	! of DES_SPtrans in global1, and out2 is then set to .des_and.
1582	sethi	%hi(.PIC.DES_SPtrans-1f),global1
1583	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
15841:	call	.+8
1585	add	%o7,global1,global1
1586	sub	global1,.PIC.DES_SPtrans-.des_and,out2
1587
1588	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
1589	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1590	cmp	local3, 0                 ! enc
1591
	! The store in the delay slot runs on both paths; ks2 is saved as
	! passed in, without the offset of 120.
1592	be	.ede3.dec
1593	STPTR	in4, KS2
1594
1595	STPTR	in5, KS3
1596
1597	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec
1598
1599	addcc	in2, -8, in2              ! bytes missing after next block
1600
	! Negative result: fewer than 8 bytes in total. ks1 is saved in the
	! delay slot either way.
1601	bl	.ede3.enc.seven.or.less
1602	STPTR	in3, KS1
1603
1604.ede3.enc.next.block:
1605
1606	load_little_endian(in0, out4, global4, local3, .LLE7)
1607
	! Entry point for a trailing partial block, which arrives here already
	! loaded into out4/global4.
1608.ede3.enc.next.block_1:
1609
1610	LDPTR	KS2, in4
1611	xor	in5, out4, in5            ! iv xor
1612	xor	out5, global4, out5       ! iv xor
1613
1614	LDPTR	KS1, in3
1615	add	in4, 120, in4             ! for decryption we use last subkey first
1616	nop
1617
1618	ip_macro(in5, out5, in5, out5, in3)
1619
1620.ede3.enc.next.block_2:
1621
1622	call .des_enc                     ! ks1 in3
1623	nop
1624
	! The delay slot reloads in3 with ks3 for the third pass.
1625	call .des_dec                     ! ks2 in4
1626	LDPTR	KS3, in3
1627
1628	call .des_enc                     ! ks3 in3  compares in2 to 8
1629	nop
1630
	! Fewer than 8 bytes follow: finish this block on its own.
1631	bl	.ede3.enc.next.block_fp
1632	add	in0, 8, in0
1633
1634	! If 8 or more bytes are to be encrypted after this block,
1635	! we combine final permutation for this block with initial
1636	! permutation for next block. Load next block:
1637
1638	load_little_endian(in0, global3, global4, local5, .LLE11)
1639
1640	!  parameter 1   original left
1641	!  parameter 2   original right
1642	!  parameter 3   left ip
1643	!  parameter 4   right ip
1644	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1645	!                2: mov in4 to in3
1646	!
1647	! also adds -8 to length in2 and loads loop counter to out4
1648
1649	fp_ip_macro(out0, out1, global3, global4, 1)
1650
1651	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block
1652
	! in5 is copied to local1 first because the xor below overwrites in5
	! while the second xor still needs the old value.
1653	mov 	in5, local1
1654	xor	global3, out5, in5        ! iv xor next block
1655
1656	ld	[in3], out0               ! key 7531
1657	add	global1, 512, global3     ! address sbox 3
1658	xor	global4, local1, out5     ! iv xor next block
1659
1660	ld	[in3+4], out1             ! key 8642
1661	add	global1, 768, global4     ! address sbox 4
1662	ba	.ede3.enc.next.block_2
1663	add	in1, 8, in1
1664
1665.ede3.enc.next.block_fp:
1666
1667	fp_macro(in5, out5)
1668
1669	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block
1670
1671	addcc   in2, -8, in2              ! bytes missing when next block done
1672
1673	bpos	.ede3.enc.next.block
1674	add	in1, 8, in1
1675
1676.ede3.enc.seven.or.less:
1677
	! in2 holds the remaining byte count minus 8 here, so -8 or less
	! means nothing is left to encrypt.
1678	cmp	in2, -8
1679
1680	ble	.ede3.enc.finish
1681	nop
1682
1683	add	in2, 8, local1            ! bytes to load
1684
1685	! addr, length, dest left, dest right, temp, temp2, label, ret label
1686	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1687
1688.ede3.enc.finish:
1689
	! Write the final chaining value back through ivec.
1690	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1691	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec
1692
1693	ret
1694	restore
1695
1696.ede3.dec:
1697
	! ks1 and ks3 are used for decryption passes here, so both are saved
	! already pointing at their last subkey (offset 120).
1698	STPTR	in0, INPUT
1699	add	in5, 120, in5
1700
1701	STPTR	in1, OUTPUT
1702	mov	in0, local5
1703	add	in3, 120, in3
1704
1705	STPTR	in3, KS1
1706	cmp	in2, 0
1707
	! Nothing to do for a length of zero or less; ivec is left untouched.
1708	ble	.ede3.dec.finish
1709	STPTR	in5, KS3
1710
1711	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
1712	load_little_endian(local7, in0, in1, local3, .LLE8)
1713
1714.ede3.dec.next.block:
1715
1716	load_little_endian(local5, in5, out5, local3, .LLE9)
1717
1718	! parameter 6  1/2 for include encryption/decryption
1719	! parameter 7  1 for mov in1 to in3
1720	! parameter 8  1 for mov in3 to in4
1721	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1722
1723	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1724
	! The delay slot reloads in4 with the saved ks1 pointer for the
	! third pass.
1725	call .des_enc                     ! ks2 in3
1726	LDPTR	KS1, in4
1727
1728	call .des_dec                     ! ks1 in4
1729	nop
1730
1731	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7
1732
1733	! in2 is bytes left to be stored
1734	! in2 is compared to 8 in the rounds
1735
	! The xor instructions do not change the condition codes, so the
	! branch between them still tests the compare done in the rounds.
1736	xor	out5, in0, out4
1737	bl	.ede3.dec.seven.or.less
1738	xor	in5, in1, global4
1739
1740	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block
1741
1742	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block
1743
	! Keep the advanced input and output pointers in their stack slots
	! for the next pass through the loop.
1744	STPTR	local5, INPUT
1745	addcc   in2, -8, in2
1746	add	local7, 8, local7
1747
1748	bg	.ede3.dec.next.block
1749	STPTR	local7, OUTPUT
1750
1751.ede3.dec.store.iv:
1752
1753	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1754	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec
1755
1756.ede3.dec.finish:
1757
1758	ret
1759	restore
1760
1761.ede3.dec.seven.or.less:
1762
	! The chaining value for the caller is read from the input block
	! before the partial store, then control continues at the iv store.
1763	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv
1764
1765	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1766
1767
1768.DES_ede3_cbc_encrypt.end:
1769	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1770
! Constant area addressed through out2 by the routines above.
!
!   offset   0 to 255   byte table, entry n holds n with its two low bits cleared
!   offset 256 to 275   five mask words for the initial/final permutation
!   offset 276 to 287   three more constant words, see the comments on each
!
! The object is 256 + 8 * 4 = 288 bytes long. The last word starts at
! offset 284, so the size recorded for the symbol has to be 288.
1771	.align	256
1772	.type	 .des_and,#object
1773	.size	 .des_and,288
1774
1775.des_and:
1776
1777! This table is used for AND 0xFC when it is known that register
1778! bits 8-31 are zero. Makes it possible to do three arithmetic
1779! operations in one cycle.
1780
1781	.byte  0, 0, 0, 0, 4, 4, 4, 4
1782	.byte  8, 8, 8, 8, 12, 12, 12, 12
1783	.byte  16, 16, 16, 16, 20, 20, 20, 20
1784	.byte  24, 24, 24, 24, 28, 28, 28, 28
1785	.byte  32, 32, 32, 32, 36, 36, 36, 36
1786	.byte  40, 40, 40, 40, 44, 44, 44, 44
1787	.byte  48, 48, 48, 48, 52, 52, 52, 52
1788	.byte  56, 56, 56, 56, 60, 60, 60, 60
1789	.byte  64, 64, 64, 64, 68, 68, 68, 68
1790	.byte  72, 72, 72, 72, 76, 76, 76, 76
1791	.byte  80, 80, 80, 80, 84, 84, 84, 84
1792	.byte  88, 88, 88, 88, 92, 92, 92, 92
1793	.byte  96, 96, 96, 96, 100, 100, 100, 100
1794	.byte  104, 104, 104, 104, 108, 108, 108, 108
1795	.byte  112, 112, 112, 112, 116, 116, 116, 116
1796	.byte  120, 120, 120, 120, 124, 124, 124, 124
1797	.byte  128, 128, 128, 128, 132, 132, 132, 132
1798	.byte  136, 136, 136, 136, 140, 140, 140, 140
1799	.byte  144, 144, 144, 144, 148, 148, 148, 148
1800	.byte  152, 152, 152, 152, 156, 156, 156, 156
1801	.byte  160, 160, 160, 160, 164, 164, 164, 164
1802	.byte  168, 168, 168, 168, 172, 172, 172, 172
1803	.byte  176, 176, 176, 176, 180, 180, 180, 180
1804	.byte  184, 184, 184, 184, 188, 188, 188, 188
1805	.byte  192, 192, 192, 192, 196, 196, 196, 196
1806	.byte  200, 200, 200, 200, 204, 204, 204, 204
1807	.byte  208, 208, 208, 208, 212, 212, 212, 212
1808	.byte  216, 216, 216, 216, 220, 220, 220, 220
1809	.byte  224, 224, 224, 224, 228, 228, 228, 228
1810	.byte  232, 232, 232, 232, 236, 236, 236, 236
1811	.byte  240, 240, 240, 240, 244, 244, 244, 244
1812	.byte  248, 248, 248, 248, 252, 252, 252, 252
1813
1814	! 5 numbers for initial/final permutation
1815
1816	.word   0x0f0f0f0f                ! offset 256
1817	.word	0x0000ffff                ! 260
1818	.word	0x33333333                ! 264
1819	.word	0x00ff00ff                ! 268
1820	.word	0x55555555                ! 272
1821
	! Three further constants: a zero word, the loop count constant from
	! the top of the file, and a mask word. This last word is what takes
	! the object to 288 bytes.
1822	.word	0                         ! 276
1823	.word	LOOPS                     ! 280
1824	.word	0x0000FC00                ! 284
1825
! S-box lookup tables: eight groups (nibble 0 to nibble 7) of 64 words
! each, 8 * 64 * 4 = 2048 bytes in total. Every group is 256 bytes long;
! the routines above address sbox 3 and sbox 4 at offsets 512 and 768
! from the start of the table.
!
! The same address carries two labels: the global DES_SPtrans and the
! file local .PIC.DES_SPtrans, which the routines above use when they
! compute the table address relative to the program counter.
1826	.global	DES_SPtrans
1827	.type	DES_SPtrans,#object
1828	.size	DES_SPtrans,2048
1829.align	64
1830DES_SPtrans:
1831.PIC.DES_SPtrans:
1832	! nibble 0
1833	.word	0x02080800, 0x00080000, 0x02000002, 0x02080802
1834	.word	0x02000000, 0x00080802, 0x00080002, 0x02000002
1835	.word	0x00080802, 0x02080800, 0x02080000, 0x00000802
1836	.word	0x02000802, 0x02000000, 0x00000000, 0x00080002
1837	.word	0x00080000, 0x00000002, 0x02000800, 0x00080800
1838	.word	0x02080802, 0x02080000, 0x00000802, 0x02000800
1839	.word	0x00000002, 0x00000800, 0x00080800, 0x02080002
1840	.word	0x00000800, 0x02000802, 0x02080002, 0x00000000
1841	.word	0x00000000, 0x02080802, 0x02000800, 0x00080002
1842	.word	0x02080800, 0x00080000, 0x00000802, 0x02000800
1843	.word	0x02080002, 0x00000800, 0x00080800, 0x02000002
1844	.word	0x00080802, 0x00000002, 0x02000002, 0x02080000
1845	.word	0x02080802, 0x00080800, 0x02080000, 0x02000802
1846	.word	0x02000000, 0x00000802, 0x00080002, 0x00000000
1847	.word	0x00080000, 0x02000000, 0x02000802, 0x02080800
1848	.word	0x00000002, 0x02080002, 0x00000800, 0x00080802
1849	! nibble 1
1850	.word	0x40108010, 0x00000000, 0x00108000, 0x40100000
1851	.word	0x40000010, 0x00008010, 0x40008000, 0x00108000
1852	.word	0x00008000, 0x40100010, 0x00000010, 0x40008000
1853	.word	0x00100010, 0x40108000, 0x40100000, 0x00000010
1854	.word	0x00100000, 0x40008010, 0x40100010, 0x00008000
1855	.word	0x00108010, 0x40000000, 0x00000000, 0x00100010
1856	.word	0x40008010, 0x00108010, 0x40108000, 0x40000010
1857	.word	0x40000000, 0x00100000, 0x00008010, 0x40108010
1858	.word	0x00100010, 0x40108000, 0x40008000, 0x00108010
1859	.word	0x40108010, 0x00100010, 0x40000010, 0x00000000
1860	.word	0x40000000, 0x00008010, 0x00100000, 0x40100010
1861	.word	0x00008000, 0x40000000, 0x00108010, 0x40008010
1862	.word	0x40108000, 0x00008000, 0x00000000, 0x40000010
1863	.word	0x00000010, 0x40108010, 0x00108000, 0x40100000
1864	.word	0x40100010, 0x00100000, 0x00008010, 0x40008000
1865	.word	0x40008010, 0x00000010, 0x40100000, 0x00108000
1866	! nibble 2
1867	.word	0x04000001, 0x04040100, 0x00000100, 0x04000101
1868	.word	0x00040001, 0x04000000, 0x04000101, 0x00040100
1869	.word	0x04000100, 0x00040000, 0x04040000, 0x00000001
1870	.word	0x04040101, 0x00000101, 0x00000001, 0x04040001
1871	.word	0x00000000, 0x00040001, 0x04040100, 0x00000100
1872	.word	0x00000101, 0x04040101, 0x00040000, 0x04000001
1873	.word	0x04040001, 0x04000100, 0x00040101, 0x04040000
1874	.word	0x00040100, 0x00000000, 0x04000000, 0x00040101
1875	.word	0x04040100, 0x00000100, 0x00000001, 0x00040000
1876	.word	0x00000101, 0x00040001, 0x04040000, 0x04000101
1877	.word	0x00000000, 0x04040100, 0x00040100, 0x04040001
1878	.word	0x00040001, 0x04000000, 0x04040101, 0x00000001
1879	.word	0x00040101, 0x04000001, 0x04000000, 0x04040101
1880	.word	0x00040000, 0x04000100, 0x04000101, 0x00040100
1881	.word	0x04000100, 0x00000000, 0x04040001, 0x00000101
1882	.word	0x04000001, 0x00040101, 0x00000100, 0x04040000
1883	! nibble 3
1884	.word	0x00401008, 0x10001000, 0x00000008, 0x10401008
1885	.word	0x00000000, 0x10400000, 0x10001008, 0x00400008
1886	.word	0x10401000, 0x10000008, 0x10000000, 0x00001008
1887	.word	0x10000008, 0x00401008, 0x00400000, 0x10000000
1888	.word	0x10400008, 0x00401000, 0x00001000, 0x00000008
1889	.word	0x00401000, 0x10001008, 0x10400000, 0x00001000
1890	.word	0x00001008, 0x00000000, 0x00400008, 0x10401000
1891	.word	0x10001000, 0x10400008, 0x10401008, 0x00400000
1892	.word	0x10400008, 0x00001008, 0x00400000, 0x10000008
1893	.word	0x00401000, 0x10001000, 0x00000008, 0x10400000
1894	.word	0x10001008, 0x00000000, 0x00001000, 0x00400008
1895	.word	0x00000000, 0x10400008, 0x10401000, 0x00001000
1896	.word	0x10000000, 0x10401008, 0x00401008, 0x00400000
1897	.word	0x10401008, 0x00000008, 0x10001000, 0x00401008
1898	.word	0x00400008, 0x00401000, 0x10400000, 0x10001008
1899	.word	0x00001008, 0x10000000, 0x10000008, 0x10401000
1900	! nibble 4
1901	.word	0x08000000, 0x00010000, 0x00000400, 0x08010420
1902	.word	0x08010020, 0x08000400, 0x00010420, 0x08010000
1903	.word	0x00010000, 0x00000020, 0x08000020, 0x00010400
1904	.word	0x08000420, 0x08010020, 0x08010400, 0x00000000
1905	.word	0x00010400, 0x08000000, 0x00010020, 0x00000420
1906	.word	0x08000400, 0x00010420, 0x00000000, 0x08000020
1907	.word	0x00000020, 0x08000420, 0x08010420, 0x00010020
1908	.word	0x08010000, 0x00000400, 0x00000420, 0x08010400
1909	.word	0x08010400, 0x08000420, 0x00010020, 0x08010000
1910	.word	0x00010000, 0x00000020, 0x08000020, 0x08000400
1911	.word	0x08000000, 0x00010400, 0x08010420, 0x00000000
1912	.word	0x00010420, 0x08000000, 0x00000400, 0x00010020
1913	.word	0x08000420, 0x00000400, 0x00000000, 0x08010420
1914	.word	0x08010020, 0x08010400, 0x00000420, 0x00010000
1915	.word	0x00010400, 0x08010020, 0x08000400, 0x00000420
1916	.word	0x00000020, 0x00010420, 0x08010000, 0x08000020
1917	! nibble 5
1918	.word	0x80000040, 0x00200040, 0x00000000, 0x80202000
1919	.word	0x00200040, 0x00002000, 0x80002040, 0x00200000
1920	.word	0x00002040, 0x80202040, 0x00202000, 0x80000000
1921	.word	0x80002000, 0x80000040, 0x80200000, 0x00202040
1922	.word	0x00200000, 0x80002040, 0x80200040, 0x00000000
1923	.word	0x00002000, 0x00000040, 0x80202000, 0x80200040
1924	.word	0x80202040, 0x80200000, 0x80000000, 0x00002040
1925	.word	0x00000040, 0x00202000, 0x00202040, 0x80002000
1926	.word	0x00002040, 0x80000000, 0x80002000, 0x00202040
1927	.word	0x80202000, 0x00200040, 0x00000000, 0x80002000
1928	.word	0x80000000, 0x00002000, 0x80200040, 0x00200000
1929	.word	0x00200040, 0x80202040, 0x00202000, 0x00000040
1930	.word	0x80202040, 0x00202000, 0x00200000, 0x80002040
1931	.word	0x80000040, 0x80200000, 0x00202040, 0x00000000
1932	.word	0x00002000, 0x80000040, 0x80002040, 0x80202000
1933	.word	0x80200000, 0x00002040, 0x00000040, 0x80200040
1934	! nibble 6
1935	.word	0x00004000, 0x00000200, 0x01000200, 0x01000004
1936	.word	0x01004204, 0x00004004, 0x00004200, 0x00000000
1937	.word	0x01000000, 0x01000204, 0x00000204, 0x01004000
1938	.word	0x00000004, 0x01004200, 0x01004000, 0x00000204
1939	.word	0x01000204, 0x00004000, 0x00004004, 0x01004204
1940	.word	0x00000000, 0x01000200, 0x01000004, 0x00004200
1941	.word	0x01004004, 0x00004204, 0x01004200, 0x00000004
1942	.word	0x00004204, 0x01004004, 0x00000200, 0x01000000
1943	.word	0x00004204, 0x01004000, 0x01004004, 0x00000204
1944	.word	0x00004000, 0x00000200, 0x01000000, 0x01004004
1945	.word	0x01000204, 0x00004204, 0x00004200, 0x00000000
1946	.word	0x00000200, 0x01000004, 0x00000004, 0x01000200
1947	.word	0x00000000, 0x01000204, 0x01000200, 0x00004200
1948	.word	0x00000204, 0x00004000, 0x01004204, 0x01000000
1949	.word	0x01004200, 0x00000004, 0x00004004, 0x01004204
1950	.word	0x01000004, 0x01004200, 0x01004000, 0x00004004
1951	! nibble 7
1952	.word	0x20800080, 0x20820000, 0x00020080, 0x00000000
1953	.word	0x20020000, 0x00800080, 0x20800000, 0x20820080
1954	.word	0x00000080, 0x20000000, 0x00820000, 0x00020080
1955	.word	0x00820080, 0x20020080, 0x20000080, 0x20800000
1956	.word	0x00020000, 0x00820080, 0x00800080, 0x20020000
1957	.word	0x20820080, 0x20000080, 0x00000000, 0x00820000
1958	.word	0x20000000, 0x00800000, 0x20020080, 0x20800080
1959	.word	0x00800000, 0x00020000, 0x20820000, 0x00000080
1960	.word	0x00800000, 0x00020000, 0x20000080, 0x20820080
1961	.word	0x00020080, 0x20000000, 0x00000000, 0x00820000
1962	.word	0x20800080, 0x20020080, 0x20020000, 0x00800080
1963	.word	0x20820000, 0x00000080, 0x00800080, 0x20020000
1964	.word	0x20820080, 0x00800000, 0x20800000, 0x20000080
1965	.word	0x00820000, 0x00020080, 0x20020080, 0x20800000
1966	.word	0x00000080, 0x20820000, 0x00820080, 0x00000000
1967	.word	0x20000000, 0x20800080, 0x00020000, 0x00820080
1968
1969