xref: /freebsd/crypto/openssl/crypto/des/asm/des_enc.m4 (revision 39beb93c)
1!  des_enc.m4
2!  des_enc.S  (generated from des_enc.m4)
3!
4!  UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
5!
6!  Version 1.0. 32-bit version.
7!
8!  June 8, 2000.
9!
10!  Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
11!		by Andy Polyakov.
12!
13!  January 1, 2003.
14!
15!  Assembler version: Copyright Svend Olaf Mikkelsen.
16!
17!  Original C code: Copyright Eric A. Young.
18!
19!  This code can be freely used by LibDES/SSLeay/OpenSSL users.
20!
21!  The LibDES/SSLeay/OpenSSL copyright notices must be respected.
22!
23!  This version can be redistributed.
24!
25!  To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
26!
27!  Global registers 1 to 5 are used. This is the same as done by the
28!  cc compiler. The UltraSPARC load/store little endian feature is used.
29!
30!  Instruction grouping often refers to one CPU cycle.
31!
32!  Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
33!
34!  Assemble through cc:  cc -c -xarch=v8plusa -o des_enc.o des_enc.S
35!
36!  Performance improvement according to './apps/openssl speed des'
37!
38!	32-bit build:
39!		23%  faster than cc-5.2 -xarch=v8plus -xO5
40!		115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
41!	64-bit build:
42!		50%  faster than cc-5.2 -xarch=v9 -xO5
43!		100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
44!
45
46.ident "des_enc.m4 2.1"
47
48#if defined(__SUNPRO_C) && defined(__sparcv9)
49# define ABI64  /* They've said -xarch=v9 at command line */
50#elif defined(__GNUC__) && defined(__arch64__)
51# define ABI64  /* They've said -m64 at command line */
52#endif
53! Frame layout and pointer-sized load/store mnemonics for the selected ABI.
54#ifdef ABI64
55  .register	%g2,#scratch
56  .register	%g3,#scratch
57# define	FRAME	-192
58# define	BIAS	2047
59# define	LDPTR	ldx
60# define	STPTR	stx
61# define	ARG0	128
62# define	ARGSZ	8
63# ifndef OPENSSL_SYSNAME_ULTRASPARC
64# define OPENSSL_SYSNAME_ULTRASPARC
65# endif
66#else
67# define	FRAME	-96
68# define	BIAS	0
69# define	LDPTR	ld
70# define	STPTR	st
71# define	ARG0	68
72# define	ARGSZ	4
73#endif
74! Loop count for the two-round loop; two further rounds follow the loop.
75#define LOOPS 7
76! Symbolic names for the SPARC global, local, in and out registers.
77#define global0 %g0
78#define global1 %g1
79#define global2 %g2
80#define global3 %g3
81#define global4 %g4
82#define global5 %g5
83! NOTE(review): the two highest local names below are crossed relative to the hardware numbering - confirm intended.
84#define local0 %l0
85#define local1 %l1
86#define local2 %l2
87#define local3 %l3
88#define local4 %l4
89#define local5 %l5
90#define local7 %l6
91#define local6 %l7
92
93#define in0 %i0
94#define in1 %i1
95#define in2 %i2
96#define in3 %i3
97#define in4 %i4
98#define in5 %i5
99#define in6 %i6
100#define in7 %i7
101
102#define out0 %o0
103#define out1 %o1
104#define out2 %o2
105#define out3 %o3
106#define out4 %o4
107#define out5 %o5
108#define out6 %o6
109#define out7 %o7
110! Byte stores are spelled with an alias that the preprocessor maps to stb.
111#define stub stb
112
113changequote({,})
114! m4 quoting uses curly braces from here on.
115
116! Macro definitions:
117
118
119! {ip_macro}
120!
121! The logic used in initial and final permutations is the same as in
122! the C code. The permutations are done with a clever combination of
123! shift, xor and logical-and operations.
124!
125! The macro also derives the addresses of sbox 2 to 5 (global 2 to 5), sbox 6
126! (local6) and sbox 8 (out3) from the sbox 1 address expected in global1.
127!
128! Rotates the halves 3 left to bring the sbox bits into convenient positions.
129!
130! Loads the first-round key from the address in parameter 5 to out0, out1.
131!
132! After the original LibDES initial permutation, the resulting left
133! is in the variable initially used for right and vice versa. The macro
134! implements the possibility to keep the halves in the original registers.
135!
136! parameter 1  left
137! parameter 2  right
138! parameter 3  result left (modify in first round)
139! parameter 4  result right (use in first round)
140! parameter 5  key address
141! parameter 6  1/2 for include encryption/decryption
142! parameter 7  1 for move in1 to in3
143! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
144! parameter 9  1 for load ks3 and ks2 to in4 and in3
145
146define(ip_macro, {
147! Initial permutation, sbox pointer setup and first-round key load.
148! {ip_macro}
149! $1 $2 $4 $3 $5 $6 $7 $8 $9
150
151	ld	[out2+256], local1        ! mask for the 4-bit swap step
152	srl	$2, 4, local4
153
154	xor	local4, $1, local4
155	ifelse($7,1,{mov in1, in3},{nop})
156
157	ld	[out2+260], local2        ! mask for the 16-bit swap step
158	and	local4, local1, local4
159	ifelse($8,1,{mov in3, in4},{})
160	ifelse($8,2,{mov in4, in3},{})
161
162	ld	[out2+280], out4          ! loop counter
163	sll	local4, 4, local1
164	xor	$1, local4, $1
165
166	ld	[out2+264], local3        ! mask for the 2-bit swap step
167	srl	$1, 16, local4
168	xor	$2, local1, $2
169
170	ifelse($9,1,{LDPTR	KS3, in4},{})
171	xor	local4, $2, local4
172	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr
173
174	ifelse($9,1,{LDPTR	KS2, in3},{})
175	and	local4, local2, local4
176	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr
177
178	sll	local4, 16, local1
179	xor	$2, local4, $2
180
181	srl	$2, 2, local4
182	xor	$1, local1, $1
183
184	sethi	%hi(16711680), local5     ! 0x00ff0000, upper part of the 8-bit swap mask
185	xor	local4, $1, local4
186
187	and	local4, local3, local4
188	or	local5, 255, local5       ! mask is now 0x00ff00ff
189
190	sll	local4, 2, local2
191	xor	$1, local4, $1
192
193	srl	$1, 8, local4
194	xor	$2, local2, $2
195
196	xor	local4, $2, local4
197	add	global1, 768, global4     ! address sbox 4
198
199	and	local4, local5, local4
200	add	global1, 1024, global5    ! address sbox 5
201
202	ld	[out2+272], local7        ! mask for the 1-bit swap step
203	sll	local4, 8, local1
204	xor	$2, local4, $2
205
206	srl	$2, 1, local4
207	xor	$1, local1, $1
208
209	ld	[$5], out0                ! key 7531
210	xor	local4, $1, local4
211	add	global1, 256, global2     ! address sbox 2
212
213	ld	[$5+4], out1              ! key 8642
214	and	local4, local7, local4
215	add	global1, 512, global3     ! address sbox 3
216
217	sll	local4, 1, local1
218	xor	$1, local4, $1
219
220	sll	$1, 3, local3             ! rotate 3 left, upper part
221	xor	$2, local1, $2
222
223	sll	$2, 3, local2             ! rotate 3 left, upper part
224	add	global1, 1280, local6     ! address sbox 6
225
226	srl	$1, 29, local4            ! rotate 3 left, wrapped bits
227	add	global1, 1792, out3       ! address sbox 8
228
229	srl	$2, 29, local1            ! rotate 3 left, wrapped bits
230	or	local4, local3, $4
231
232	or	local2, local1, $3
233
234	ifelse($6, 1, {
235
236		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
237		or	local2, local1, $3
238		xor	$4, out0, local1
239
240		call .des_enc.1
241		and	local1, 252, local1
242
243	},{})
244
245	ifelse($6, 2, {
246
247		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
248		or	local2, local1, $3
249		xor	$4, out0, local1
250
251		call .des_dec.1
252		and	local1, 252, local1
253
254	},{})
255})
256
257
258! {rounds_macro}
259!
260! The logic used in the DES rounds is the same as in the C code,
261! except that calculations for sbox 1 and sbox 5 begin before
262! the previous round is finished.
263!
264! In each round one half (work) is modified based on key and the
265! other half (use).
266!
267! In this version we do two rounds in a loop repeated 7 times
268! and two rounds separately.
269!
270! One half has the bits for the sboxes in the following positions:
271!
272!	777777xx555555xx333333xx111111xx
273!
274!	88xx666666xx444444xx222222xx8888
275!
276! The bits for each sbox are xor-ed with the key bits for that box.
277! The above xx bits are cleared, and the result used for lookup in
278! the sbox table. Each sbox entry contains the 4 output bits permuted
279! into 32 bits according to the P permutation.
280!
281! In the description of DES, left and right are switched after
282! each round, except after last round. In this code the original
283! left and right are kept in the same register in all rounds, meaning
284! that after the 16 rounds the result for right is in the register
285! originally used for left.
286!
287! parameter 1  first work (left in first round)
288! parameter 2  first use (right in first round)
289! parameter 3  enc/dec  1/-1
290! parameter 4  loop label
291! parameter 5  key address register
292! parameter 6  optional address for key next encryption/decryption
293! parameter 7  not empty for include retl
294!
295! also compares in2 to 8
296
297define(rounds_macro, {
298! Two rounds per loop pass, then two final rounds outside the loop.
299! {rounds_macro}
300! $1 $2 $3 $4 $5 $6 $7 $8 $9
301
302	xor	$2, out0, local1
303
304	ld	[out2+284], local5        ! 0x0000FC00
305	ba	$4
306	and	local1, 252, local1
307
308	.align 32
309
310$4:
311	! local6 is address sbox 6
312	! out3   is address sbox 8
313	! out4   is loop counter
314
315	ld	[global1+local1], local1
316	xor	$2, out1, out1            ! 8642
317	xor	$2, out0, out0            ! 7531
318	fmovs	%f0, %f0                  ! FP move of %f0 onto itself, used for alignment
319
320	srl	out1, 4, local0           ! rotate 4 right
321	and	out0, local5, local3      ! 3
322	fmovs	%f0, %f0
323
324	ld	[$5+$3*8], local7         ! key 7531 next round
325	srl	local3, 8, local3         ! 3
326	and	local0, 252, local2       ! 2
327	fmovs	%f0, %f0
328
329	ld	[global3+local3],local3   ! 3
330	sll	out1, 28, out1            ! rotate
331	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
332
333	ld	[global2+local2], local2  ! 2
334	srl	out0, 24, local1          ! 7
335	or	out1, local0, out1        ! rotate
336
337	ldub	[out2+local1], local1     ! 7 (and 0xFC)
338	srl	out1, 24, local0          ! 8
339	and	out1, local5, local4      ! 4
340
341	ldub	[out2+local0], local0     ! 8 (and 0xFC)
342	srl	local4, 8, local4         ! 4
343	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
344
345	ld	[global4+local4],local4   ! 4
346	srl	out1, 16, local2          ! 6
347	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
348
349	ld	[out3+local0],local0      ! 8
350	and	local2, 252, local2       ! 6
351	add	global1, 1536, local5     ! address sbox 7
352
353	ld	[local6+local2], local2   ! 6
354	srl	out0, 16, local3          ! 5
355	xor	$1, local4, $1            ! 4 finished
356
357	ld	[local5+local1],local1    ! 7
358	and	local3, 252, local3       ! 5
359	xor	$1, local0, $1            ! 8 finished
360
361	ld	[global5+local3],local3   ! 5
362	xor	$1, local2, $1            ! 6 finished
363	subcc	out4, 1, out4             ! count down, sets the flags tested by the bne below
364
365	ld	[$5+$3*8+4], out0         ! key 8642 next round
366	xor	$1, local7, local2        ! sbox 5 next round
367	xor	$1, local1, $1            ! 7 finished
368
369	srl	local2, 16, local2        ! sbox 5 next round
370	xor	$1, local3, $1            ! 5 finished
371
372	ld	[$5+$3*16+4], out1        ! key 8642 next round again
373	and	local2, 252, local2       ! sbox5 next round
374! next round
375	xor	$1, local7, local7        ! 7531
376
377	ld	[global5+local2], local2  ! 5
378	srl	local7, 24, local3        ! 7
379	xor	$1, out0, out0            ! 8642
380
381	ldub	[out2+local3], local3     ! 7 (and 0xFC)
382	srl	out0, 4, local0           ! rotate 4 right
383	and	local7, 252, local1       ! 1
384
385	sll	out0, 28, out0            ! rotate
386	xor	$2, local2, $2            ! 5 finished local2 used
387
388	srl	local0, 8, local4         ! 4
389	and	local0, 252, local2       ! 2
390	ld	[local5+local3], local3   ! 7
391
392	srl	local0, 16, local5        ! 6
393	or	out0, local0, out0        ! rotate
394	ld	[global2+local2], local2  ! 2
395
396	srl	out0, 24, local0
397	ld	[$5+$3*16], out0          ! key 7531 next round
398	and	local4, 252, local4	  ! 4
399
400	and	local5, 252, local5       ! 6
401	ld	[global4+local4], local4  ! 4
402	xor	$2, local3, $2            ! 7 finished local3 used
403
404	and	local0, 252, local0       ! 8
405	ld	[local6+local5], local5   ! 6
406	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
407
408	srl	local7, 8, local2         ! 3 start
409	ld	[out3+local0], local0     ! 8
410	xor	$2, local4, $2            ! 4 finished
411
412	and	local2, 252, local2       ! 3
413	ld	[global1+local1], local1  ! 1
414	xor	$2, local5, $2            ! 6 finished local5 used
415
416	ld	[global3+local2], local2  ! 3
417	xor	$2, local0, $2            ! 8 finished
418	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer (two rounds of 8 bytes)
419
420	ld	[out2+284], local5        ! 0x0000FC00
421	xor	$2, out0, local4          ! sbox 1 next round
422	xor	$2, local1, $2            ! 1 finished
423
424	xor	$2, local2, $2            ! 3 finished
425#ifdef OPENSSL_SYSNAME_ULTRASPARC
426	bne,pt	%icc, $4
427#else
428	bne	$4
429#endif
430	and	local4, 252, local1       ! sbox 1 next round
431
432! two rounds more:
433
434	ld	[global1+local1], local1
435	xor	$2, out1, out1
436	xor	$2, out0, out0
437
438	srl	out1, 4, local0           ! rotate
439	and	out0, local5, local3
440
441	ld	[$5+$3*8], local7         ! key 7531
442	srl	local3, 8, local3
443	and	local0, 252, local2
444
445	ld	[global3+local3],local3
446	sll	out1, 28, out1            ! rotate
447	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7
448
449	ld	[global2+local2], local2
450	srl	out0, 24, local1
451	or	out1, local0, out1        ! rotate
452
453	ldub	[out2+local1], local1
454	srl	out1, 24, local0
455	and	out1, local5, local4
456
457	ldub	[out2+local0], local0
458	srl	local4, 8, local4
459	xor	$1, local2, $1            ! 2 finished local2 now sbox 6
460
461	ld	[global4+local4],local4
462	srl	out1, 16, local2
463	xor	$1, local3, $1            ! 3 finished local3 now sbox 5
464
465	ld	[out3+local0],local0
466	and	local2, 252, local2
467	add	global1, 1536, local5     ! address sbox 7
468
469	ld	[local6+local2], local2
470	srl	out0, 16, local3
471	xor	$1, local4, $1            ! 4 finished
472
473	ld	[local5+local1],local1
474	and	local3, 252, local3
475	xor	$1, local0, $1
476
477	ld	[global5+local3],local3
478	xor	$1, local2, $1            ! 6 finished
479	cmp	in2, 8                    ! flags are not consumed inside this macro
480
481	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
482	xor	$1, local7, local2        ! sbox 5 next round
483	xor	$1, local1, $1            ! 7 finished
484
485	ld	[$5+$3*8+4], out0
486	srl	local2, 16, local2        ! sbox 5 next round
487	xor	$1, local3, $1            ! 5 finished
488
489	and	local2, 252, local2
490! next round (two rounds more)
491	xor	$1, local7, local7        ! 7531
492
493	ld	[global5+local2], local2
494	srl	local7, 24, local3
495	xor	$1, out0, out0            ! 8642
496
497	ldub	[out2+local3], local3
498	srl	out0, 4, local0           ! rotate
499	and	local7, 252, local1
500
501	sll	out0, 28, out0            ! rotate
502	xor	$2, local2, $2            ! 5 finished local2 used
503
504	srl	local0, 8, local4
505	and	local0, 252, local2
506	ld	[local5+local3], local3
507
508	srl	local0, 16, local5
509	or	out0, local0, out0        ! rotate
510	ld	[global2+local2], local2
511
512	srl	out0, 24, local0
513	ifelse($6,{}, {}, {ld	[$6], out0})   ! key next encryption/decryption
514	and	local4, 252, local4
515
516	and	local5, 252, local5
517	ld	[global4+local4], local4
518	xor	$2, local3, $2            ! 7 finished local3 used
519
520	and	local0, 252, local0
521	ld	[local6+local5], local5
522	xor	$2, local2, $2            ! 2 finished local2 now sbox 3
523
524	srl	local7, 8, local2         ! 3 start
525	ld	[out3+local0], local0
526	xor	$2, local4, $2
527
528	and	local2, 252, local2
529	ld	[global1+local1], local1
530	xor	$2, local5, $2            ! 6 finished local5 used
531
532	ld	[global3+local2], local2
533	srl	$1, 3, local3             ! first half shifted 3 right, left for the final permutation
534	xor	$2, local0, $2
535
536	ifelse($6,{}, {}, {ld	[$6+4], out1}) ! key next encryption/decryption
537	sll	$1, 29, local4            ! first half shifted 29 left, left for the final permutation
538	xor	$2, local1, $2
539
540	ifelse($7,{}, {}, {retl})
541	xor	$2, local2, $2            ! runs in the delay slot when retl is emitted
542})
543
544
545! {fp_macro}
546!
547!  parameter 1   right (original left)
548!  parameter 2   left (original right)
549!  parameter 3   1 for optional store to [in0]
550!  parameter 4   1 for load input/output address to local5/7
551!
552!  The final permutation logic switches the halves, meaning that
553!  left and right end up in the registers originally used.
554
555define(fp_macro, {
556! Final permutation: undo the rotate, then five masked swap steps.
557! {fp_macro}
558! $1 $2 $3 $4 $5 $6 $7 $8 $9
559
560	! initially undo the rotate 3 left done after initial permutation
561	! original left is received shifted 3 right and 29 left in local3/4
562
563	sll	$2, 29, local1
564	or	local3, local4, $1        ! rejoin the two parts received in local3/local4
565
566	srl	$2, 3, $2
567	sethi	%hi(0x55555555), local2   ! mask for the 1-bit swap step
568
569	or	$2, local1, $2
570	or	local2, %lo(0x55555555), local2
571
572	srl	$2, 1, local3
573	sethi	%hi(0x00ff00ff), local1   ! mask for the 8-bit swap step
574	xor	local3, $1, local3
575	or	local1, %lo(0x00ff00ff), local1
576	and	local3, local2, local3
577	sethi	%hi(0x33333333), local4   ! mask for the 2-bit swap step
578	sll	local3, 1, local2
579
580	xor	$1, local3, $1
581
582	srl	$1, 8, local3
583	xor	$2, local2, $2
584	xor	local3, $2, local3
585	or	local4, %lo(0x33333333), local4
586	and	local3, local1, local3
587	sethi	%hi(0x0000ffff), local1   ! mask for the 16-bit swap step
588	sll	local3, 8, local2
589
590	xor	$2, local3, $2
591
592	srl	$2, 2, local3
593	xor	$1, local2, $1
594	xor	local3, $1, local3
595	or	local1, %lo(0x0000ffff), local1
596	and	local3, local4, local3
597	sethi	%hi(0x0f0f0f0f), local4   ! mask for the 4-bit swap step
598	sll	local3, 2, local2
599
600	ifelse($4,1, {LDPTR INPUT, local5})	! NOTE(review): INPUT and OUTPUT are defined outside the lines reviewed here
601	xor	$1, local3, $1
602
603	ifelse($4,1, {LDPTR OUTPUT, local7})
604	srl	$1, 16, local3
605	xor	$2, local2, $2
606	xor	local3, $2, local3
607	or	local4, %lo(0x0f0f0f0f), local4
608	and	local3, local1, local3
609	sll	local3, 16, local2
610
611	xor	$2, local3, local1
612
613	srl	local1, 4, local3
614	xor	$1, local2, $1
615	xor	local3, $1, local3
616	and	local3, local4, local3
617	sll	local3, 4, local2
618
619	xor	$1, local3, $1
620
621	! optional store:
622
623	ifelse($3,1, {st $1, [in0]})
624
625	xor	local1, local2, $2
626
627	ifelse($3,1, {st $2, [in0+4]})
628
629})
630
631
632! {fp_ip_macro}
633!
634! Does initial permutation for next block mixed with
635! final permutation for current block.
636!
637! parameter 1   original left
638! parameter 2   original right
639! parameter 3   left ip
640! parameter 4   right ip
641! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
642!                2: mov in4 to in3
643!
644! also adds -8 to length in2 and loads loop counter to out4
645
646define(fp_ip_macro, {
647! Final permutation of the current block interleaved with the initial permutation of the next.
648! {fp_ip_macro}
649! $1 $2 $3 $4 $5 $6 $7 $8 $9
650
651	define({temp1},{out4})
652	define({temp2},{local3})
653
654	define({ip1},{local1})
655	define({ip2},{local2})
656	define({ip4},{local4})
657	define({ip5},{local5})
658
659	! $1 in local3, local4
660
661	ld	[out2+256], ip1           ! mask for the 4-bit swap step
662	sll	out5, 29, temp1
663	or	local3, local4, $1
664
665	srl	out5, 3, $2
666	ifelse($5,2,{mov in4, in3})
667
668	ld	[out2+272], ip5           ! mask for the 1-bit swap step
669	srl	$4, 4, local0
670	or	$2, temp1, $2
671
672	srl	$2, 1, temp1
673	xor	temp1, $1, temp1
674
675	and	temp1, ip5, temp1
676	xor	local0, $3, local0
677
678	sll	temp1, 1, temp2
679	xor	$1, temp1, $1
680
681	and	local0, ip1, local0
682	add	in2, -8, in2              ! length in2 reduced by 8
683
684	sll	local0, 4, local7
685	xor	$3, local0, $3
686
687	ld	[out2+268], ip4           ! mask for the 8-bit swap step
688	srl	$1, 8, temp1
689	xor	$2, temp2, $2
690	ld	[out2+260], ip2           ! mask for the 16-bit swap step
691	srl	$3, 16, local0
692	xor	$4, local7, $4
693	xor	temp1, $2, temp1
694	xor	local0, $4, local0
695	and	temp1, ip4, temp1
696	and	local0, ip2, local0
697	sll	temp1, 8, temp2
698	xor	$2, temp1, $2
699	sll	local0, 16, local7
700	xor	$4, local0, $4
701
702	srl	$2, 2, temp1
703	xor	$1, temp2, $1
704
705	ld	[out2+264], temp2         ! ip3, mask for the 2-bit swap step
706	srl	$4, 2, local0
707	xor	$3, local7, $3
708	xor	temp1, $1, temp1
709	xor	local0, $3, local0
710	and	temp1, temp2, temp1
711	and	local0, temp2, local0
712	sll	temp1, 2, temp2
713	xor	$1, temp1, $1
714	sll	local0, 2, local7
715	xor	$3, local0, $3
716
717	srl	$1, 16, temp1
718	xor	$2, temp2, $2
719	srl	$3, 8, local0
720	xor	$4, local7, $4
721	xor	temp1, $2, temp1
722	xor	local0, $4, local0
723	and	temp1, ip2, temp1
724	and	local0, ip4, local0
725	sll	temp1, 16, temp2
726	xor	$2, temp1, local4
727	sll	local0, 8, local7
728	xor	$4, local0, $4
729
730	srl	$4, 1, local0
731	xor	$3, local7, $3
732
733	srl	local4, 4, temp1
734	xor	local0, $3, local0
735
736	xor	$1, temp2, $1
737	and	local0, ip5, local0
738
739	sll	local0, 1, local7
740	xor	temp1, $1, temp1
741
742	xor	$3, local0, $3
743	xor	$4, local7, $4
744
745	sll	$3, 3, local5
746	and	temp1, ip1, temp1
747
748	sll	temp1, 4, temp2
749	xor	$1, temp1, $1
750
751	ifelse($5,1,{LDPTR	KS2, in4})
752	sll	$4, 3, local2
753	xor	local4, temp2, $2
754
755	! reload since used as temporary:
756
757	ld	[out2+280], out4          ! loop counter
758
759	srl	$3, 29, local0
760	ifelse($5,1,{add in4, 120, in4})
761
762	ifelse($5,1,{LDPTR	KS1, in3})
763	srl	$4, 29, local7
764
765	or	local0, local5, $4
766	or	local2, local7, $3
767
768})
769
770
771
772! {load_little_endian}
773!
774! parameter 1  address
775! parameter 2  destination left
776! parameter 3  destination right
777! parameter 4  temporary
778! parameter 5  label
779! Builds two 32-bit words from 8 bytes, first byte in memory ending up rightmost.
780define(load_little_endian, {
781
782! {load_little_endian}
783! $1 $2 $3 $4 $5 $6 $7 $8 $9
784
785	! first in memory to rightmost in register
786
787#ifdef OPENSSL_SYSNAME_ULTRASPARC
788	andcc	$1, 3, global0            ! test the low two address bits
789	bne,pn	%icc, $5                  ! not 4-byte aligned: use the byte-wise path
790	nop
791
792	lda	[$1] 0x88, $2             ! word load through ASI 0x88
793	add	$1, 4, $4
794
795	ba,pt	%icc, $5a
796	lda	[$4] 0x88, $3             ! delay slot: second word
797#endif
798
799$5:
800	ldub	[$1+3], $2
801
802	ldub	[$1+2], $4
803	sll	$2, 8, $2
804	or	$2, $4, $2
805
806	ldub	[$1+1], $4
807	sll	$2, 8, $2
808	or	$2, $4, $2
809
810	ldub	[$1+0], $4
811	sll	$2, 8, $2
812	or	$2, $4, $2
813
814
815	ldub	[$1+3+4], $3
816
817	ldub	[$1+2+4], $4
818	sll	$3, 8, $3
819	or	$3, $4, $3
820
821	ldub	[$1+1+4], $4
822	sll	$3, 8, $3
823	or	$3, $4, $3
824
825	ldub	[$1+0+4], $4
826	sll	$3, 8, $3
827	or	$3, $4, $3
828$5a:
829
830})
831
832
833! {load_little_endian_inc}
834!
835! parameter 1  address
836! parameter 2  destination left
837! parameter 3  destination right
838! parameter 4  temporary
839! parameter 5  label
840!
841! adds 8 to address
842
843define(load_little_endian_inc, {
844! Same load as the non-incrementing variant, but the address register is advanced by 8 on both paths.
845! {load_little_endian_inc}
846! $1 $2 $3 $4 $5 $6 $7 $8 $9
847
848	! first in memory to rightmost in register
849
850#ifdef OPENSSL_SYSNAME_ULTRASPARC
851	andcc	$1, 3, global0            ! test the low two address bits
852	bne,pn	%icc, $5                  ! not 4-byte aligned: use the byte-wise path
853	nop
854
855	lda	[$1] 0x88, $2             ! word load through ASI 0x88
856	add	$1, 4, $1
857
858	lda	[$1] 0x88, $3
859	ba,pt	%icc, $5a
860	add	$1, 4, $1                 ! delay slot: address is now 8 past its start value
861#endif
862
863$5:
864	ldub	[$1+3], $2
865
866	ldub	[$1+2], $4
867	sll	$2, 8, $2
868	or	$2, $4, $2
869
870	ldub	[$1+1], $4
871	sll	$2, 8, $2
872	or	$2, $4, $2
873
874	ldub	[$1+0], $4
875	sll	$2, 8, $2
876	or	$2, $4, $2
877
878	ldub	[$1+3+4], $3
879	add	$1, 8, $1                 ! advance early; the offsets below compensate with -8
880
881	ldub	[$1+2+4-8], $4
882	sll	$3, 8, $3
883	or	$3, $4, $3
884
885	ldub	[$1+1+4-8], $4
886	sll	$3, 8, $3
887	or	$3, $4, $3
888
889	ldub	[$1+0+4-8], $4
890	sll	$3, 8, $3
891	or	$3, $4, $3
892$5a:
893
894})
895
896
897! {load_n_bytes}
898!
899! Loads 1 to 7 bytes little endian
900! Remaining bytes are zeroed.
901!
902! parameter 1  address
903! parameter 2  length
904! parameter 3  destination register left
905! parameter 4  destination register right
906! parameter 5  temp
907! parameter 6  temp2
908! parameter 7  label
909! parameter 8  return label
910
911define(load_n_bytes, {
912! Dispatches through a table of offsets relative to the first label and falls through the byte loads.
913! {load_n_bytes}
914! $1 $2 $5 $6 $7 $8 $7 $8 $9
915
916$7.0:	call	.+8                       ! leaves the address of this call in %o7
917	sll	$2, 2, $6                 ! delay slot: length times 4, byte offset into the table
918
919	add	%o7,$7.jmp.table-$7.0,$5
920
921	add	$5, $6, $5
922	mov	0, $4                     ! clear one destination word
923
924	ld	[$5], $5                  ! table entry: offset of the entry point from the first label
925
926	jmp	%o7+$5
927	mov	0, $3                     ! delay slot: clear the other destination word
928
929$7.7:
930	ldub	[$1+6], $5
931	sll	$5, 16, $5
932	or	$3, $5, $3
933$7.6:
934	ldub	[$1+5], $5
935	sll	$5, 8, $5
936	or	$3, $5, $3
937$7.5:
938	ldub	[$1+4], $5
939	or	$3, $5, $3
940$7.4:
941	ldub	[$1+3], $5
942	sll	$5, 24, $5
943	or	$4, $5, $4
944$7.3:
945	ldub	[$1+2], $5
946	sll	$5, 16, $5
947	or	$4, $5, $4
948$7.2:
949	ldub	[$1+1], $5
950	sll	$5, 8, $5
951	or	$4, $5, $4
952$7.1:
953	ldub	[$1+0], $5
954	ba	$8
955	or	$4, $5, $4                ! delay slot of the branch to the return label
956
957	.align 4
958
959$7.jmp.table:
960	.word	0                         ! NOTE(review): length 0 would jump back to the dispatch code itself
961	.word	$7.1-$7.0
962	.word	$7.2-$7.0
963	.word	$7.3-$7.0
964	.word	$7.4-$7.0
965	.word	$7.5-$7.0
966	.word	$7.6-$7.0
967	.word	$7.7-$7.0
968})
969
970
971! {store_little_endian}
972!
973! parameter 1  address
974! parameter 2  source left
975! parameter 3  source right
976! parameter 4  temporary
977! parameter 5  label
978define(store_little_endian, {
979! Writes two 32-bit words as 8 bytes, rightmost register byte first in memory.
980! {store_little_endian}
981! $1 $2 $3 $4 $5 $6 $7 $8 $9
982
983	! rightmost in register to first in memory
984
985#ifdef OPENSSL_SYSNAME_ULTRASPARC
986	andcc	$1, 3, global0            ! test the low two address bits
987	bne,pn	%icc, $5                  ! not 4-byte aligned: use the byte-wise path
988	nop
989
990	sta	$2, [$1] 0x88             ! word store through ASI 0x88
991	add	$1, 4, $4
992
993	ba,pt	%icc, $5a
994	sta	$3, [$4] 0x88             ! delay slot: second word
995#endif
996
997$5:
998	and	$2, 255, $4
999	stub	$4, [$1+0]
1000
1001	srl	$2, 8, $4
1002	and	$4, 255, $4
1003	stub	$4, [$1+1]
1004
1005	srl	$2, 16, $4
1006	and	$4, 255, $4
1007	stub	$4, [$1+2]
1008
1009	srl	$2, 24, $4
1010	stub	$4, [$1+3]
1011
1012
1013	and	$3, 255, $4
1014	stub	$4, [$1+0+4]
1015
1016	srl	$3, 8, $4
1017	and	$4, 255, $4
1018	stub	$4, [$1+1+4]
1019
1020	srl	$3, 16, $4
1021	and	$4, 255, $4
1022	stub	$4, [$1+2+4]
1023
1024	srl	$3, 24, $4
1025	stub	$4, [$1+3+4]
1026
1027$5a:
1028
1029})
1030
1031
1032! {store_n_bytes}
1033!
1034! Stores 1 to 7 bytes little endian
1035!
1036! parameter 1  address
1037! parameter 2  length
1038! parameter 3  source register left
1039! parameter 4  source register right
1040! parameter 5  temp
1041! parameter 6  temp2
1042! parameter 7  label
1043! parameter 8  return label
1044
1045define(store_n_bytes, {
1046! Dispatches through a table of offsets relative to the first label and falls through the byte stores.
1047! {store_n_bytes}
1048! $1 $2 $5 $6 $7 $8 $7 $8 $9
1049
1050$7.0:	call	.+8                       ! leaves the address of this call in %o7
1051	sll	$2, 2, $6                 ! delay slot: length times 4, byte offset into the table
1052
1053	add	%o7,$7.jmp.table-$7.0,$5
1054
1055	add	$5, $6, $5
1056
1057	ld	[$5], $5                  ! table entry: offset of the entry point from the first label
1058
1059	jmp	%o7+$5
1060	nop
1061
1062$7.7:
1063	srl	$3, 16, $5
1064	and	$5, 0xff, $5
1065	stub	$5, [$1+6]
1066$7.6:
1067	srl	$3, 8, $5
1068	and	$5, 0xff, $5
1069	stub	$5, [$1+5]
1070$7.5:
1071	and	$3, 0xff, $5
1072	stub	$5, [$1+4]
1073$7.4:
1074	srl	$4, 24, $5
1075	stub	$5, [$1+3]
1076$7.3:
1077	srl	$4, 16, $5
1078	and	$5, 0xff, $5
1079	stub	$5, [$1+2]
1080$7.2:
1081	srl	$4, 8, $5
1082	and	$5, 0xff, $5
1083	stub	$5, [$1+1]
1084$7.1:
1085	and	$4, 0xff, $5
1086
1087
1088	ba	$8
1089	stub	$5, [$1]                  ! delay slot of the branch to the return label
1090
1091	.align 4
1092
1093$7.jmp.table:
1094
1095	.word	0                         ! NOTE(review): length 0 would jump back to the dispatch code itself
1096	.word	$7.1-$7.0
1097	.word	$7.2-$7.0
1098	.word	$7.3-$7.0
1099	.word	$7.4-$7.0
1100	.word	$7.5-$7.0
1101	.word	$7.6-$7.0
1102	.word	$7.7-$7.0
1103})
1104
1105
1106define(testvalue,{1})
1107! Test helper: fills the working registers and up to eight named registers with one constant.
1108define(register_init, {
1109
1110! For test purposes:
1111
1112	sethi	%hi(testvalue), local0
1113	or	local0, %lo(testvalue), local0
1114! Each of the eight optional arguments names one more register to fill.
1115	ifelse($1,{},{}, {mov	local0, $1})
1116	ifelse($2,{},{}, {mov	local0, $2})
1117	ifelse($3,{},{}, {mov	local0, $3})
1118	ifelse($4,{},{}, {mov	local0, $4})
1119	ifelse($5,{},{}, {mov	local0, $5})
1120	ifelse($6,{},{}, {mov	local0, $6})
1121	ifelse($7,{},{}, {mov	local0, $7})
1122	ifelse($8,{},{}, {mov	local0, $8})
1123! NOTE(review): this also overwrites out2 and global1, which the other code uses as table bases.
1124	mov	local0, local1
1125	mov	local0, local2
1126	mov	local0, local3
1127	mov	local0, local4
1128	mov	local0, local5
1129	mov	local0, local7
1130	mov	local0, local6
1131	mov	local0, out0
1132	mov	local0, out1
1133	mov	local0, out2
1134	mov	local0, out3
1135	mov	local0, out4
1136	mov	local0, out5
1137	mov	local0, global1
1138	mov	local0, global2
1139	mov	local0, global3
1140	mov	local0, global4
1141	mov	local0, global5
1142
1143})
1144
1145.section	".text"
1146
1147	.align 32
1148
1149.des_enc:
1150! Internal subroutine: runs the rounds with the key pointer stepping upward and returns with retl.
1151	! key address in3
1152	! loads key next encryption/decryption first round from [in4]
1153! The loop label .des_enc.1 is also called directly by expansions of {ip_macro}.
1154	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1155
1156
1157	.align 32
1158
1159.des_dec:
1160! Internal subroutine: runs the rounds with the key pointer stepping downward and returns with retl.
1161	! implemented with out5 as first parameter to avoid
1162	! register exchange in ede modes
1163
1164	! key address in4
1165	! loads key next encryption/decryption first round from [in3]
1166! The loop label .des_dec.1 is also called directly by expansions of {ip_macro}.
1167	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1168
1169
1170
1171! void DES_encrypt1(data, ks, enc)
1172! *******************************
1173! Arguments arrive in in0 (data), in1 (ks) and in2 (enc); enc equal to 0 selects decryption.
1174	.align 32
1175	.global DES_encrypt1
1176	.type	 DES_encrypt1,#function
1177
1178DES_encrypt1:
1179
1180	save	%sp, FRAME, %sp
1181! NOTE(review): .PIC.me.up is defined outside the lines reviewed; presumably it sets up global1 and out2 - confirm.
1182	call	.PIC.me.up
1183	mov	.PIC.me.up-(.-4),out0
1184
1185	ld	[in0], in5                ! left
1186	cmp	in2, 0                    ! enc
1187
1188#ifdef OPENSSL_SYSNAME_ULTRASPARC
1189	be,pn	%icc, .encrypt.dec        ! enc/dec
1190#else
1191	be	.encrypt.dec
1192#endif
1193	ld	[in0+4], out5             ! right
1194
1195	! parameter 6  1/2 for include encryption/decryption
1196	! parameter 7  1 for move in1 to in3
1197	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1198
1199	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1200
1201	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1202
1203	fp_macro(in5, out5, 1)            ! 1 for store to [in0]
1204
1205	ret
1206	restore
1207
1208.encrypt.dec:
1209
1210	add	in1, 120, in3             ! use last subkey for first round
1211
1212	! parameter 6  1/2 for include encryption/decryption
1213	! parameter 7  1 for move in1 to in3
1214	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1215
1216	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4
1217
1218	fp_macro(out5, in5, 1)            ! 1 for store to [in0]
1219
1220	ret
1221	restore
1222
1223.DES_encrypt1.end:
1224	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1225
1226
1227! void DES_encrypt2(data, ks, enc)
1228!*********************************
1229
1230	! encrypts/decrypts without initial/final permutation
1231! Arguments arrive in in0 (data), in1 (ks) and in2 (enc); enc equal to 0 selects decryption.
1232	.align 32
1233	.global DES_encrypt2
1234	.type	 DES_encrypt2,#function
1235
1236DES_encrypt2:
1237
1238	save	%sp, FRAME, %sp
1239! NOTE(review): .PIC.me.up is defined outside the lines reviewed; presumably it sets up global1 and out2 - confirm.
1240	call	.PIC.me.up
1241	mov	.PIC.me.up-(.-4),out0
1242
1243	! Set sbox address 1 to 6 and rotate halfs 3 left
1244	! Errors caught by destest? Yes. Still? *NO*
1245
1246	!sethi	%hi(DES_SPtrans), global1 ! address sbox 1
1247
1248	!or	global1, %lo(DES_SPtrans), global1  ! sbox 1
1249
1250	add	global1, 256, global2     ! sbox 2
1251	add	global1, 512, global3     ! sbox 3
1252
1253	ld	[in0], out5               ! right
1254	add	global1, 768, global4     ! sbox 4
1255	add	global1, 1024, global5    ! sbox 5
1256
1257	ld	[in0+4], in5              ! left
1258	add	global1, 1280, local6     ! sbox 6
1259	add	global1, 1792, out3       ! sbox 8
1260
1261	! rotate
1262
1263	sll	in5, 3, local5
1264	mov	in1, in3                  ! key address to in3
1265
1266	sll	out5, 3, local7
1267	srl	in5, 29, in5
1268
1269	srl	out5, 29, out5
1270	add	in5, local5, in5
1271
1272	add	out5, local7, out5
1273	cmp	in2, 0
1274
1275	! we use our own stackframe
1276
1277#ifdef OPENSSL_SYSNAME_ULTRASPARC
1278	be,pn	%icc, .encrypt2.dec       ! decryption
1279#else
1280	be	.encrypt2.dec
1281#endif
1282	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]	! delay slot: save the data pointer, in0 is reused as a temporary below
1283
1284	ld	[in3], out0               ! key 7531 first round
1285	mov	LOOPS, out4               ! loop counter
1286
1287	ld	[in3+4], out1             ! key 8642 first round
1288	sethi	%hi(0x0000FC00), local5   ! low 10 bits are zero, so sethi alone gives 0x0000FC00
1289
1290	call .des_enc
1291	mov	in3, in4                  ! delay slot: address for the next-key loads done at the end of the rounds
1292
1293	! rotate
1294	sll	in5, 29, in0
1295	srl	in5, 3, in5
1296	sll	out5, 29, in1
1297	add	in5, in0, in5
1298	srl	out5, 3, out5
1299	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! reload the saved data pointer
1300	add	out5, in1, out5
1301	st	in5, [in0]
1302	st	out5, [in0+4]
1303
1304	ret
1305	restore
1306
1307
1308.encrypt2.dec:
1309
1310	add in3, 120, in4                 ! start from the last subkey
1311
1312	ld	[in4], out0               ! key 7531 first round
1313	mov	LOOPS, out4               ! loop counter
1314
1315	ld	[in4+4], out1             ! key 8642 first round
1316	sethi	%hi(0x0000FC00), local5   ! low 10 bits are zero, so sethi alone gives 0x0000FC00
1317
1318	mov	in5, local1               ! left expected in out5
1319	mov	out5, in5
1320
1321	call .des_dec
1322	mov	local1, out5              ! delay slot: completes the exchange of in5 and out5
1323
1324.encrypt2.finish:
1325
1326	! rotate
1327	sll	in5, 29, in0
1328	srl	in5, 3, in5
1329	sll	out5, 29, in1
1330	add	in5, in0, in5
1331	srl	out5, 3, out5
1332	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! reload the saved data pointer
1333	add	out5, in1, out5
1334	st	out5, [in0]
1335	st	in5, [in0+4]
1336
1337	ret
1338	restore
1339
1340.DES_encrypt2.end:
1341	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1342
1343
1344! void DES_encrypt3(data, ks1, ks2, ks3)
1345! **************************************
1346! Encrypts with ks1, decrypts with ks2, encrypts with ks3; the result is stored back to data.
1347	.align 32
1348	.global DES_encrypt3
1349	.type	 DES_encrypt3,#function
1350
1351DES_encrypt3:
1352
1353	save	%sp, FRAME, %sp
1354! NOTE(review): .PIC.me.up is defined outside the lines reviewed; presumably it sets up global1 and out2 - confirm.
1355	call	.PIC.me.up
1356	mov	.PIC.me.up-(.-4),out0
1357
1358	ld	[in0], in5                ! left
1359	add	in2, 120, in4             ! ks2
1360
1361	ld	[in0+4], out5             ! right
1362	mov	in3, in2                  ! save ks3
1363
1364	! parameter 6  1/2 for include encryption/decryption
1365	! parameter 7  1 for mov in1 to in3
1366	! parameter 8  1 for mov in3 to in4
1367	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1368
1369	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1370
1371	call	.des_dec
1372	mov	in2, in3                  ! preload ks3
1373
1374	call	.des_enc
1375	nop
1376
1377	fp_macro(in5, out5, 1)
1378
1379	ret
1380	restore
1381
1382.DES_encrypt3.end:
1383	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1384
1385
1386! void DES_decrypt3(data, ks1, ks2, ks3)
1387! **************************************
!
! Inverse of the triple DES block operation: the 8 byte block at the
! address in in0 is decrypted with ks3, encrypted with ks2 and then
! decrypted with ks1.
! NOTE(review): no store is visible after the final permutation, so the
! trailing argument 1 of the final permutation call presumably makes the
! macro write the result back through in0 - confirm in the macro body.
1388
1389	.align 32
1390	.global DES_decrypt3
1391	.type	 DES_decrypt3,#function
1392
1393DES_decrypt3:
1394
1395	save	%sp, FRAME, %sp
1396
	! Position independent setup. The mov executes in the delay slot of
	! the call and passes the distance from the call to the helper.
	! On return out2 points to .des_and and global1 to DES_SPtrans.
1397	call	.PIC.me.up
1398	mov	.PIC.me.up-(.-4),out0
1399
1400	ld	[in0], in5                ! left
1401	add	in3, 120, in4             ! ks3, offset 120 since decryption uses the last subkey first
1402
1403	ld	[in0+4], out5             ! right
1404	mov	in2, in3                  ! ks2
1405
1406	! parameter 6  1/2 for include encryption/decryption
1407	! parameter 7  1 for mov in1 to in3
1408	! parameter 8  1 for mov in3 to in4
1409	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1410
	! Initial permutation with the first decryption pass included
	! (parameter 6 is 2), keyed through in4. Left and right are swapped
	! in the result registers compared with the encryption routine.
1411	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1412
	! Second pass: encryption with ks2 in in3. The delay slot prepares
	! the ks1 pointer in in4 for the last decryption pass.
1413	call	.des_enc
1414	add	in1, 120, in4             ! preload ks1
1415
	! Third pass: decryption with ks1.
1416	call	.des_dec
1417	nop
1418
1419	fp_macro(out5, in5, 1)
1420
1421	ret
1422	restore
1423
1424.DES_decrypt3.end:
1425	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1426
1427	.align	256
1428	.type	 .des_and,#object
! The object spans the 256 byte table plus the eight 32 bit words that
! follow it, so its last byte is at offset 287.
1429	.size	 .des_and,288
1430
1431.des_and:
1432
1433! This table is used for AND 0xFC when it is known that register
1434! bits 8-31 are zero. Makes it possible to do three arithmetic
1435! operations in one cycle.
!
! Entry i holds i with its two low bits cleared, so a byte load at
! offset i gives the same value as i AND 0xFC.
1436
1437	.byte  0, 0, 0, 0, 4, 4, 4, 4
1438	.byte  8, 8, 8, 8, 12, 12, 12, 12
1439	.byte  16, 16, 16, 16, 20, 20, 20, 20
1440	.byte  24, 24, 24, 24, 28, 28, 28, 28
1441	.byte  32, 32, 32, 32, 36, 36, 36, 36
1442	.byte  40, 40, 40, 40, 44, 44, 44, 44
1443	.byte  48, 48, 48, 48, 52, 52, 52, 52
1444	.byte  56, 56, 56, 56, 60, 60, 60, 60
1445	.byte  64, 64, 64, 64, 68, 68, 68, 68
1446	.byte  72, 72, 72, 72, 76, 76, 76, 76
1447	.byte  80, 80, 80, 80, 84, 84, 84, 84
1448	.byte  88, 88, 88, 88, 92, 92, 92, 92
1449	.byte  96, 96, 96, 96, 100, 100, 100, 100
1450	.byte  104, 104, 104, 104, 108, 108, 108, 108
1451	.byte  112, 112, 112, 112, 116, 116, 116, 116
1452	.byte  120, 120, 120, 120, 124, 124, 124, 124
1453	.byte  128, 128, 128, 128, 132, 132, 132, 132
1454	.byte  136, 136, 136, 136, 140, 140, 140, 140
1455	.byte  144, 144, 144, 144, 148, 148, 148, 148
1456	.byte  152, 152, 152, 152, 156, 156, 156, 156
1457	.byte  160, 160, 160, 160, 164, 164, 164, 164
1458	.byte  168, 168, 168, 168, 172, 172, 172, 172
1459	.byte  176, 176, 176, 176, 180, 180, 180, 180
1460	.byte  184, 184, 184, 184, 188, 188, 188, 188
1461	.byte  192, 192, 192, 192, 196, 196, 196, 196
1462	.byte  200, 200, 200, 200, 204, 204, 204, 204
1463	.byte  208, 208, 208, 208, 212, 212, 212, 212
1464	.byte  216, 216, 216, 216, 220, 220, 220, 220
1465	.byte  224, 224, 224, 224, 228, 228, 228, 228
1466	.byte  232, 232, 232, 232, 236, 236, 236, 236
1467	.byte  240, 240, 240, 240, 244, 244, 244, 244
1468	.byte  248, 248, 248, 248, 252, 252, 252, 252
1469
1470	! 5 numbers for initial/final permutation
1471
1472	.word   0x0f0f0f0f                ! offset 256
1473	.word	0x0000ffff                ! 260
1474	.word	0x33333333                ! 264
1475	.word	0x00ff00ff                ! 268
1476	.word	0x55555555                ! 272
1477
1478	.word	0                         ! 276
1479	.word	LOOPS                     ! 280
1480	.word	0x0000FC00                ! 284
! The word below is read by .PIC.me.up, which adds its own address to the
! stored value to obtain the pointer to DES_SPtrans.
1481.PIC.DES_SPtrans:
1482	.word	%r_disp32(DES_SPtrans)
1483
1484! input:	out0	offset between .PIC.me.up and caller
1485! output:	out0	pointer to .PIC.me.up
1486!		out2	pointer to .des_and
1487!		global1	pointer to DES_SPtrans
!
! Leaf helper that turns link time distances into run time addresses.
! Callers pass the distance from their call instruction to this label
! in out0, set in the delay slot of the call.
1488	.align	32
1489.PIC.me.up:
	! %o7 holds the address of the calling instruction, so the sum is the
	! run time address of this label.
1490	add	out0,%o7,out0			! pointer to .PIC.me.up
1491#if 1
	! Fetch the 32 bit value stored at .PIC.DES_SPtrans and add the address
	! of that word (out0 plus its distance from this label) to it.
	! NOTE(review): ld zero extends on V9, so the 64 bit build presumably
	! relies on DES_SPtrans lying at a higher address than the word - confirm.
1492	ld	[out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1
1493	add	global1,(.PIC.DES_SPtrans-.PIC.me.up),global1
1494	add	global1,out0,global1
1495#else
	! Alternatives below are compiled out by the condition above.
1496# ifdef OPENSSL_PIC
1497	! In case anybody wonders why this code is same for both ABI.
1498	! To start with it is not. Do note LDPTR below. But of course
1499	! you must be wondering why the rest of it does not contain
1500	! things like %hh, %hm and %lm. Well, those are needed only
1501	! if OpenSSL library *itself* will become larger than 4GB,
1502	! which is not going to happen any time soon.
1503	sethi	%hi(DES_SPtrans),global1
1504	or	global1,%lo(DES_SPtrans),global1
1505	sethi	%hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1506	add	global1,out0,global1
1507	add	out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1508	LDPTR	[out2+global1],global1
1509# elif 0
1510	setn	DES_SPtrans,out2,global1	! synthetic instruction !
1511# elif defined(ABI64)
1512	sethi	%hh(DES_SPtrans),out2
1513	or	out2,%hm(DES_SPtrans),out2
1514	sethi	%lm(DES_SPtrans),global1
1515	or	global1,%lo(DES_SPtrans),global1
1516	sllx	out2,32,out2
1517	or	out2,global1,global1
1518# else
1519	sethi	%hi(DES_SPtrans),global1
1520	or	global1,%lo(DES_SPtrans),global1
1521# endif
1522#endif
	! Leaf return. The delay slot computes the pointer to .des_and in out2.
1523	retl
1524	add	out0,.des_and-.PIC.me.up,out2
1525
1526! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1527! *****************************************************************
!
! CBC mode with a single key schedule. The six arguments arrive in in0
! to in5 in the order listed above. A zero enc value selects the
! decryption path, anything else the encryption path.
! The ivec pointer is parked in a stack slot and the final chaining value
! is written back through it before returning, except when the decryption
! path is entered with a length of zero or less, which returns at once.
1528
1529
1530	.align 32
1531	.global DES_ncbc_encrypt
1532	.type	 DES_ncbc_encrypt,#function
1533
1534DES_ncbc_encrypt:
1535
1536	save	%sp, FRAME, %sp
1537
	! Symbolic names for three slots addressed off %sp. They stay
	! available to code further down in the file.
1538	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
1539	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1540	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })
1541
	! Position independent setup, see .PIC.me.up. On return out2 points
	! to .des_and and global1 to DES_SPtrans.
1542	call	.PIC.me.up
1543	mov	.PIC.me.up-(.-4),out0
1544
1545	cmp	in5, 0                    ! enc
1546
	! The store in the delay slot saves the ivec pointer on both paths.
1547#ifdef OPENSSL_SYSNAME_ULTRASPARC
1548	be,pn	%icc, .ncbc.dec
1549#else
1550	be	.ncbc.dec
1551#endif
1552	STPTR	in4, IVEC
1553
1554	! addr  left  right  temp  label
1555	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv
1556
1557	addcc	in2, -8, in2              ! bytes missing when first block done
1558
	! Negative result: fewer than 8 bytes were requested in total.
1559#ifdef OPENSSL_SYSNAME_ULTRASPARC
1560	bl,pn	%icc, .ncbc.enc.seven.or.less
1561#else
1562	bl	.ncbc.enc.seven.or.less
1563#endif
1564	mov	in3, in4                  ! schedule
1565
1566.ncbc.enc.next.block:
1567
1568	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block
1569
	! Entry point used after a partial block has been loaded byte by byte.
1570.ncbc.enc.next.block_1:
1571
1572	xor	in5, out4, in5            ! iv xor
1573	xor	out5, global4, out5       ! iv xor
1574
1575	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
1576	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1577
	! Entry point used when the initial permutation of this block was
	! already done together with the final permutation of the previous one.
1578.ncbc.enc.next.block_2:
1579
1580!//	call .des_enc                     ! compares in2 to 8
1581!	rounds inlined for alignment purposes
1582
1583	add	global1, 768, global4     ! address sbox 4 since register used below
1584
1585	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption  ks in3
1586
	! Branch on the condition codes left by the rounds, which compare in2
	! to 8. Taken when fewer than 8 bytes follow the current block.
1587#ifdef OPENSSL_SYSNAME_ULTRASPARC
1588	bl,pn	%icc, .ncbc.enc.next.block_fp
1589#else
1590	bl	.ncbc.enc.next.block_fp
1591#endif
1592	add	in0, 8, in0               ! input address
1593
1594	! If 8 or more bytes are to be encrypted after this block,
1595	! we combine final permutation for this block with initial
1596	! permutation for next block. Load next block:
1597
1598	load_little_endian(in0, global3, global4, local5, .LLE12)
1599
1600	!  parameter 1   original left
1601	!  parameter 2   original right
1602	!  parameter 3   left ip
1603	!  parameter 4   right ip
1604	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1605	!                2: mov in4 to in3
1606	!
1607	! also adds -8 to length in2 and loads loop counter to out4
1608
1609	fp_ip_macro(out0, out1, global3, global4, 2)
1610
1611	store_little_endian(in1, out0, out1, local3, .SLE10)  ! block
1612
	! in5 and out5 still hold the ciphertext of the block just stored.
	! They are combined with the permuted next block, going through
	! local1 because in5 is overwritten first.
1613	ld	[in3], out0               ! key 7531 first round next block
1614	mov 	in5, local1
1615	xor	global3, out5, in5        ! iv xor next block
1616
1617	ld	[in3+4], out1             ! key 8642
1618	add	global1, 512, global3     ! address sbox 3 since register used
1619	xor	global4, local1, out5     ! iv xor next block
1620
1621	ba	.ncbc.enc.next.block_2
1622	add	in1, 8, in1               ! output address
1623
	! Last full block: plain final permutation, then store.
1624.ncbc.enc.next.block_fp:
1625
1626	fp_macro(in5, out5)
1627
1628	store_little_endian(in1, in5, out5, local3, .SLE1)  ! block
1629
1630	addcc   in2, -8, in2              ! bytes missing when next block done
1631
1632#ifdef OPENSSL_SYSNAME_ULTRASPARC
1633	bpos,pt	%icc, .ncbc.enc.next.block  ! also jumps if 0
1634#else
1635	bpos	.ncbc.enc.next.block
1636#endif
1637	add	in1, 8, in1
1638
	! in2 is the remaining byte count minus 8 here. A value of -8 means
	! nothing is left, anything above means a partial block of 1 to 7 bytes.
1639.ncbc.enc.seven.or.less:
1640
1641	cmp	in2, -8
1642
1643#ifdef OPENSSL_SYSNAME_ULTRASPARC
1644	ble,pt	%icc, .ncbc.enc.finish
1645#else
1646	ble	.ncbc.enc.finish
1647#endif
1648	nop
1649
1650	add	in2, 8, local1            ! bytes to load
1651
1652	! addr, length, dest left, dest right, temp, temp2, label, ret label
1653	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1654
1655	! Loads 1 to 7 bytes little endian to global4, out4
1656
1657
	! Write the last ciphertext block back as the new chaining value.
1658.ncbc.enc.finish:
1659
1660	LDPTR	IVEC, local4
1661	store_little_endian(local4, in5, out5, local5, .SLE2)  ! ivec
1662
1663	ret
1664	restore
1665
1666
	! Decryption path. The key pointer is advanced by 120 so that the
	! rounds start from the last subkey. in0 and in1 carry the chaining
	! value, local5 the read pointer and local7 the write pointer.
1667.ncbc.dec:
1668
1669	STPTR	in0, INPUT
1670	cmp	in2, 0                    ! length
1671	add	in3, 120, in3
1672
1673	LDPTR	IVEC, local7              ! ivec
	! Nothing to do for a length of zero or less.
1674#ifdef OPENSSL_SYSNAME_ULTRASPARC
1675	ble,pn	%icc, .ncbc.dec.finish
1676#else
1677	ble	.ncbc.dec.finish
1678#endif
1679	mov	in3, in4                  ! schedule
1680
1681	STPTR	in1, OUTPUT
1682	mov	in0, local5               ! input
1683
1684	load_little_endian(local7, in0, in1, local3, .LLE3)   ! ivec
1685
1686.ncbc.dec.next.block:
1687
1688	load_little_endian(local5, in5, out5, local3, .LLE4)  ! block
1689
1690	! parameter 6  1/2 for include encryption/decryption
1691	! parameter 7  1 for mov in1 to in3
1692	! parameter 8  1 for mov in3 to in4
1693
1694	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption  ks in4
1695
1696	fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1697
1698	! in2 is bytes left to be stored
1699	! in2 is compared to 8 in the rounds
1700
	! Both xor instructions run whether or not the branch is taken,
	! the second one sits in the delay slot.
1701	xor	out5, in0, out4           ! iv xor
1702#ifdef OPENSSL_SYSNAME_ULTRASPARC
1703	bl,pn	%icc, .ncbc.dec.seven.or.less
1704#else
1705	bl	.ncbc.dec.seven.or.less
1706#endif
1707	xor	in5, in1, global4         ! iv xor
1708
1709	! Load ivec next block now, since input and output address might be the same.
1710
1711	load_little_endian_inc(local5, in0, in1, local3, .LLE5)  ! iv
1712
1713	store_little_endian(local7, out4, global4, local3, .SLE3)
1714
	! Save the advanced read and write pointers back to their slots.
1715	STPTR	local5, INPUT
1716	add	local7, 8, local7
1717	addcc   in2, -8, in2
1718
1719#ifdef OPENSSL_SYSNAME_ULTRASPARC
1720	bg,pt	%icc, .ncbc.dec.next.block
1721#else
1722	bg	.ncbc.dec.next.block
1723#endif
1724	STPTR	local7, OUTPUT
1725
1726
	! Write the last ciphertext block read back as the new chaining value.
1727.ncbc.dec.store.iv:
1728
1729	LDPTR	IVEC, local4              ! ivec
1730	store_little_endian(local4, in0, in1, local5, .SLE4)
1731
1732.ncbc.dec.finish:
1733
1734	ret
1735	restore
1736
	! Final partial block: only in2 bytes of the plaintext are stored,
	! then control continues at the chaining value store above.
1737.ncbc.dec.seven.or.less:
1738
1739	load_little_endian_inc(local5, in0, in1, local3, .LLE13)     ! ivec
1740
1741	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1742
1743
1744.DES_ncbc_encrypt.end:
1745	.size	 DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1746
1747
1748! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1749! **************************************************************************
!
! CBC mode with triple DES. The first six arguments arrive in in0 to in5,
! ivec and enc are read from the stack frame of the caller through %fp.
! A zero enc value selects the decryption path.
! Encryption runs encrypt ks1, decrypt ks2, encrypt ks3 on each block.
! Decryption runs decrypt ks3, encrypt ks2, decrypt ks1.
! The three key schedule pointers are parked in stack slots because the
! rounds need in3 and in4 for the schedule currently in use.
! The final chaining value is written back through ivec before returning,
! except when the decryption path is entered with a length of zero or
! less, which returns at once.
1750
1751
1752	.align 32
1753	.global DES_ede3_cbc_encrypt
1754	.type	 DES_ede3_cbc_encrypt,#function
1755
1756DES_ede3_cbc_encrypt:
1757
1758	save	%sp, FRAME, %sp
1759
	! Symbolic names for the three key schedule slots addressed off %sp.
1760	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1761	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1762	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1763
	! Position independent setup, see .PIC.me.up. On return out2 points
	! to .des_and and global1 to DES_SPtrans.
1764	call	.PIC.me.up
1765	mov	.PIC.me.up-(.-4),out0
1766
1767	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
1768	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1769	cmp	local3, 0                 ! enc
1770
	! The store in the delay slot saves the unmodified ks2 on both paths.
1771#ifdef OPENSSL_SYSNAME_ULTRASPARC
1772	be,pn	%icc, .ede3.dec
1773#else
1774	be	.ede3.dec
1775#endif
1776	STPTR	in4, KS2
1777
1778	STPTR	in5, KS3
1779
1780	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec
1781
1782	addcc	in2, -8, in2              ! bytes missing after next block
1783
	! Negative result: fewer than 8 bytes were requested in total.
	! ks1 is saved in the delay slot either way.
1784#ifdef OPENSSL_SYSNAME_ULTRASPARC
1785	bl,pn	%icc,  .ede3.enc.seven.or.less
1786#else
1787	bl	.ede3.enc.seven.or.less
1788#endif
1789	STPTR	in3, KS1
1790
1791.ede3.enc.next.block:
1792
1793	load_little_endian(in0, out4, global4, local3, .LLE7)
1794
	! Entry point used after a partial block has been loaded byte by byte.
1795.ede3.enc.next.block_1:
1796
1797	LDPTR	KS2, in4
1798	xor	in5, out4, in5            ! iv xor
1799	xor	out5, global4, out5       ! iv xor
1800
1801	LDPTR	KS1, in3
1802	add	in4, 120, in4             ! for decryption we use last subkey first
1803	nop
1804
1805	ip_macro(in5, out5, in5, out5, in3)
1806
	! Entry point used when the initial permutation of this block was
	! already done together with the final permutation of the previous one.
1807.ede3.enc.next.block_2:
1808
1809	call .des_enc                     ! ks1 in3
1810	nop
1811
	! The delay slot fetches ks3 for the third pass.
1812	call .des_dec                     ! ks2 in4
1813	LDPTR	KS3, in3
1814
1815	call .des_enc                     ! ks3 in3  compares in2 to 8
1816	nop
1817
	! Taken when fewer than 8 bytes follow the current block.
1818#ifdef OPENSSL_SYSNAME_ULTRASPARC
1819	bl,pn	%icc, .ede3.enc.next.block_fp
1820#else
1821	bl	.ede3.enc.next.block_fp
1822#endif
1823	add	in0, 8, in0
1824
1825	! If 8 or more bytes are to be encrypted after this block,
1826	! we combine final permutation for this block with initial
1827	! permutation for next block. Load next block:
1828
1829	load_little_endian(in0, global3, global4, local5, .LLE11)
1830
1831	!  parameter 1   original left
1832	!  parameter 2   original right
1833	!  parameter 3   left ip
1834	!  parameter 4   right ip
1835	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
1836	!                2: mov in4 to in3
1837	!
1838	! also adds -8 to length in2 and loads loop counter to out4
1839
1840	fp_ip_macro(out0, out1, global3, global4, 1)
1841
1842	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block
1843
	! in5 and out5 still hold the ciphertext of the block just stored.
	! They are combined with the permuted next block, going through
	! local1 because in5 is overwritten first.
1844	mov 	in5, local1
1845	xor	global3, out5, in5        ! iv xor next block
1846
1847	ld	[in3], out0               ! key 7531
1848	add	global1, 512, global3     ! address sbox 3
1849	xor	global4, local1, out5     ! iv xor next block
1850
1851	ld	[in3+4], out1             ! key 8642
1852	add	global1, 768, global4     ! address sbox 4
1853	ba	.ede3.enc.next.block_2
1854	add	in1, 8, in1
1855
	! Last full block: plain final permutation, then store.
1856.ede3.enc.next.block_fp:
1857
1858	fp_macro(in5, out5)
1859
1860	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block
1861
1862	addcc   in2, -8, in2              ! bytes missing when next block done
1863
1864#ifdef OPENSSL_SYSNAME_ULTRASPARC
1865	bpos,pt	%icc, .ede3.enc.next.block
1866#else
1867	bpos	.ede3.enc.next.block
1868#endif
1869	add	in1, 8, in1
1870
	! in2 is the remaining byte count minus 8 here. A value of -8 means
	! nothing is left, anything above means a partial block of 1 to 7 bytes.
1871.ede3.enc.seven.or.less:
1872
1873	cmp	in2, -8
1874
1875#ifdef OPENSSL_SYSNAME_ULTRASPARC
1876	ble,pt	%icc, .ede3.enc.finish
1877#else
1878	ble	.ede3.enc.finish
1879#endif
1880	nop
1881
1882	add	in2, 8, local1            ! bytes to load
1883
1884	! addr, length, dest left, dest right, temp, temp2, label, ret label
1885	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1886
	! Write the last ciphertext block back as the new chaining value.
1887.ede3.enc.finish:
1888
1889	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1890	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec
1891
1892	ret
1893	restore
1894
	! Decryption path. ks1 and ks3 are advanced by 120 before being saved
	! because they are used for decryption passes, which start from the
	! last subkey. in0 and in1 carry the chaining value, local5 the read
	! pointer and local7 the write pointer.
1895.ede3.dec:
1896
1897	STPTR	in0, INPUT
1898	add	in5, 120, in5
1899
1900	STPTR	in1, OUTPUT
1901	mov	in0, local5
1902	add	in3, 120, in3
1903
1904	STPTR	in3, KS1
1905	cmp	in2, 0
1906
	! Nothing to do for a length of zero or less. That exit is the rare
	! case, so the branch carries the pn hint like its counterpart in
	! the single key CBC routine. ks3 is saved in the delay slot.
1907#ifdef OPENSSL_SYSNAME_ULTRASPARC
1908	ble,pn	%icc, .ede3.dec.finish
1909#else
1910	ble	.ede3.dec.finish
1911#endif
1912	STPTR	in5, KS3
1913
1914	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
1915	load_little_endian(local7, in0, in1, local3, .LLE8)
1916
1917.ede3.dec.next.block:
1918
1919	load_little_endian(local5, in5, out5, local3, .LLE9)
1920
1921	! parameter 6  1/2 for include encryption/decryption
1922	! parameter 7  1 for mov in1 to in3
1923	! parameter 8  1 for mov in3 to in4
1924	! parameter 9  1 for load ks3 and ks2 to in4 and in3
1925
1926	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1927
	! The delay slot fetches ks1 for the last decryption pass.
1928	call .des_enc                     ! ks2 in3
1929	LDPTR	KS1, in4
1930
1931	call .des_dec                     ! ks1 in4
1932	nop
1933
1934	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7
1935
1936	! in2 is bytes left to be stored
1937	! in2 is compared to 8 in the rounds
1938
	! Both xor instructions apply the chaining value. They run whether or
	! not the branch is taken, the second one sits in the delay slot.
1939	xor	out5, in0, out4
1940#ifdef OPENSSL_SYSNAME_ULTRASPARC
1941	bl,pn	%icc, .ede3.dec.seven.or.less
1942#else
1943	bl	.ede3.dec.seven.or.less
1944#endif
1945	xor	in5, in1, global4
1946
1947	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block
1948
1949	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block
1950
	! Save the advanced read and write pointers back to their slots.
1951	STPTR	local5, INPUT
1952	addcc   in2, -8, in2
1953	add	local7, 8, local7
1954
1955#ifdef OPENSSL_SYSNAME_ULTRASPARC
1956	bg,pt	%icc, .ede3.dec.next.block
1957#else
1958	bg	.ede3.dec.next.block
1959#endif
1960	STPTR	local7, OUTPUT
1961
	! Write the last ciphertext block read back as the new chaining value.
1962.ede3.dec.store.iv:
1963
1964	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
1965	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec
1966
1967.ede3.dec.finish:
1968
1969	ret
1970	restore
1971
	! Final partial block: only in2 bytes of the plaintext are stored,
	! then control continues at the chaining value store above.
1972.ede3.dec.seven.or.less:
1973
1974	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv
1975
1976	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1977
1978
1979.DES_ede3_cbc_encrypt.end:
1980	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1981