1.text
2
3.globl	cmll_t4_encrypt
4.align	32
!
! cmll_t4_encrypt -- encrypt a single 16-byte block with the T4 Camellia
! opcodes.
!
! Register use, as read from the code below:
!   %o0  input pointer; any alignment (three 8-byte loads are shifted and
!        merged when it is not 8-byte aligned)
!   %o1  output pointer; any alignment (faligndata + partial stores when it
!        is not 8-byte aligned)
!   %o2  key schedule: the 16 bytes at +0 are XORed into the input, 8-byte
!        round keys follow from +16, and a 32-bit "grand round" count
!        (3 or 4) is read from +272
!
! Leaf routine: no register window is allocated and it returns with retl.
! Scratch registers written here: %g1, %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f26.
! The crypto and VIS instructions are emitted as raw .word values; the
! mnemonic each word stands for is given in the trailing comment.
! NOTE(review): the C-level prototype is not visible in this file -- confirm
! the argument order against the caller's declaration.
!
5cmll_t4_encrypt:
6	andcc		%o0, 7, %g1		! is input aligned?
7	andn		%o0, 7, %o0		! round input pointer down to 8 bytes
8
9	ldx		[%o2 + 0], %g4		! first 16 bytes of the schedule,
10	ldx		[%o2 + 8], %g5		! XORed into the input block below
11
12	ldx		[%o0 + 0], %o4
13	bz,pt		%icc, 1f		! aligned input: skip the shift/merge
14	ldx		[%o0 + 8], %o5		! (delay slot: executed on both paths)
15	ldx		[%o0 + 16], %o0		! unaligned: a third doubleword is needed
16	sll		%g1, 3, %g1		! byte offset -> bit count
17	sub		%g0, %g1, %o3		! negated count; used as the right-shift amount
18	sllx		%o4, %g1, %o4
19	sllx		%o5, %g1, %g1
20	srlx		%o5, %o3, %o5
21	srlx		%o0, %o3, %o3
22	or		%o5, %o4, %o4		! %o4 = first 8 input bytes
23	or		%o3, %g1, %o5		! %o5 = last 8 input bytes
241:
25	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
26	ldd		[%o2 + 16], %f12
27	ldd		[%o2 + 24], %f14
28	xor		%g4, %o4, %o4		! XOR block with schedule[0..15]
29	xor		%g5, %o5, %o5
30	ldd		[%o2 + 32], %f16
31	ldd		[%o2 + 40], %f18
32	.word	0x81b0230c !movxtod	%o4,%f0
33	.word	0x85b0230d !movxtod	%o5,%f2
34	ldd		[%o2 + 48], %f20
35	ldd		[%o2 + 56], %f22
36	sub		%o3, 1, %o3		! the last grand round is done after the loop
37	ldd		[%o2 + 64], %f24
38	ldd		[%o2 + 72], %f26
39	add		%o2, 80, %o2		! %o2 -> keys for the next grand round
40
! One pass = six camellia_f rounds followed by camellia_fl/camellia_fli.
! Each key register is reloaded with the next pass's key right after use.
41.Lenc:
42	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
43	ldd		[%o2 + 0], %f12
44	sub		%o3,1,%o3
45	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
46	ldd		[%o2 + 8], %f14
47	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
48	ldd		[%o2 + 16], %f16
49	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
50	ldd		[%o2 + 24], %f18
51	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
52	ldd		[%o2 + 32], %f20
53	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
54	ldd		[%o2 + 40], %f22
55	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
56	ldd		[%o2 + 48], %f24
57	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
58	ldd		[%o2 + 56], %f26
59	brnz,pt		%o3, .Lenc
60	add		%o2, 64, %o2		! (delay slot) advance to the next 64 bytes of keys
61
! Final six rounds; the last two go through %f4 so that the closing XORs
! with %f24/%f26 produce the output halves in %f0/%f2.
62	andcc		%o1, 7, %o4		! is output aligned?
63	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
64	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
65	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
66	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
67	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
68	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
69	.word	0x81b60d84 !fxor	%f24,%f4,%f0
70	.word	0x85b68d82 !fxor	%f26,%f2,%f2
71
72	bnz,pn		%icc, 2f		! tests the andcc on %o1 above
73	nop
74
75	std		%f0, [%o1 + 0]
76	retl
77	std		%f2, [%o1 + 8]		! (delay slot)
78
! Unaligned output: realign the two result doublewords into three and write
! the first and last with byte-masked partial stores.
792:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
80	mov		0xff, %o5
81	srl		%o5, %o4, %o5		! byte mask for the first doubleword
82
83	.word	0x89b00900 !faligndata	%f0,%f0,%f4
84	.word	0x8db00902 !faligndata	%f0,%f2,%f6
85	.word	0x91b08902 !faligndata	%f2,%f2,%f8
86
87	stda		%f4, [%o1 + %o5]0xc0	! partial store
88	std		%f6, [%o1 + 8]
89	add		%o1, 16, %o1
90	orn		%g0, %o5, %o5		! complement the mask for the last doubleword
91	retl
92	stda		%f8, [%o1 + %o5]0xc0	! partial store
93.type	cmll_t4_encrypt,#function
94.size	cmll_t4_encrypt,.-cmll_t4_encrypt
95
96.globl	cmll_t4_decrypt
97.align	32
!
! cmll_t4_decrypt -- decrypt a single 16-byte block with the T4 Camellia
! opcodes.
!
! Same register roles as cmll_t4_encrypt (%o0 input, %o1 output, %o2 key
! schedule), and the same instruction sequence per round; the difference is
! that the schedule is walked backwards.  %o2 is first advanced by
! grandRounds*64 bytes, the 16 bytes found there are XORed into the input,
! and round keys are then fetched at descending addresses.
!
! Leaf routine, returns with retl.  Scratch registers written here: %g1,
! %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f26.
!
98cmll_t4_decrypt:
99	ld		[%o2 + 272], %o3	! grandRounds, 3 or 4
100	andcc		%o0, 7, %g1		! is input aligned?
101	andn		%o0, 7, %o0		! round input pointer down to 8 bytes
102
103	sll		%o3, 6, %o3		! grandRounds * 64
104	add		%o3, %o2, %o2		! %o2 -> 16-byte key at the far end of the schedule
105
106	ldx		[%o0 + 0], %o4
107	bz,pt		%icc, 1f		! aligned input: skip the shift/merge
108	ldx		[%o0 + 8], %o5		! (delay slot: executed on both paths)
109	ldx		[%o0 + 16], %o0		! unaligned: a third doubleword is needed
110	sll		%g1, 3, %g1		! byte offset -> bit count
111	sub		%g0, %g1, %g4		! negated count; used as the right-shift amount
112	sllx		%o4, %g1, %o4
113	sllx		%o5, %g1, %g1
114	srlx		%o5, %g4, %o5
115	srlx		%o0, %g4, %g4
116	or		%o5, %o4, %o4		! %o4 = first 8 input bytes
117	or		%g4, %g1, %o5		! %o5 = last 8 input bytes
1181:
119	ldx		[%o2 + 0], %g4
120	ldx		[%o2 + 8], %g5
121	ldd		[%o2 - 8], %f12
122	ldd		[%o2 - 16], %f14
123	xor		%g4, %o4, %o4		! XOR block with the 16 bytes at %o2
124	xor		%g5, %o5, %o5
125	ldd		[%o2 - 24], %f16
126	ldd		[%o2 - 32], %f18
127	.word	0x81b0230c !movxtod	%o4,%f0
128	.word	0x85b0230d !movxtod	%o5,%f2
129	ldd		[%o2 - 40], %f20
130	ldd		[%o2 - 48], %f22
131	sub		%o3, 64, %o3		! the last grand round is done after the loop
132	ldd		[%o2 - 56], %f24
133	ldd		[%o2 - 64], %f26
134	sub		%o2, 64, %o2
135
! One pass = six camellia_f rounds followed by camellia_fl/camellia_fli.
! %o3 counts down in steps of 64, matching the 64 bytes of keys consumed.
136.Ldec:
137	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
138	ldd		[%o2 - 8], %f12
139	sub		%o3, 64, %o3
140	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
141	ldd		[%o2 - 16], %f14
142	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
143	ldd		[%o2 - 24], %f16
144	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
145	ldd		[%o2 - 32], %f18
146	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
147	ldd		[%o2 - 40], %f20
148	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
149	ldd		[%o2 - 48], %f22
150	.word	0x81b62780 !camellia_fl	%f24,%f0,%f0
151	ldd		[%o2 - 56], %f24
152	.word	0x85b6a7a2 !camellia_fli	%f26,%f2,%f2
153	ldd		[%o2 - 64], %f26
154	brnz,pt		%o3, .Ldec
155	sub		%o2, 64, %o2		! (delay slot) step back 64 bytes of keys
156
! Final six rounds.  %f26 holds the doubleword at the lower address and
! %f24 the one above it, so the closing XORs use them in that order.
157	andcc		%o1, 7, %o4		! is output aligned?
158	.word	0x84cb0182 !camellia_f	%f12,%f2,%f0,%f2
159	.word	0x80cb8580 !camellia_f	%f14,%f0,%f2,%f0
160	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
161	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
162	.word	0x88cd0182 !camellia_f	%f20,%f2,%f0,%f4
163	.word	0x84cd8980 !camellia_f	%f22,%f0,%f4,%f2
164	.word	0x81b68d84 !fxor	%f26,%f4,%f0
165	.word	0x85b60d82 !fxor	%f24,%f2,%f2
166
167	bnz,pn		%icc, 2f		! tests the andcc on %o1 above
168	nop
169
170	std		%f0, [%o1 + 0]
171	retl
172	std		%f2, [%o1 + 8]		! (delay slot)
173
! Unaligned output: realign the two result doublewords into three and write
! the first and last with byte-masked partial stores.
1742:	.word	0x93b24340 !alignaddrl	%o1,%g0,%o1
175	mov		0xff, %o5
176	srl		%o5, %o4, %o5		! byte mask for the first doubleword
177
178	.word	0x89b00900 !faligndata	%f0,%f0,%f4
179	.word	0x8db00902 !faligndata	%f0,%f2,%f6
180	.word	0x91b08902 !faligndata	%f2,%f2,%f8
181
182	stda		%f4, [%o1 + %o5]0xc0	! partial store
183	std		%f6, [%o1 + 8]
184	add		%o1, 16, %o1
185	orn		%g0, %o5, %o5		! complement the mask for the last doubleword
186	retl
187	stda		%f8, [%o1 + %o5]0xc0	! partial store
188.type	cmll_t4_decrypt,#function
189.size	cmll_t4_decrypt,.-cmll_t4_decrypt
190.globl	cmll_t4_set_key
191.align	32
!
! cmll_t4_set_key -- expand a user key into the Camellia key schedule.
!
! Register use, as read from the code below:
!   %o0  user key bytes; any alignment (alignaddr/faligndata when unaligned)
!   %o1  key length in bits: <192 takes the 128-bit path, ==192 the 192-bit
!        path, anything larger the 256-bit path
!   %o2  output schedule; 8-byte stores at offsets 0x00..0xc8 (128-bit) or
!        0x00..0x108 (192/256-bit), plus a 32-bit grand-round count
!        (3 or 4) at offset 0x110
! Returns 0 in %o0.  Leaf routine; %o7 is saved in %o5 around the
! "call .+8" that is used to obtain a PC-relative pointer to SIGMA.
!
! In the comments below KL, KR, KA and KB are the 128-bit intermediate keys
! of the Camellia key schedule as named in RFC 3713, and "<<< n" is a
! 128-bit left rotation by n bits in total.  The k[..] annotations on the
! stores are the original author's 32-bit word indices into the schedule.
!
192cmll_t4_set_key:
193	and		%o0, 7, %o3		! %o3 != 0 when the key is unaligned
194	.word	0x91b20300 !alignaddr	%o0,%g0,%o0
195	cmp		%o1, 192		! result is reused by the bge further down
196	ldd		[%o0 + 0], %f0
197	bl,pt		%icc,.L128
198	ldd		[%o0 + 8], %f2
199
200	be,pt		%icc,.L192
201	ldd		[%o0 + 16], %f4
202
! 256-bit key: four doublewords, five loads when unaligned.
203	brz,pt		%o3, .L256aligned
204	ldd		[%o0 + 24], %f6
205
206	ldd		[%o0 + 32], %f8
207	.word	0x81b00902 !faligndata	%f0,%f2,%f0
208	.word	0x85b08904 !faligndata	%f2,%f4,%f2
209	.word	0x89b10906 !faligndata	%f4,%f6,%f4
210	b		.L256aligned
211	.word	0x8db18908 !faligndata	%f6,%f8,%f6
212
213.align	16
! 192-bit key: the missing fourth doubleword is the complement of the third.
214.L192:
215	brz,a,pt	%o3, .L256aligned
216	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
217
218	ldd		[%o0 + 24], %f6
219	nop
220	.word	0x81b00902 !faligndata	%f0,%f2,%f0
221	.word	0x85b08904 !faligndata	%f2,%f4,%f2
222	.word	0x89b10906 !faligndata	%f4,%f6,%f4
223	.word	0x8db00cc4 !fnot2	%f0,%f4,%f6
224
! Here %f0:%f2 = KL and %f4:%f6 = KR.  KL is stored and kept in %f28:%f30;
! %f0:%f2 becomes KL ^ KR, the starting value for deriving KA.
225.L256aligned:
226	std		%f0, [%o2 + 0]		! k[0, 1]
227	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
228	std		%f2, [%o2 + 8]		! k[2, 3]
229	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
230	.word	0x81b10d80 !fxor	%f4,%f0,%f0
231	b		.L128key
232	.word	0x85b18d82 !fxor	%f6,%f2,%f2
233
234.align	16
! 128-bit key: two doublewords, three loads when unaligned.
235.L128:
236	brz,pt		%o3, .L128aligned
237	nop
238
239	ldd		[%o0 + 16], %f4
240	nop
241	.word	0x81b00902 !faligndata	%f0,%f2,%f0
242	.word	0x85b08904 !faligndata	%f2,%f4,%f2
243
! Here %f0:%f2 = KL; it is stored and a copy kept in %f28:%f30.
244.L128aligned:
245	std		%f0, [%o2 + 0]		! k[0, 1]
246	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
247	std		%f2, [%o2 + 8]		! k[2, 3]
248	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
249
! Common part: derive KA into %f0:%f2 with four camellia_f rounds keyed by
! the first four SIGMA constants, XORing KL back in after the second round.
250.L128key:
251	mov		%o7, %o5		! preserve the return address
2521:	call		.+8			! %o7 = address of this call (label 1)
253	add		%o7, SIGMA-1b, %o4	! (delay slot) %o4 = &SIGMA
254	mov		%o5, %o7		! restore the return address
255
256	ldd		[%o4 + 0], %f16
257	ldd		[%o4 + 8], %f18
258	ldd		[%o4 + 16], %f20
259	ldd		[%o4 + 24], %f22
260
261	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
262	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
263	.word	0x81b70d80 !fxor	%f28,%f0,%f0
264	.word	0x85b78d82 !fxor	%f30,%f2,%f2
265	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
266	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
267
! %icc still holds the result of "cmp %o1, 192" from the top of the
! routine; no instruction in between writes the integer condition codes.
268	bge,pn		%icc, .L256key
269	nop
! ---- 128-bit schedule: rotations of KA, then rotations of KL ----
270	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
271	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
272
273	.word	0x99b02200 !movdtox	%f0,%o4
274	.word	0x9bb02202 !movdtox	%f2,%o5
! KA <<< 15
275	srlx	%o4, 64-15, %g4
276	sllx	%o4, 15, %o4
277	srlx	%o5, 64-15, %g5
278	sllx	%o5, 15, %o5
279	or	%o4, %g5, %o4
280	or	%o5, %g4, %o5
281	stx	%o4, [%o2 + 0x30]	! k[12, 13]
282	stx	%o5, [%o2 + 0x38]	! k[14, 15]
! KA <<< 30
283	srlx	%o4, 64-15, %g4
284	sllx	%o4, 15, %o4
285	srlx	%o5, 64-15, %g5
286	sllx	%o5, 15, %o5
287	or	%o4, %g5, %o4
288	or	%o5, %g4, %o5
289	stx	%o4, [%o2 + 0x40]	! k[16, 17]
290	stx	%o5, [%o2 + 0x48]	! k[18, 19]
! KA <<< 45 (only the high half is stored; 0x68 is filled from KL below)
291	srlx	%o4, 64-15, %g4
292	sllx	%o4, 15, %o4
293	srlx	%o5, 64-15, %g5
294	sllx	%o5, 15, %o5
295	or	%o4, %g5, %o4
296	or	%o5, %g4, %o5
297	stx	%o4, [%o2 + 0x60]	! k[24, 25]
! KA <<< 60
298	srlx	%o4, 64-15, %g4
299	sllx	%o4, 15, %o4
300	srlx	%o5, 64-15, %g5
301	sllx	%o5, 15, %o5
302	or	%o4, %g5, %o4
303	or	%o5, %g4, %o5
304	stx	%o4, [%o2 + 0x70]	! k[28, 29]
305	stx	%o5, [%o2 + 0x78]	! k[30, 31]
! KA <<< 94
306	srlx	%o4, 64-34, %g4
307	sllx	%o4, 34, %o4
308	srlx	%o5, 64-34, %g5
309	sllx	%o5, 34, %o5
310	or	%o4, %g5, %o4
311	or	%o5, %g4, %o5
312	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
313	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
! KA <<< 111
314	srlx	%o4, 64-17, %g4
315	sllx	%o4, 17, %o4
316	srlx	%o5, 64-17, %g5
317	sllx	%o5, 17, %o5
318	or	%o4, %g5, %o4
319	or	%o5, %g4, %o5
320	stx	%o4, [%o2 + 0xc0]	! k[48, 49]
321	stx	%o5, [%o2 + 0xc8]	! k[50, 51]
322
323	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
324	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
! KL <<< 15
325	srlx	%o4, 64-15, %g4
326	sllx	%o4, 15, %o4
327	srlx	%o5, 64-15, %g5
328	sllx	%o5, 15, %o5
329	or	%o4, %g5, %o4
330	or	%o5, %g4, %o5
331	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
332	stx	%o5, [%o2 + 0x28]	! k[10, 11]
! KL <<< 45
333	srlx	%o4, 64-30, %g4
334	sllx	%o4, 30, %o4
335	srlx	%o5, 64-30, %g5
336	sllx	%o5, 30, %o5
337	or	%o4, %g5, %o4
338	or	%o5, %g4, %o5
339	stx	%o4, [%o2 + 0x50]	! k[20, 21]
340	stx	%o5, [%o2 + 0x58]	! k[22, 23]
! KL <<< 60 (only the low half is stored; 0x60 was filled from KA above)
341	srlx	%o4, 64-15, %g4
342	sllx	%o4, 15, %o4
343	srlx	%o5, 64-15, %g5
344	sllx	%o5, 15, %o5
345	or	%o4, %g5, %o4
346	or	%o5, %g4, %o5
347	stx	%o5, [%o2 + 0x68]	! k[26, 27]
! KL <<< 77
348	srlx	%o4, 64-17, %g4
349	sllx	%o4, 17, %o4
350	srlx	%o5, 64-17, %g5
351	sllx	%o5, 17, %o5
352	or	%o4, %g5, %o4
353	or	%o5, %g4, %o5
354	stx	%o4, [%o2 + 0x80]	! k[32, 33]
355	stx	%o5, [%o2 + 0x88]	! k[34, 35]
! KL <<< 94
356	srlx	%o4, 64-17, %g4
357	sllx	%o4, 17, %o4
358	srlx	%o5, 64-17, %g5
359	sllx	%o5, 17, %o5
360	or	%o4, %g5, %o4
361	or	%o5, %g4, %o5
362	stx	%o4, [%o2 + 0x90]	! k[36, 37]
363	stx	%o5, [%o2 + 0x98]	! k[38, 39]
! KL <<< 111
364	srlx	%o4, 64-17, %g4
365	sllx	%o4, 17, %o4
366	srlx	%o5, 64-17, %g5
367	sllx	%o5, 17, %o5
368	or	%o4, %g5, %o4
369	or	%o5, %g4, %o5
370	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
371	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
372
373	mov		3, %o3
374	st		%o3, [%o2 + 0x110]	! grand-round count for 128-bit keys
375	retl
376	xor		%o0, %o0, %o0		! (delay slot) return 0
377
378.align	16
! ---- 192/256-bit schedule ----
! On entry %f0:%f2 = KA, %f4:%f6 = KR, %f28:%f30 = KL, %o4 = &SIGMA.
! KA is parked at 0x30 (it is reloaded and rotated in place later), then
! KB is derived from KA ^ KR with two more camellia_f rounds keyed by the
! fifth and sixth SIGMA constants.
379.L256key:
380	ldd		[%o4 + 32], %f24
381	ldd		[%o4 + 40], %f26
382
383	std		%f0, [%o2 + 0x30]	! k[12, 13]
384	std		%f2, [%o2 + 0x38]	! k[14, 15]
385
386	.word	0x81b10d80 !fxor	%f4,%f0,%f0
387	.word	0x85b18d82 !fxor	%f6,%f2,%f2
388	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
389	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
390
391	std	%f0, [%o2 + 0x10]	! k[ 4,  5]
392	std	%f2, [%o2 + 0x18]	! k[ 6,  7]
393
394	.word	0x99b02200 !movdtox	%f0,%o4
395	.word	0x9bb02202 !movdtox	%f2,%o5
! KB <<< 30
396	srlx	%o4, 64-30, %g4
397	sllx	%o4, 30, %o4
398	srlx	%o5, 64-30, %g5
399	sllx	%o5, 30, %o5
400	or	%o4, %g5, %o4
401	or	%o5, %g4, %o5
402	stx	%o4, [%o2 + 0x50]	! k[20, 21]
403	stx	%o5, [%o2 + 0x58]	! k[22, 23]
! KB <<< 60
404	srlx	%o4, 64-30, %g4
405	sllx	%o4, 30, %o4
406	srlx	%o5, 64-30, %g5
407	sllx	%o5, 30, %o5
408	or	%o4, %g5, %o4
409	or	%o5, %g4, %o5
410	stx	%o4, [%o2 + 0xa0]	! k[40, 41]
411	stx	%o5, [%o2 + 0xa8]	! k[42, 43]
! KB <<< 111
412	srlx	%o4, 64-51, %g4
413	sllx	%o4, 51, %o4
414	srlx	%o5, 64-51, %g5
415	sllx	%o5, 51, %o5
416	or	%o4, %g5, %o4
417	or	%o5, %g4, %o5
418	stx	%o4, [%o2 + 0x100]	! k[64, 65]
419	stx	%o5, [%o2 + 0x108]	! k[66, 67]
420
421	.word	0x99b02204 !movdtox	%f4,%o4		! k[ 8,  9]
422	.word	0x9bb02206 !movdtox	%f6,%o5		! k[10, 11]
! KR <<< 15
423	srlx	%o4, 64-15, %g4
424	sllx	%o4, 15, %o4
425	srlx	%o5, 64-15, %g5
426	sllx	%o5, 15, %o5
427	or	%o4, %g5, %o4
428	or	%o5, %g4, %o5
429	stx	%o4, [%o2 + 0x20]	! k[ 8,  9]
430	stx	%o5, [%o2 + 0x28]	! k[10, 11]
! KR <<< 30
431	srlx	%o4, 64-15, %g4
432	sllx	%o4, 15, %o4
433	srlx	%o5, 64-15, %g5
434	sllx	%o5, 15, %o5
435	or	%o4, %g5, %o4
436	or	%o5, %g4, %o5
437	stx	%o4, [%o2 + 0x40]	! k[16, 17]
438	stx	%o5, [%o2 + 0x48]	! k[18, 19]
! KR <<< 60
439	srlx	%o4, 64-30, %g4
440	sllx	%o4, 30, %o4
441	srlx	%o5, 64-30, %g5
442	sllx	%o5, 30, %o5
443	or	%o4, %g5, %o4
444	or	%o5, %g4, %o5
445	stx	%o4, [%o2 + 0x90]	! k[36, 37]
446	stx	%o5, [%o2 + 0x98]	! k[38, 39]
! KR <<< 94
447	srlx	%o4, 64-34, %g4
448	sllx	%o4, 34, %o4
449	srlx	%o5, 64-34, %g5
450	sllx	%o5, 34, %o5
451	or	%o4, %g5, %o4
452	or	%o5, %g4, %o5
453	stx	%o4, [%o2 + 0xd0]	! k[52, 53]
454	stx	%o5, [%o2 + 0xd8]	! k[54, 55]
! Reload KA, which was parked unrotated at 0x30 above.
455	ldx	[%o2 + 0x30], %o4	! k[12, 13]
456	ldx	[%o2 + 0x38], %o5	! k[14, 15]
! KA <<< 15 (overwrites the parked copy)
457	srlx	%o4, 64-15, %g4
458	sllx	%o4, 15, %o4
459	srlx	%o5, 64-15, %g5
460	sllx	%o5, 15, %o5
461	or	%o4, %g5, %o4
462	or	%o5, %g4, %o5
463	stx	%o4, [%o2 + 0x30]	! k[12, 13]
464	stx	%o5, [%o2 + 0x38]	! k[14, 15]
! KA <<< 45
465	srlx	%o4, 64-30, %g4
466	sllx	%o4, 30, %o4
467	srlx	%o5, 64-30, %g5
468	sllx	%o5, 30, %o5
469	or	%o4, %g5, %o4
470	or	%o5, %g4, %o5
471	stx	%o4, [%o2 + 0x70]	! k[28, 29]
472	stx	%o5, [%o2 + 0x78]	! k[30, 31]
! KA <<< 77: a further rotation by exactly 32 bits, done by storing the
! four 32-bit words in rotated order instead of shifting.  %o4/%o5 keep
! the <<< 45 value for the next step.
473	srlx	%o4, 32, %g4
474	srlx	%o5, 32, %g5
475	st	%o4, [%o2 + 0xc0]	! k[48]
476	st	%g5, [%o2 + 0xc4]	! k[49]
477	st	%o5, [%o2 + 0xc8]	! k[50]
478	st	%g4, [%o2 + 0xcc]	! k[51]
! KA <<< 94 (45 + 49)
479	srlx	%o4, 64-49, %g4
480	sllx	%o4, 49, %o4
481	srlx	%o5, 64-49, %g5
482	sllx	%o5, 49, %o5
483	or	%o4, %g5, %o4
484	or	%o5, %g4, %o5
485	stx	%o4, [%o2 + 0xe0]	! k[56, 57]
486	stx	%o5, [%o2 + 0xe8]	! k[58, 59]
487
488	.word	0x99b0221c !movdtox	%f28,%o4		! k[ 0,  1]
489	.word	0x9bb0221e !movdtox	%f30,%o5		! k[ 2,  3]
! KL <<< 45
490	srlx	%o4, 64-45, %g4
491	sllx	%o4, 45, %o4
492	srlx	%o5, 64-45, %g5
493	sllx	%o5, 45, %o5
494	or	%o4, %g5, %o4
495	or	%o5, %g4, %o5
496	stx	%o4, [%o2 + 0x60]	! k[24, 25]
497	stx	%o5, [%o2 + 0x68]	! k[26, 27]
! KL <<< 60
498	srlx	%o4, 64-15, %g4
499	sllx	%o4, 15, %o4
500	srlx	%o5, 64-15, %g5
501	sllx	%o5, 15, %o5
502	or	%o4, %g5, %o4
503	or	%o5, %g4, %o5
504	stx	%o4, [%o2 + 0x80]	! k[32, 33]
505	stx	%o5, [%o2 + 0x88]	! k[34, 35]
! KL <<< 77
506	srlx	%o4, 64-17, %g4
507	sllx	%o4, 17, %o4
508	srlx	%o5, 64-17, %g5
509	sllx	%o5, 17, %o5
510	or	%o4, %g5, %o4
511	or	%o5, %g4, %o5
512	stx	%o4, [%o2 + 0xb0]	! k[44, 45]
513	stx	%o5, [%o2 + 0xb8]	! k[46, 47]
! KL <<< 111
514	srlx	%o4, 64-34, %g4
515	sllx	%o4, 34, %o4
516	srlx	%o5, 64-34, %g5
517	sllx	%o5, 34, %o5
518	or	%o4, %g5, %o4
519	or	%o5, %g4, %o5
520	stx	%o4, [%o2 + 0xf0]	! k[60, 61]
521	stx	%o5, [%o2 + 0xf8]	! k[62, 63]
522
523	mov		4, %o3
524	st		%o3, [%o2 + 0x110]	! grand-round count for 192/256-bit keys
525	retl
526	xor		%o0, %o0, %o0		! (delay slot) return 0
527.type	cmll_t4_set_key,#function
528.size	cmll_t4_set_key,.-cmll_t4_set_key
529.align	32
! SIGMA -- the six 64-bit constants used by the Camellia key schedule
! (Sigma1..Sigma6 in RFC 3713).  Each constant is written as two 32-bit
! words, high word first; cmll_t4_set_key fetches them with 8-byte ldd
! loads at offsets 0, 8, 16, 24, 32 and 40.
530SIGMA:
531	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2	! Sigma1, Sigma2
532	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c	! Sigma3, Sigma4
533	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd	! Sigma5, Sigma6
534.type	SIGMA,#object
535.size	SIGMA,.-SIGMA
536.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
537.align	32
!
! _cmll128_load_enckey -- preload the start of the key schedule at %i3 into
! registers for the bulk-mode helpers.
!
!   in:   %i3 = key schedule (not modified)
!   out:  %g4:%g5   = schedule bytes 0..15 (integer registers)
!         %f16..%f62 = the 24 doublewords at offsets 16..200, in ascending
!                      order (offset 8*i goes to %f(12+2*i))
! Leaf helper, returns with retl.  It reads %i3 directly rather than an
! %o register, so it is meant to be called from a routine that has already
! executed "save".  _cmll256_load_enckey is an alias for the same code; the
! 256-bit helpers fetch the schedule beyond offset 200 themselves.
!
538_cmll128_load_enckey:
539	ldx		[%i3 + 0], %g4
540	ldx		[%i3 + 8], %g5
541	ldd		[%i3 + 16], %f16
542	ldd		[%i3 + 24], %f18
543	ldd		[%i3 + 32], %f20
544	ldd		[%i3 + 40], %f22
545	ldd		[%i3 + 48], %f24
546	ldd		[%i3 + 56], %f26
547	ldd		[%i3 + 64], %f28
548	ldd		[%i3 + 72], %f30
549	ldd		[%i3 + 80], %f32
550	ldd		[%i3 + 88], %f34
551	ldd		[%i3 + 96], %f36
552	ldd		[%i3 + 104], %f38
553	ldd		[%i3 + 112], %f40
554	ldd		[%i3 + 120], %f42
555	ldd		[%i3 + 128], %f44
556	ldd		[%i3 + 136], %f46
557	ldd		[%i3 + 144], %f48
558	ldd		[%i3 + 152], %f50
559	ldd		[%i3 + 160], %f52
560	ldd		[%i3 + 168], %f54
561	ldd		[%i3 + 176], %f56
562	ldd		[%i3 + 184], %f58
563	ldd		[%i3 + 192], %f60
564	ldd		[%i3 + 200], %f62
565	retl
566	nop
567.type	_cmll128_load_enckey,#function
568.size	_cmll128_load_enckey,.-_cmll128_load_enckey
569_cmll256_load_enckey=_cmll128_load_enckey
570
571.align	32
!
! _cmll256_load_deckey / _cmll128_load_deckey -- preload the key schedule
! in reverse order so that the encrypt round sequence performs decryption.
!
!   in:   %i3 = key schedule
!   out:  %f58..%f16 = doublewords at [%i3+16]..[%i3+184], i.e. ascending
!                      memory goes to descending registers
!         %g4:%g5    = the 16 bytes at [%i3+192]
!         %f60, %f62 = 128-bit entry: [%i3+0], [%i3+8]
!                      256-bit entry: [%i3+72], [%i3+64]
! The 256-bit entry additionally advances %i3 by 64 (in the branch delay
! slot) before joining the common loads, and leaves it advanced on return;
! the offsets used by _cmll256_decrypt_1x/_2x are relative to that value.
! Leaf helper, returns with retl.
!
572_cmll256_load_deckey:
573	ldd		[%i3 + 64], %f62
574	ldd		[%i3 + 72], %f60
575	b		.Load_deckey
576	add		%i3, 64, %i3		! (delay slot) skip the extra 64 bytes of a 256-bit schedule
577_cmll128_load_deckey:
578	ldd		[%i3 + 0], %f60
579	ldd		[%i3 + 8], %f62
580.Load_deckey:
581	ldd		[%i3 + 16], %f58
582	ldd		[%i3 + 24], %f56
583	ldd		[%i3 + 32], %f54
584	ldd		[%i3 + 40], %f52
585	ldd		[%i3 + 48], %f50
586	ldd		[%i3 + 56], %f48
587	ldd		[%i3 + 64], %f46
588	ldd		[%i3 + 72], %f44
589	ldd		[%i3 + 80], %f42
590	ldd		[%i3 + 88], %f40
591	ldd		[%i3 + 96], %f38
592	ldd		[%i3 + 104], %f36
593	ldd		[%i3 + 112], %f34
594	ldd		[%i3 + 120], %f32
595	ldd		[%i3 + 128], %f30
596	ldd		[%i3 + 136], %f28
597	ldd		[%i3 + 144], %f26
598	ldd		[%i3 + 152], %f24
599	ldd		[%i3 + 160], %f22
600	ldd		[%i3 + 168], %f20
601	ldd		[%i3 + 176], %f18
602	ldd		[%i3 + 184], %f16
603	ldx		[%i3 + 192], %g4
604	retl
605	ldx		[%i3 + 200], %g5		! (delay slot)
606.type	_cmll256_load_deckey,#function
607.size	_cmll256_load_deckey,.-_cmll256_load_deckey
608
609.align	32
!
! _cmll128_encrypt_1x -- run the 18-round (three grand rounds) Camellia
! core on one block held in registers.
!
!   in:   %f0:%f2    = block; the caller has already XORed in %g4:%g5
!                      (see the "^= rk[0]" step in cmll128_t4_cbc_encrypt)
!         %f16..%f62 = round keys as left by _cmll128_load_enckey or
!                      _cmll128_load_deckey
!   out:  %f0:%f2    = result, including the final XOR with %f60:%f62
!   clobbers %f4
! Structure: 6 x camellia_f, fl/fli, 6 x camellia_f, fl/fli, 6 x camellia_f,
! final XOR.  No memory is touched.  The decrypt alias below is the same
! code; direction is selected purely by which loader filled the registers.
!
610_cmll128_encrypt_1x:
611	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
612	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
613	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
614	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
615	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
616	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
617	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
618	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
619	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
620	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
621	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
622	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
623	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
624	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
625	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
626	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
627	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
628	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
629	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
630	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
631	.word	0x88ce4182 !camellia_f	%f56,%f2,%f0,%f4
632	.word	0x84cec980 !camellia_f	%f58,%f0,%f4,%f2
633	.word	0x81b74d84 !fxor	%f60,%f4,%f0
634	retl
635	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
636.type	_cmll128_encrypt_1x,#function
637.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
638_cmll128_decrypt_1x=_cmll128_encrypt_1x
639
640.align	32
!
! _cmll128_encrypt_2x -- the same 18-round core as _cmll128_encrypt_1x,
! applied to two independent blocks with the instructions interleaved
! (each round is issued for block 0, then for block 1, with the same key).
!
!   in:   %f0:%f2 = block 0, %f4:%f6 = block 1
!         %f16..%f62 = round keys as left by the 128-bit key loaders
!   out:  %f0:%f2 and %f4:%f6 = results, including the final XOR with
!         %f60:%f62
!   clobbers %f8, %f10
! No memory is touched.  The decrypt alias below is the same code.
! NOTE(review): callers are not visible in this chunk; presumably they
! apply the initial %g4:%g5 XOR to both blocks as the 1x caller does.
!
641_cmll128_encrypt_2x:
642	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
643	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
644	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
645	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
646	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
647	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
648	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
649	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
650	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
651	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
652	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
653	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
654	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
655	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
656	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
657	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
658	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
659	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
660	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
661	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
662	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
663	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
664	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
665	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
666	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
667	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
668	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
669	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
670	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
671	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
672	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
673	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
674	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
675	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
676	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
677	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
678	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
679	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
680	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
681	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
! Last two rounds go through %f8/%f10 so the final XORs can write %f0/%f4.
682	.word	0x90ce4182 !camellia_f	%f56,%f2,%f0,%f8
683	.word	0x94ce4986 !camellia_f	%f56,%f6,%f4,%f10
684	.word	0x84ced180 !camellia_f	%f58,%f0,%f8,%f2
685	.word	0x8cced584 !camellia_f	%f58,%f4,%f10,%f6
686	.word	0x81b74d88 !fxor	%f60,%f8,%f0
687	.word	0x89b74d8a !fxor	%f60,%f10,%f4
688	.word	0x85b7cd82 !fxor	%f62,%f2,%f2
689	retl
690	.word	0x8db7cd86 !fxor	%f62,%f6,%f6
691.type	_cmll128_encrypt_2x,#function
692.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
693_cmll128_decrypt_2x=_cmll128_encrypt_2x
694
695.align	32
!
! _cmll256_encrypt_1x -- run the 24-round (four grand rounds) Camellia core
! on one block held in registers.
!
!   in:   %f0:%f2    = block (initial key XOR already applied by the caller)
!         %f16..%f62 = schedule offsets 16..200, as left by
!                      _cmll256_load_enckey
!         %i3        = key schedule base
!   out:  %f0:%f2    = result
!   clobbers %f4
! The register file holds only 24 of the 32 keys needed, so %f16..%f30 are
! reused: as soon as each pair has served the first grand round it is
! reloaded from offsets 208..264 for the last one, and at the end the pair
! is restored from offsets 16..72 so that the routine can be called again
! without re-running the key loader.
!
696_cmll256_encrypt_1x:
697	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
698	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
699	ldd		[%i3 + 208], %f16	! keys for the fourth grand round
700	ldd		[%i3 + 216], %f18
701	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
702	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
703	ldd		[%i3 + 224], %f20
704	ldd		[%i3 + 232], %f22
705	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
706	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
707	ldd		[%i3 + 240], %f24
708	ldd		[%i3 + 248], %f26
709	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
710	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
711	ldd		[%i3 + 256], %f28	! 16 bytes used by the final XOR
712	ldd		[%i3 + 264], %f30
713	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
714	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
715	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
716	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
717	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
718	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
719	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
720	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
721	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
722	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
723	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
724	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
725	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
726	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
727	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
728	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
729	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
730	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
731	ldd		[%i3 + 16], %f16	! restore first-round keys for the next call
732	ldd		[%i3 + 24], %f18
733	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
734	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
735	ldd		[%i3 + 32], %f20
736	ldd		[%i3 + 40], %f22
737	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
738	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
739	ldd		[%i3 + 48], %f24
740	ldd		[%i3 + 56], %f26
741	.word	0x81b70d84 !fxor	%f28,%f4,%f0
742	.word	0x85b78d82 !fxor	%f30,%f2,%f2
743	ldd		[%i3 + 64], %f28
744	retl
745	ldd		[%i3 + 72], %f30	! (delay slot)
746.type	_cmll256_encrypt_1x,#function
747.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x
748
749.align	32
!
! _cmll256_encrypt_2x -- the same 24-round core as _cmll256_encrypt_1x,
! applied to two independent blocks with the instructions interleaved.
!
!   in:   %f0:%f2 = block 0, %f4:%f6 = block 1
!         %f16..%f62 = schedule offsets 16..200, as left by
!                      _cmll256_load_enckey
!         %i3        = key schedule base
!   out:  %f0:%f2 and %f4:%f6 = results
!   clobbers %f8, %f10
! As in the 1x variant, %f16..%f30 are reloaded from offsets 208..264 after
! their first use and restored from offsets 16..72 before returning.
!
750_cmll256_encrypt_2x:
751	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
752	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
753	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
754	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
755	ldd		[%i3 + 208], %f16	! keys for the fourth grand round
756	ldd		[%i3 + 216], %f18
757	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
758	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
759	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
760	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
761	ldd		[%i3 + 224], %f20
762	ldd		[%i3 + 232], %f22
763	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
764	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
765	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
766	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
767	ldd		[%i3 + 240], %f24
768	ldd		[%i3 + 248], %f26
769	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
770	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
771	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
772	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
773	ldd		[%i3 + 256], %f28	! 16 bytes used by the final XOR
774	ldd		[%i3 + 264], %f30
775	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
776	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
777	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
778	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
779	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
780	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
781	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
782	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
783	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
784	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
785	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
786	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
787	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
788	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
789	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
790	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
791	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
792	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
793	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
794	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
795	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
796	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
797	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
798	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
799	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
800	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
801	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
802	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
803	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
804	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
805	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
806	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
807	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
808	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
809	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
810	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
811	ldd		[%i3 + 16], %f16	! restore first-round keys for the next call
812	ldd		[%i3 + 24], %f18
813	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
814	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
815	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
816	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
817	ldd		[%i3 + 32], %f20
818	ldd		[%i3 + 40], %f22
! Last two rounds go through %f8/%f10 so the final XORs can write %f0/%f4.
819	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
820	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
821	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
822	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
823	ldd		[%i3 + 48], %f24
824	ldd		[%i3 + 56], %f26
825	.word	0x81b70d88 !fxor	%f28,%f8,%f0
826	.word	0x89b70d8a !fxor	%f28,%f10,%f4
827	.word	0x85b78d82 !fxor	%f30,%f2,%f2
828	.word	0x8db78d86 !fxor	%f30,%f6,%f6
829	ldd		[%i3 + 64], %f28
830	retl
831	ldd		[%i3 + 72], %f30	! (delay slot)
832.type	_cmll256_encrypt_2x,#function
833.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x
834
835.align	32
!
! _cmll256_decrypt_1x -- run the 24-round Camellia core on one block with
! the key schedule consumed in reverse.
!
!   in:   %f0:%f2    = block (initial key XOR already applied by the caller)
!         %f16..%f62 = keys as left by _cmll256_load_deckey
!         %i3        = key schedule base + 64 (the value _cmll256_load_deckey
!                      leaves behind)
!   out:  %f0:%f2    = result
!   clobbers %f4
! %f16..%f30 are reused as in the encrypt variant: after first use they are
! reloaded from [%i3-8]..[%i3-64] (descending addresses) for the last grand
! round and final XOR, and before returning they are restored from
! [%i3+184]..[%i3+128], the same locations _cmll256_load_deckey read them
! from.
!
836_cmll256_decrypt_1x:
837	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
838	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
839	ldd		[%i3 - 8], %f16		! keys for the fourth grand round
840	ldd		[%i3 - 16], %f18
841	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
842	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
843	ldd		[%i3 - 24], %f20
844	ldd		[%i3 - 32], %f22
845	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
846	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
847	ldd		[%i3 - 40], %f24
848	ldd		[%i3 - 48], %f26
849	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
850	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
851	ldd		[%i3 - 56], %f28	! 16 bytes used by the final XOR
852	ldd		[%i3 - 64], %f30
853	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
854	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
855	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
856	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
857	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
858	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
859	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
860	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
861	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
862	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
863	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
864	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
865	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
866	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
867	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
868	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
869	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
870	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
871	ldd		[%i3 + 184], %f16	! restore first-round keys for the next call
872	ldd		[%i3 + 176], %f18
873	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
874	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
875	ldd		[%i3 + 168], %f20
876	ldd		[%i3 + 160], %f22
877	.word	0x88ce0182 !camellia_f	%f24,%f2,%f0,%f4
878	.word	0x84ce8980 !camellia_f	%f26,%f0,%f4,%f2
879	ldd		[%i3 + 152], %f24
880	ldd		[%i3 + 144], %f26
! %f30 holds the doubleword at the lower address, so it pairs with the
! first output half and %f28 with the second.
881	.word	0x81b78d84 !fxor	%f30,%f4,%f0
882	.word	0x85b70d82 !fxor	%f28,%f2,%f2
883	ldd		[%i3 + 136], %f28
884	retl
885	ldd		[%i3 + 128], %f30	! (delay slot)
886.type	_cmll256_decrypt_1x,#function
887.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x
888
889.align	32
!
! _cmll256_decrypt_2x -- the same reverse-schedule 24-round core as
! _cmll256_decrypt_1x, applied to two independent blocks with the
! instructions interleaved.
!
!   in:   %f0:%f2 = block 0, %f4:%f6 = block 1
!         %f16..%f62 = keys as left by _cmll256_load_deckey
!         %i3        = key schedule base + 64 (the value _cmll256_load_deckey
!                      leaves behind)
!   out:  %f0:%f2 and %f4:%f6 = results
!   clobbers %f8, %f10
! %f16..%f30 are reloaded from [%i3-8]..[%i3-64] after first use and
! restored from [%i3+184]..[%i3+128] before returning.
!
890_cmll256_decrypt_2x:
891	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
892	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
893	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
894	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
895	ldd		[%i3 - 8], %f16		! keys for the fourth grand round
896	ldd		[%i3 - 16], %f18
897	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
898	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
899	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
900	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
901	ldd		[%i3 - 24], %f20
902	ldd		[%i3 - 32], %f22
903	.word	0x84ce0182 !camellia_f	%f24,%f2,%f0,%f2
904	.word	0x8cce0986 !camellia_f	%f24,%f6,%f4,%f6
905	.word	0x80ce8580 !camellia_f	%f26,%f0,%f2,%f0
906	.word	0x88ce8d84 !camellia_f	%f26,%f4,%f6,%f4
907	ldd		[%i3 - 40], %f24
908	ldd		[%i3 - 48], %f26
909	.word	0x81b72780 !camellia_fl	%f28,%f0,%f0
910	.word	0x89b72784 !camellia_fl	%f28,%f4,%f4
911	.word	0x85b7a7a2 !camellia_fli	%f30,%f2,%f2
912	.word	0x8db7a7a6 !camellia_fli	%f30,%f6,%f6
913	ldd		[%i3 - 56], %f28	! 16 bytes used by the final XOR
914	ldd		[%i3 - 64], %f30
915	.word	0x84c84182 !camellia_f	%f32,%f2,%f0,%f2
916	.word	0x8cc84986 !camellia_f	%f32,%f6,%f4,%f6
917	.word	0x80c8c580 !camellia_f	%f34,%f0,%f2,%f0
918	.word	0x88c8cd84 !camellia_f	%f34,%f4,%f6,%f4
919	.word	0x84c94182 !camellia_f	%f36,%f2,%f0,%f2
920	.word	0x8cc94986 !camellia_f	%f36,%f6,%f4,%f6
921	.word	0x80c9c580 !camellia_f	%f38,%f0,%f2,%f0
922	.word	0x88c9cd84 !camellia_f	%f38,%f4,%f6,%f4
923	.word	0x84ca4182 !camellia_f	%f40,%f2,%f0,%f2
924	.word	0x8cca4986 !camellia_f	%f40,%f6,%f4,%f6
925	.word	0x80cac580 !camellia_f	%f42,%f0,%f2,%f0
926	.word	0x88cacd84 !camellia_f	%f42,%f4,%f6,%f4
927	.word	0x81b36780 !camellia_fl	%f44,%f0,%f0
928	.word	0x89b36784 !camellia_fl	%f44,%f4,%f4
929	.word	0x85b3e7a2 !camellia_fli	%f46,%f2,%f2
930	.word	0x8db3e7a6 !camellia_fli	%f46,%f6,%f6
931	.word	0x84cc4182 !camellia_f	%f48,%f2,%f0,%f2
932	.word	0x8ccc4986 !camellia_f	%f48,%f6,%f4,%f6
933	.word	0x80ccc580 !camellia_f	%f50,%f0,%f2,%f0
934	.word	0x88cccd84 !camellia_f	%f50,%f4,%f6,%f4
935	.word	0x84cd4182 !camellia_f	%f52,%f2,%f0,%f2
936	.word	0x8ccd4986 !camellia_f	%f52,%f6,%f4,%f6
937	.word	0x80cdc580 !camellia_f	%f54,%f0,%f2,%f0
938	.word	0x88cdcd84 !camellia_f	%f54,%f4,%f6,%f4
939	.word	0x84ce4182 !camellia_f	%f56,%f2,%f0,%f2
940	.word	0x8cce4986 !camellia_f	%f56,%f6,%f4,%f6
941	.word	0x80cec580 !camellia_f	%f58,%f0,%f2,%f0
942	.word	0x88cecd84 !camellia_f	%f58,%f4,%f6,%f4
943	.word	0x81b76780 !camellia_fl	%f60,%f0,%f0
944	.word	0x89b76784 !camellia_fl	%f60,%f4,%f4
945	.word	0x85b7e7a2 !camellia_fli	%f62,%f2,%f2
946	.word	0x8db7e7a6 !camellia_fli	%f62,%f6,%f6
947	.word	0x84cc0182 !camellia_f	%f16,%f2,%f0,%f2
948	.word	0x8ccc0986 !camellia_f	%f16,%f6,%f4,%f6
949	.word	0x80cc8580 !camellia_f	%f18,%f0,%f2,%f0
950	.word	0x88cc8d84 !camellia_f	%f18,%f4,%f6,%f4
951	ldd		[%i3 + 184], %f16	! restore first-round keys for the next call
952	ldd		[%i3 + 176], %f18
953	.word	0x84cd0182 !camellia_f	%f20,%f2,%f0,%f2
954	.word	0x8ccd0986 !camellia_f	%f20,%f6,%f4,%f6
955	.word	0x80cd8580 !camellia_f	%f22,%f0,%f2,%f0
956	.word	0x88cd8d84 !camellia_f	%f22,%f4,%f6,%f4
957	ldd		[%i3 + 168], %f20
958	ldd		[%i3 + 160], %f22
! Last two rounds go through %f8/%f10 so the final XORs can write %f0/%f4.
959	.word	0x90ce0182 !camellia_f	%f24,%f2,%f0,%f8
960	.word	0x94ce0986 !camellia_f	%f24,%f6,%f4,%f10
961	.word	0x84ce9180 !camellia_f	%f26,%f0,%f8,%f2
962	.word	0x8cce9584 !camellia_f	%f26,%f4,%f10,%f6
963	ldd		[%i3 + 152], %f24
964	ldd		[%i3 + 144], %f26
965	.word	0x81b78d88 !fxor	%f30,%f8,%f0
966	.word	0x89b78d8a !fxor	%f30,%f10,%f4
967	.word	0x85b70d82 !fxor	%f28,%f2,%f2
968	.word	0x8db70d86 !fxor	%f28,%f6,%f6
969	ldd		[%i3 + 136], %f28
970	retl
971	ldd		[%i3 + 128], %f30	! (delay slot)
972.type	_cmll256_decrypt_2x,#function
973.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
974.globl	cmll128_t4_cbc_encrypt
975.align	32
!
! cmll128_t4_cbc_encrypt: CBC-mode encryption, one 16-byte block per
! iteration (each block depends on the previous ciphertext block).
!
! Register roles as used by the code below:
!   %i0	input pointer; rounded down to 8 bytes, misalignment is
!		undone with the %l0/%l1 shift pair
!   %i1	output pointer; misalignment is handled with faligndata
!		and partial stores
!   %i2	length in bytes on entry, block count inside the loops
!   %i3	not referenced directly here; presumably the key schedule
!		used by the _cmll128_* helpers -- confirm in those helpers
!   %i4	16-byte ivec, read on entry and rewritten on exit
!   %g4,%g5	rk[0]; presumably set up by _cmll128_load_enckey
!   %f0-%f3	chaining value / ciphertext of the current block
!   %l0	(%i0 & 7) * 8, left shift for misaligned input
!   %l1	64 - %l0, the matching right shift
!   %l2	%i1 & 7, non-zero when the output is misaligned
!   %l3	byte mask for partial stores, 0xff >> %l2
!   %l5	block-store path selector, later the tail block count
!
! A zero length returns at once and leaves ivec untouched.
!
976cmll128_t4_cbc_encrypt:
977	save		%sp, -192, %sp
! Zero length: leave through the abort label without touching ivec.
978	cmp		%i2, 0
979	be,pn		%xcc, .L128_cbc_enc_abort
980	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
981	sub		%i0, %i1, %l5	! %i0!=%i1
! The chaining value starts out as ivec, fetched as four 32-bit words.
982	ld		[%i4 + 0], %f0
983	ld		[%i4 + 4], %f1
984	ld		[%i4 + 8], %f2
985	ld		[%i4 + 12], %f3
986	prefetch	[%i0], 20
987	prefetch	[%i0 + 63], 20
! Helper is defined outside this chunk; the "and" executes in the call
! delay slot and captures the input misalignment in bytes.
988	call		_cmll128_load_enckey
989	and		%i0, 7, %l0
990	andn		%i0, 7, %i0
991	sll		%l0, 3, %l0
992	mov		64, %l1
993	mov		0xff, %l3
994	sub		%l1, %l0, %l1
995	and		%i1, 7, %l2
! %l5 (= inp - out) stays non-zero only when the output is 8-byte
! aligned, the length is at least 128 and inp != out; only then is the
! block-initializing-store path taken.
996	cmp		%i2, 127
997	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
998	movleu		%xcc, 0, %l5	!	%i2<128 ||
999	brnz,pn		%l5, .L128cbc_enc_blk	!	%i0==%i1)
1000	srl		%l3, %l2, %l3
1001
! Ordinary-store path.  alignaddrl rounds %i1 down to 8 bytes and, per
! the VIS definition, programs the GSR alignment used by faligndata.
1002	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
! bytes -> number of 16-byte blocks
1003	srlx		%i2, 4, %i2
1004	prefetch	[%i1], 22
1005
! Per-block loop.  The unaligned-output code at 2: re-enters at
! .L128_cbc_enc_loop+4 because it has already loaded %o0 itself.
1006.L128_cbc_enc_loop:
1007	ldx		[%i0 + 0], %o0
1008	brz,pt		%l0, 4f
1009	ldx		[%i0 + 8], %o1
1010
! Misaligned input: merge three dwords into the two wanted ones.
1011	ldx		[%i0 + 16], %o2
1012	sllx		%o0, %l0, %o0
1013	srlx		%o1, %l1, %g1
1014	sllx		%o1, %l0, %o1
1015	or		%g1, %o0, %o0
1016	srlx		%o2, %l1, %o2
1017	or		%o2, %o1, %o1
10184:
1019	xor		%g4, %o0, %o0		! ^= rk[0]
1020	xor		%g5, %o1, %o1
1021	.word	0x99b02308 !movxtod	%o0,%f12
1022	.word	0x9db02309 !movxtod	%o1,%f14
1023
1024	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1025	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1026	prefetch	[%i1 + 63], 22
1027	prefetch	[%i0 + 16+63], 20
! Encrypt %f0:%f2 in place (helper defined outside this chunk); the
! input pointer is advanced in the call delay slot.
1028	call		_cmll128_encrypt_1x
1029	add		%i0, 16, %i0
1030
! Misaligned output is written at 2:; the block count is decremented in
! the delay slot on both paths.
1031	brnz,pn		%l2, 2f
1032	sub		%i2, 1, %i2
1033
1034	std		%f0, [%i1 + 0]
1035	std		%f2, [%i1 + 8]
1036	brnz,pt		%i2, .L128_cbc_enc_loop
1037	add		%i1, 16, %i1
! Done: the last ciphertext block becomes the new ivec.
1038	st		%f0, [%i4 + 0]
1039	st		%f1, [%i4 + 4]
1040	st		%f2, [%i4 + 8]
1041	st		%f3, [%i4 + 12]
1042.L128_cbc_enc_abort:
1043	ret
1044	restore
1045
! Unaligned-output store.  %f4/%f6/%f8 hold the 16 result bytes shifted
! onto the 8-byte grid of %i1; the first and last dwords are written
! with complementary byte masks (%l3 and ~%l3).  The ldxa pre-loads the
! first dword of the next block, which is why the loop is re-entered at
! +4.  ASI 0x82 is the non-faulting primary ASI in SPARC V9, presumably
! chosen because %i0 may already point past the input on the last block.
1046.align	16
10472:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1048						! and ~3x deterioration
1049						! in inp==out case
1050	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1051	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1052	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1053
1054	stda		%f4, [%i1 + %l3]0xc0	! partial store
1055	std		%f6, [%i1 + 8]
1056	add		%i1, 16, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1057	orn		%g0, %l3, %l3
1058	stda		%f8, [%i1 + %l3]0xc0	! partial store
1059
1060	brnz,pt		%i2, .L128_cbc_enc_loop+4
1061	orn		%g0, %l3, %l3
! Done: the last ciphertext block becomes the new ivec.
1062	st		%f0, [%i4 + 0]
1063	st		%f1, [%i4 + 4]
1064	st		%f2, [%i4 + 8]
1065	st		%f3, [%i4 + 12]
1066	ret
1067	restore
1068
1069!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path (8-byte aligned output, length >= 128, inp != out).
! %l5 = number of blocks covering the bytes past the last 64-byte
! boundary of the output; they are handled by the ordinary loop.
! %i2 = number of blocks written here with ASI 0xe2 stores.
1070.align	32
1071.L128cbc_enc_blk:
1072	add	%i1, %i2, %l5
1073	and	%l5, 63, %l5	! tail
1074	sub	%i2, %l5, %i2
1075	add	%l5, 15, %l5	! round up to 16n
1076	srlx	%i2, 4, %i2
1077	srl	%l5, 4, %l5
1078
1079.L128_cbc_enc_blk_loop:
1080	ldx		[%i0 + 0], %o0
1081	brz,pt		%l0, 5f
1082	ldx		[%i0 + 8], %o1
1083
! Misaligned input: merge three dwords into the two wanted ones.
1084	ldx		[%i0 + 16], %o2
1085	sllx		%o0, %l0, %o0
1086	srlx		%o1, %l1, %g1
1087	sllx		%o1, %l0, %o1
1088	or		%g1, %o0, %o0
1089	srlx		%o2, %l1, %o2
1090	or		%o2, %o1, %o1
10915:
1092	xor		%g4, %o0, %o0		! ^= rk[0]
1093	xor		%g5, %o1, %o1
1094	.word	0x99b02308 !movxtod	%o0,%f12
1095	.word	0x9db02309 !movxtod	%o1,%f14
1096
1097	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1098	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1099	prefetch	[%i0 + 16+63], 20
1100	call		_cmll128_encrypt_1x
1101	add		%i0, 16, %i0
1102	sub		%i2, 1, %i2
1103
1104	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1105	add		%i1, 8, %i1
1106	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1107	brnz,pt		%i2, .L128_cbc_enc_blk_loop
1108	add		%i1, 8, %i1
1109
! Order the block-init stores, then hand any tail blocks to the ordinary
! loop; the mov in the delay slot sets its block count.
1110	membar		#StoreLoad|#StoreStore
1111	brnz,pt		%l5, .L128_cbc_enc_loop
1112	mov		%l5, %i2
! No tail: the last ciphertext block becomes the new ivec.
1113	st		%f0, [%i4 + 0]
1114	st		%f1, [%i4 + 4]
1115	st		%f2, [%i4 + 8]
1116	st		%f3, [%i4 + 12]
1117	ret
1118	restore
1119.type	cmll128_t4_cbc_encrypt,#function
1120.size	cmll128_t4_cbc_encrypt,.-cmll128_t4_cbc_encrypt
1121.globl	cmll256_t4_cbc_encrypt
1122.align	32
!
! cmll256_t4_cbc_encrypt: CBC-mode encryption, one 16-byte block per
! iteration.  Same structure as the 128-bit routine, but calling the
! _cmll256_* helpers (defined outside this chunk).
!
! Register roles as used by the code below:
!   %i0	input pointer; rounded down to 8 bytes, misalignment is
!		undone with the %l0/%l1 shift pair
!   %i1	output pointer; misalignment is handled with faligndata
!		and partial stores
!   %i2	length in bytes on entry, block count inside the loops
!   %i3	not referenced directly here; presumably the key schedule
!		used by the _cmll256_* helpers -- confirm in those helpers
!   %i4	16-byte ivec, read on entry and rewritten on exit
!   %g4,%g5	rk[0]; presumably set up by _cmll256_load_enckey
!   %f0-%f3	chaining value / ciphertext of the current block
!   %l0	(%i0 & 7) * 8, left shift for misaligned input
!   %l1	64 - %l0, the matching right shift
!   %l2	%i1 & 7, non-zero when the output is misaligned
!   %l3	byte mask for partial stores, 0xff >> %l2
!   %l5	block-store path selector, later the tail block count
!
! A zero length returns at once and leaves ivec untouched.
!
1123cmll256_t4_cbc_encrypt:
1124	save		%sp, -192, %sp
! Zero length: leave through the abort label without touching ivec.
1125	cmp		%i2, 0
1126	be,pn		%xcc, .L256_cbc_enc_abort
1127	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1128	sub		%i0, %i1, %l5	! %i0!=%i1
! The chaining value starts out as ivec, fetched as four 32-bit words.
1129	ld		[%i4 + 0], %f0
1130	ld		[%i4 + 4], %f1
1131	ld		[%i4 + 8], %f2
1132	ld		[%i4 + 12], %f3
1133	prefetch	[%i0], 20
1134	prefetch	[%i0 + 63], 20
! The "and" executes in the call delay slot and captures the input
! misalignment in bytes.
1135	call		_cmll256_load_enckey
1136	and		%i0, 7, %l0
1137	andn		%i0, 7, %i0
1138	sll		%l0, 3, %l0
1139	mov		64, %l1
1140	mov		0xff, %l3
1141	sub		%l1, %l0, %l1
1142	and		%i1, 7, %l2
! %l5 (= inp - out) stays non-zero only when the output is 8-byte
! aligned, the length is at least 128 and inp != out; only then is the
! block-initializing-store path taken.
1143	cmp		%i2, 127
1144	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1145	movleu		%xcc, 0, %l5	!	%i2<128 ||
1146	brnz,pn		%l5, .L256cbc_enc_blk	!	%i0==%i1)
1147	srl		%l3, %l2, %l3
1148
! Ordinary-store path.  alignaddrl rounds %i1 down to 8 bytes and, per
! the VIS definition, programs the GSR alignment used by faligndata.
1149	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
! bytes -> number of 16-byte blocks
1150	srlx		%i2, 4, %i2
1151	prefetch	[%i1], 22
1152
! Per-block loop.  The unaligned-output code at 2: re-enters at
! .L256_cbc_enc_loop+4 because it has already loaded %o0 itself.
1153.L256_cbc_enc_loop:
1154	ldx		[%i0 + 0], %o0
1155	brz,pt		%l0, 4f
1156	ldx		[%i0 + 8], %o1
1157
! Misaligned input: merge three dwords into the two wanted ones.
1158	ldx		[%i0 + 16], %o2
1159	sllx		%o0, %l0, %o0
1160	srlx		%o1, %l1, %g1
1161	sllx		%o1, %l0, %o1
1162	or		%g1, %o0, %o0
1163	srlx		%o2, %l1, %o2
1164	or		%o2, %o1, %o1
11654:
1166	xor		%g4, %o0, %o0		! ^= rk[0]
1167	xor		%g5, %o1, %o1
1168	.word	0x99b02308 !movxtod	%o0,%f12
1169	.word	0x9db02309 !movxtod	%o1,%f14
1170
1171	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1172	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1173	prefetch	[%i1 + 63], 22
1174	prefetch	[%i0 + 16+63], 20
! Encrypt %f0:%f2 in place; the input pointer is advanced in the call
! delay slot.
1175	call		_cmll256_encrypt_1x
1176	add		%i0, 16, %i0
1177
! Misaligned output is written at 2:; the block count is decremented in
! the delay slot on both paths.
1178	brnz,pn		%l2, 2f
1179	sub		%i2, 1, %i2
1180
1181	std		%f0, [%i1 + 0]
1182	std		%f2, [%i1 + 8]
1183	brnz,pt		%i2, .L256_cbc_enc_loop
1184	add		%i1, 16, %i1
! Done: the last ciphertext block becomes the new ivec.
1185	st		%f0, [%i4 + 0]
1186	st		%f1, [%i4 + 4]
1187	st		%f2, [%i4 + 8]
1188	st		%f3, [%i4 + 12]
1189.L256_cbc_enc_abort:
1190	ret
1191	restore
1192
! Unaligned-output store.  %f4/%f6/%f8 hold the 16 result bytes shifted
! onto the 8-byte grid of %i1; the first and last dwords are written
! with complementary byte masks (%l3 and ~%l3).  The ldxa pre-loads the
! first dword of the next block, which is why the loop is re-entered at
! +4.  ASI 0x82 is the non-faulting primary ASI in SPARC V9, presumably
! chosen because %i0 may already point past the input on the last block.
1193.align	16
11942:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1195						! and ~3x deterioration
1196						! in inp==out case
1197	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1198	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1199	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1200
1201	stda		%f4, [%i1 + %l3]0xc0	! partial store
1202	std		%f6, [%i1 + 8]
1203	add		%i1, 16, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1204	orn		%g0, %l3, %l3
1205	stda		%f8, [%i1 + %l3]0xc0	! partial store
1206
1207	brnz,pt		%i2, .L256_cbc_enc_loop+4
1208	orn		%g0, %l3, %l3
! Done: the last ciphertext block becomes the new ivec.
1209	st		%f0, [%i4 + 0]
1210	st		%f1, [%i4 + 4]
1211	st		%f2, [%i4 + 8]
1212	st		%f3, [%i4 + 12]
1213	ret
1214	restore
1215
1216!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path (8-byte aligned output, length >= 128, inp != out).
! %l5 = number of blocks covering the bytes past the last 64-byte
! boundary of the output; they are handled by the ordinary loop.
! %i2 = number of blocks written here with ASI 0xe2 stores.
1217.align	32
1218.L256cbc_enc_blk:
1219	add	%i1, %i2, %l5
1220	and	%l5, 63, %l5	! tail
1221	sub	%i2, %l5, %i2
1222	add	%l5, 15, %l5	! round up to 16n
1223	srlx	%i2, 4, %i2
1224	srl	%l5, 4, %l5
1225
1226.L256_cbc_enc_blk_loop:
1227	ldx		[%i0 + 0], %o0
1228	brz,pt		%l0, 5f
1229	ldx		[%i0 + 8], %o1
1230
! Misaligned input: merge three dwords into the two wanted ones.
1231	ldx		[%i0 + 16], %o2
1232	sllx		%o0, %l0, %o0
1233	srlx		%o1, %l1, %g1
1234	sllx		%o1, %l0, %o1
1235	or		%g1, %o0, %o0
1236	srlx		%o2, %l1, %o2
1237	or		%o2, %o1, %o1
12385:
1239	xor		%g4, %o0, %o0		! ^= rk[0]
1240	xor		%g5, %o1, %o1
1241	.word	0x99b02308 !movxtod	%o0,%f12
1242	.word	0x9db02309 !movxtod	%o1,%f14
1243
1244	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1245	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1246	prefetch	[%i0 + 16+63], 20
1247	call		_cmll256_encrypt_1x
1248	add		%i0, 16, %i0
1249	sub		%i2, 1, %i2
1250
1251	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1252	add		%i1, 8, %i1
1253	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1254	brnz,pt		%i2, .L256_cbc_enc_blk_loop
1255	add		%i1, 8, %i1
1256
! Order the block-init stores, then hand any tail blocks to the ordinary
! loop; the mov in the delay slot sets its block count.
1257	membar		#StoreLoad|#StoreStore
1258	brnz,pt		%l5, .L256_cbc_enc_loop
1259	mov		%l5, %i2
! No tail: the last ciphertext block becomes the new ivec.
1260	st		%f0, [%i4 + 0]
1261	st		%f1, [%i4 + 4]
1262	st		%f2, [%i4 + 8]
1263	st		%f3, [%i4 + 12]
1264	ret
1265	restore
1266.type	cmll256_t4_cbc_encrypt,#function
1267.size	cmll256_t4_cbc_encrypt,.-cmll256_t4_cbc_encrypt
1268.globl	cmll128_t4_cbc_decrypt
1269.align	32
!
! cmll128_t4_cbc_decrypt: CBC-mode decryption, two 16-byte blocks per
! iteration where possible, with a one-block step for an odd count.
!
! Register roles as used by the code below:
!   %i0	input pointer; rounded down to 8 bytes, misalignment is
!		undone with the %l0/%l1 shift pair
!   %i1	output pointer; misalignment is handled with faligndata
!		and partial stores
!   %i2	length in bytes on entry, block count inside the loops
!   %i3	not referenced directly here; presumably the key schedule
!		used by the _cmll128_* helpers -- confirm in those helpers
!   %i4	16-byte ivec, read on entry and rewritten on exit
!   %g4,%g5	rk[0]; presumably set up by _cmll128_load_deckey
!   %f12-%f15	chaining value (ivec, then the previous ciphertext block)
!   %o0-%o3	raw ciphertext, kept in integer registers across the
!		helper call (the code relies on the helpers leaving them
!		intact) so it can be xored in and become the next
!		chaining value
!   %l0,%l1	input realignment shifts, (%i0 & 7) * 8 and 64 - that
!   %l2	%i1 & 7, non-zero when the output is misaligned
!   %l3	byte mask for partial stores, 0xff >> %l2
!   %l5	block-store path selector, later the tail block count
!
! A zero length returns at once and leaves ivec untouched.
!
1270cmll128_t4_cbc_decrypt:
1271	save		%sp, -192, %sp
! Zero length: leave through the abort label without touching ivec.
1272	cmp		%i2, 0
1273	be,pn		%xcc, .L128_cbc_dec_abort
1274	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1275	sub		%i0, %i1, %l5	! %i0!=%i1
1276	ld		[%i4 + 0], %f12	! load ivec
1277	ld		[%i4 + 4], %f13
1278	ld		[%i4 + 8], %f14
1279	ld		[%i4 + 12], %f15
1280	prefetch	[%i0], 20
1281	prefetch	[%i0 + 63], 20
! Helper is defined outside this chunk; the "and" executes in the call
! delay slot and captures the input misalignment in bytes.
1282	call		_cmll128_load_deckey
1283	and		%i0, 7, %l0
1284	andn		%i0, 7, %i0
1285	sll		%l0, 3, %l0
1286	mov		64, %l1
1287	mov		0xff, %l3
1288	sub		%l1, %l0, %l1
1289	and		%i1, 7, %l2
! %l5 (= inp - out) stays non-zero only when the output is 8-byte
! aligned, the length is at least 256 and inp != out; only then is the
! block-initializing-store path taken.
1290	cmp		%i2, 255
1291	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1292	movleu		%xcc, 0, %l5	!	%i2<256 ||
1293	brnz,pn		%l5, .L128cbc_dec_blk	!	%i0==%i1)
1294	srl		%l3, %l2, %l3
1295
! Ordinary-store path.  Bit 4 of the byte length is the parity of the
! block count: even goes straight to the 2x loop, odd falls through to
! handle a single block first.  alignaddrl rounds %i1 down to 8 bytes
! and, per the VIS definition, programs the GSR alignment for
! faligndata.
1296	andcc		%i2, 16, %g0		! is number of blocks even?
1297	srlx		%i2, 4, %i2
1298	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1299	bz		%icc, .L128_cbc_dec_loop2x
1300	prefetch	[%i1], 22
! Single-block step.
1301.L128_cbc_dec_loop:
1302	ldx		[%i0 + 0], %o0
1303	brz,pt		%l0, 4f
1304	ldx		[%i0 + 8], %o1
1305
! Misaligned input: merge three dwords into the two wanted ones.
1306	ldx		[%i0 + 16], %o2
1307	sllx		%o0, %l0, %o0
1308	srlx		%o1, %l1, %g1
1309	sllx		%o1, %l0, %o1
1310	or		%g1, %o0, %o0
1311	srlx		%o2, %l1, %o2
1312	or		%o2, %o1, %o1
13134:
! The xor results go to %o2/%o3 so that %o0/%o1 keep the raw ciphertext.
1314	xor		%g4, %o0, %o2		! ^= rk[0]
1315	xor		%g5, %o1, %o3
1316	.word	0x81b0230a !movxtod	%o2,%f0
1317	.word	0x85b0230b !movxtod	%o3,%f2
1318
1319	prefetch	[%i1 + 63], 22
1320	prefetch	[%i0 + 16+63], 20
1321	call		_cmll128_decrypt_1x
1322	add		%i0, 16, %i0
1323
! plaintext = decrypted ^ chaining value; this ciphertext block then
! becomes the chaining value.
1324	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1325	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1326	.word	0x99b02308 !movxtod	%o0,%f12
1327	.word	0x9db02309 !movxtod	%o1,%f14
1328
1329	brnz,pn		%l2, 2f
1330	sub		%i2, 1, %i2
1331
1332	std		%f0, [%i1 + 0]
1333	std		%f2, [%i1 + 8]
! Whatever remains is processed by the 2x loop.
1334	brnz,pt		%i2, .L128_cbc_dec_loop2x
1335	add		%i1, 16, %i1
! Done: the last ciphertext block becomes the new ivec.
1336	st		%f12, [%i4 + 0]
1337	st		%f13, [%i4 + 4]
1338	st		%f14, [%i4 + 8]
1339	st		%f15, [%i4 + 12]
1340.L128_cbc_dec_abort:
1341	ret
1342	restore
1343
! Unaligned-output store for the single-block step.  The ldxa pre-loads
! the first dword of the next block, so the 2x loop is re-entered at +4
! (past its first ldx).  ASI 0x82 is the non-faulting primary ASI in
! SPARC V9.
1344.align	16
13452:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1346						! and ~3x deterioration
1347						! in inp==out case
1348	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1349	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1350	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1351
1352	stda		%f4, [%i1 + %l3]0xc0	! partial store
1353	std		%f6, [%i1 + 8]
1354	add		%i1, 16, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1355	orn		%g0, %l3, %l3
1356	stda		%f8, [%i1 + %l3]0xc0	! partial store
1357
1358	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1359	orn		%g0, %l3, %l3
1360	st		%f12, [%i4 + 0]
1361	st		%f13, [%i4 + 4]
1362	st		%f14, [%i4 + 8]
1363	st		%f15, [%i4 + 12]
1364	ret
1365	restore
1366
1367!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration loop (ordinary stores).
1368.align	32
1369.L128_cbc_dec_loop2x:
1370	ldx		[%i0 + 0], %o0
1371	ldx		[%i0 + 8], %o1
1372	ldx		[%i0 + 16], %o2
1373	brz,pt		%l0, 4f
1374	ldx		[%i0 + 24], %o3
1375
! Misaligned input: merge five dwords into the four wanted ones.
1376	ldx		[%i0 + 32], %o4
1377	sllx		%o0, %l0, %o0
1378	srlx		%o1, %l1, %g1
1379	or		%g1, %o0, %o0
1380	sllx		%o1, %l0, %o1
1381	srlx		%o2, %l1, %g1
1382	or		%g1, %o1, %o1
1383	sllx		%o2, %l0, %o2
1384	srlx		%o3, %l1, %g1
1385	or		%g1, %o2, %o2
1386	sllx		%o3, %l0, %o3
1387	srlx		%o4, %l1, %o4
1388	or		%o4, %o3, %o3
13894:
! %o4/%o5 are scratch; %o0-%o3 keep the raw ciphertext of both blocks.
1390	xor		%g4, %o0, %o4		! ^= rk[0]
1391	xor		%g5, %o1, %o5
1392	.word	0x81b0230c !movxtod	%o4,%f0
1393	.word	0x85b0230d !movxtod	%o5,%f2
1394	xor		%g4, %o2, %o4
1395	xor		%g5, %o3, %o5
1396	.word	0x89b0230c !movxtod	%o4,%f4
1397	.word	0x8db0230d !movxtod	%o5,%f6
1398
1399	prefetch	[%i1 + 63], 22
1400	prefetch	[%i0 + 32+63], 20
1401	call		_cmll128_decrypt_2x
1402	add		%i0, 32, %i0
1403
! Block 0 is xored with the incoming chaining value, block 1 with the
! ciphertext of block 0 (%f8:%f10); the ciphertext of block 1 becomes
! the new chaining value.
1404	.word	0x91b02308 !movxtod	%o0,%f8
1405	.word	0x95b02309 !movxtod	%o1,%f10
1406	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1407	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1408	.word	0x99b0230a !movxtod	%o2,%f12
1409	.word	0x9db0230b !movxtod	%o3,%f14
1410	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1411	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1412
1413	brnz,pn		%l2, 2f
1414	sub		%i2, 2, %i2
1415
1416	std		%f0, [%i1 + 0]
1417	std		%f2, [%i1 + 8]
1418	std		%f4, [%i1 + 16]
1419	std		%f6, [%i1 + 24]
1420	brnz,pt		%i2, .L128_cbc_dec_loop2x
1421	add		%i1, 32, %i1
! Done: the last ciphertext block becomes the new ivec.
1422	st		%f12, [%i4 + 0]
1423	st		%f13, [%i4 + 4]
1424	st		%f14, [%i4 + 8]
1425	st		%f15, [%i4 + 12]
1426	ret
1427	restore
1428
! Unaligned-output store for two blocks: five shifted dwords, the first
! (%f8) and last (%f6) written with complementary byte masks.  The ldxa
! pre-loads the next %o0, so the loop is re-entered at +4.
1429.align	16
14302:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1431						! and ~3x deterioration
1432						! in inp==out case
1433	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1434	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1435	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1436	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1437	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1438	stda		%f8, [%i1 + %l3]0xc0	! partial store
1439	std		%f0, [%i1 + 8]
1440	std		%f2, [%i1 + 16]
1441	std		%f4, [%i1 + 24]
1442	add		%i1, 32, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1443	orn		%g0, %l3, %l3
1444	stda		%f6, [%i1 + %l3]0xc0	! partial store
1445
1446	brnz,pt		%i2, .L128_cbc_dec_loop2x+4
1447	orn		%g0, %l3, %l3
1448	st		%f12, [%i4 + 0]
1449	st		%f13, [%i4 + 4]
1450	st		%f14, [%i4 + 8]
1451	st		%f15, [%i4 + 12]
1452	ret
1453	restore
1454
1455!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path (8-byte aligned output, length >= 256, inp != out).
! %l5 = tail block count, %i2 = block count for the ASI 0xe2 loop.  One
! block is moved from %i2 to %l5 so that the "subcc 2 / bgu" loop exit
! below leaves %i2 at 0 or -1, which is then folded back into the tail.
1456.align	32
1457.L128cbc_dec_blk:
1458	add	%i1, %i2, %l5
1459	and	%l5, 63, %l5	! tail
1460	sub	%i2, %l5, %i2
1461	add	%l5, 15, %l5	! round up to 16n
1462	srlx	%i2, 4, %i2
1463	srl	%l5, 4, %l5
1464	sub	%i2, 1, %i2
1465	add	%l5, 1, %l5
1466
1467.L128_cbc_dec_blk_loop2x:
1468	ldx		[%i0 + 0], %o0
1469	ldx		[%i0 + 8], %o1
1470	ldx		[%i0 + 16], %o2
1471	brz,pt		%l0, 5f
1472	ldx		[%i0 + 24], %o3
1473
! Misaligned input: merge five dwords into the four wanted ones.
1474	ldx		[%i0 + 32], %o4
1475	sllx		%o0, %l0, %o0
1476	srlx		%o1, %l1, %g1
1477	or		%g1, %o0, %o0
1478	sllx		%o1, %l0, %o1
1479	srlx		%o2, %l1, %g1
1480	or		%g1, %o1, %o1
1481	sllx		%o2, %l0, %o2
1482	srlx		%o3, %l1, %g1
1483	or		%g1, %o2, %o2
1484	sllx		%o3, %l0, %o3
1485	srlx		%o4, %l1, %o4
1486	or		%o4, %o3, %o3
14875:
1488	xor		%g4, %o0, %o4		! ^= rk[0]
1489	xor		%g5, %o1, %o5
1490	.word	0x81b0230c !movxtod	%o4,%f0
1491	.word	0x85b0230d !movxtod	%o5,%f2
1492	xor		%g4, %o2, %o4
1493	xor		%g5, %o3, %o5
1494	.word	0x89b0230c !movxtod	%o4,%f4
1495	.word	0x8db0230d !movxtod	%o5,%f6
1496
1497	prefetch	[%i0 + 32+63], 20
1498	call		_cmll128_decrypt_2x
1499	add		%i0, 32, %i0
! The condition codes set here are consumed by the bgu at the bottom of
! the loop; nothing in between modifies them.
1500	subcc		%i2, 2, %i2
1501
1502	.word	0x91b02308 !movxtod	%o0,%f8
1503	.word	0x95b02309 !movxtod	%o1,%f10
1504	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1505	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1506	.word	0x99b0230a !movxtod	%o2,%f12
1507	.word	0x9db0230b !movxtod	%o3,%f14
1508	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1509	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1510
1511	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1512	add		%i1, 8, %i1
1513	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1514	add		%i1, 8, %i1
1515	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1516	add		%i1, 8, %i1
1517	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1518	bgu,pt		%xcc, .L128_cbc_dec_blk_loop2x
1519	add		%i1, 8, %i1
1520
! Remaining blocks = tail count + leftover (0 or -1).  Odd: one single
! block first; even and non-zero: straight to the 2x loop.  The srl in
! the delay slot clears the upper 32 bits of %i2.
1521	add		%l5, %i2, %i2
1522	andcc		%i2, 1, %g0		! is number of blocks even?
1523	membar		#StoreLoad|#StoreStore
1524	bnz,pt		%icc, .L128_cbc_dec_loop
1525	srl		%i2, 0, %i2
1526	brnz,pn		%i2, .L128_cbc_dec_loop2x
1527	nop
1528	st		%f12, [%i4 + 0]	! write out ivec
1529	st		%f13, [%i4 + 4]
1530	st		%f14, [%i4 + 8]
1531	st		%f15, [%i4 + 12]
1532	ret
1533	restore
1534.type	cmll128_t4_cbc_decrypt,#function
1535.size	cmll128_t4_cbc_decrypt,.-cmll128_t4_cbc_decrypt
1536.globl	cmll256_t4_cbc_decrypt
1537.align	32
!
! cmll256_t4_cbc_decrypt: CBC-mode decryption, two 16-byte blocks per
! iteration where possible, with a one-block step for an odd count.
! Same structure as the 128-bit routine, but calling the _cmll256_*
! helpers (defined outside this chunk, apart from the tail of
! _cmll256_decrypt_2x).
!
! Register roles as used by the code below:
!   %i0	input pointer; rounded down to 8 bytes, misalignment is
!		undone with the %l0/%l1 shift pair
!   %i1	output pointer; misalignment is handled with faligndata
!		and partial stores
!   %i2	length in bytes on entry, block count inside the loops
!   %i3	not referenced directly here; presumably the key schedule
!		used by the _cmll256_* helpers -- confirm in those helpers
!   %i4	16-byte ivec, read on entry and rewritten on exit
!   %g4,%g5	rk[0]; presumably set up by _cmll256_load_deckey
!   %f12-%f15	chaining value (ivec, then the previous ciphertext block)
!   %o0-%o3	raw ciphertext, kept in integer registers across the
!		helper call (the code relies on the helpers leaving them
!		intact) so it can be xored in and become the next
!		chaining value
!   %l0,%l1	input realignment shifts, (%i0 & 7) * 8 and 64 - that
!   %l2	%i1 & 7, non-zero when the output is misaligned
!   %l3	byte mask for partial stores, 0xff >> %l2
!   %l5	block-store path selector, later the tail block count
!
! A zero length returns at once and leaves ivec untouched.
!
1538cmll256_t4_cbc_decrypt:
1539	save		%sp, -192, %sp
! Zero length: leave through the abort label without touching ivec.
1540	cmp		%i2, 0
1541	be,pn		%xcc, .L256_cbc_dec_abort
1542	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1543	sub		%i0, %i1, %l5	! %i0!=%i1
1544	ld		[%i4 + 0], %f12	! load ivec
1545	ld		[%i4 + 4], %f13
1546	ld		[%i4 + 8], %f14
1547	ld		[%i4 + 12], %f15
1548	prefetch	[%i0], 20
1549	prefetch	[%i0 + 63], 20
! The "and" executes in the call delay slot and captures the input
! misalignment in bytes.
1550	call		_cmll256_load_deckey
1551	and		%i0, 7, %l0
1552	andn		%i0, 7, %i0
1553	sll		%l0, 3, %l0
1554	mov		64, %l1
1555	mov		0xff, %l3
1556	sub		%l1, %l0, %l1
1557	and		%i1, 7, %l2
! %l5 (= inp - out) stays non-zero only when the output is 8-byte
! aligned, the length is at least 256 and inp != out; only then is the
! block-initializing-store path taken.
1558	cmp		%i2, 255
1559	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1560	movleu		%xcc, 0, %l5	!	%i2<256 ||
1561	brnz,pn		%l5, .L256cbc_dec_blk	!	%i0==%i1)
1562	srl		%l3, %l2, %l3
1563
! Ordinary-store path.  Bit 4 of the byte length is the parity of the
! block count: even goes straight to the 2x loop, odd falls through to
! handle a single block first.  alignaddrl rounds %i1 down to 8 bytes
! and, per the VIS definition, programs the GSR alignment for
! faligndata.
1564	andcc		%i2, 16, %g0		! is number of blocks even?
1565	srlx		%i2, 4, %i2
1566	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1567	bz		%icc, .L256_cbc_dec_loop2x
1568	prefetch	[%i1], 22
! Single-block step.
1569.L256_cbc_dec_loop:
1570	ldx		[%i0 + 0], %o0
1571	brz,pt		%l0, 4f
1572	ldx		[%i0 + 8], %o1
1573
! Misaligned input: merge three dwords into the two wanted ones.
1574	ldx		[%i0 + 16], %o2
1575	sllx		%o0, %l0, %o0
1576	srlx		%o1, %l1, %g1
1577	sllx		%o1, %l0, %o1
1578	or		%g1, %o0, %o0
1579	srlx		%o2, %l1, %o2
1580	or		%o2, %o1, %o1
15814:
! The xor results go to %o2/%o3 so that %o0/%o1 keep the raw ciphertext.
1582	xor		%g4, %o0, %o2		! ^= rk[0]
1583	xor		%g5, %o1, %o3
1584	.word	0x81b0230a !movxtod	%o2,%f0
1585	.word	0x85b0230b !movxtod	%o3,%f2
1586
1587	prefetch	[%i1 + 63], 22
1588	prefetch	[%i0 + 16+63], 20
1589	call		_cmll256_decrypt_1x
1590	add		%i0, 16, %i0
1591
! plaintext = decrypted ^ chaining value; this ciphertext block then
! becomes the chaining value.
1592	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1593	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1594	.word	0x99b02308 !movxtod	%o0,%f12
1595	.word	0x9db02309 !movxtod	%o1,%f14
1596
1597	brnz,pn		%l2, 2f
1598	sub		%i2, 1, %i2
1599
1600	std		%f0, [%i1 + 0]
1601	std		%f2, [%i1 + 8]
! Whatever remains is processed by the 2x loop.
1602	brnz,pt		%i2, .L256_cbc_dec_loop2x
1603	add		%i1, 16, %i1
! Done: the last ciphertext block becomes the new ivec.
1604	st		%f12, [%i4 + 0]
1605	st		%f13, [%i4 + 4]
1606	st		%f14, [%i4 + 8]
1607	st		%f15, [%i4 + 12]
1608.L256_cbc_dec_abort:
1609	ret
1610	restore
1611
! Unaligned-output store for the single-block step.  The ldxa pre-loads
! the first dword of the next block, so the 2x loop is re-entered at +4
! (past its first ldx).  ASI 0x82 is the non-faulting primary ASI in
! SPARC V9.
1612.align	16
16132:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1614						! and ~3x deterioration
1615						! in inp==out case
1616	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1617	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1618	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1619
1620	stda		%f4, [%i1 + %l3]0xc0	! partial store
1621	std		%f6, [%i1 + 8]
1622	add		%i1, 16, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1623	orn		%g0, %l3, %l3
1624	stda		%f8, [%i1 + %l3]0xc0	! partial store
1625
1626	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1627	orn		%g0, %l3, %l3
1628	st		%f12, [%i4 + 0]
1629	st		%f13, [%i4 + 4]
1630	st		%f14, [%i4 + 8]
1631	st		%f15, [%i4 + 12]
1632	ret
1633	restore
1634
1635!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration loop (ordinary stores).
1636.align	32
1637.L256_cbc_dec_loop2x:
1638	ldx		[%i0 + 0], %o0
1639	ldx		[%i0 + 8], %o1
1640	ldx		[%i0 + 16], %o2
1641	brz,pt		%l0, 4f
1642	ldx		[%i0 + 24], %o3
1643
! Misaligned input: merge five dwords into the four wanted ones.
1644	ldx		[%i0 + 32], %o4
1645	sllx		%o0, %l0, %o0
1646	srlx		%o1, %l1, %g1
1647	or		%g1, %o0, %o0
1648	sllx		%o1, %l0, %o1
1649	srlx		%o2, %l1, %g1
1650	or		%g1, %o1, %o1
1651	sllx		%o2, %l0, %o2
1652	srlx		%o3, %l1, %g1
1653	or		%g1, %o2, %o2
1654	sllx		%o3, %l0, %o3
1655	srlx		%o4, %l1, %o4
1656	or		%o4, %o3, %o3
16574:
! %o4/%o5 are scratch; %o0-%o3 keep the raw ciphertext of both blocks.
1658	xor		%g4, %o0, %o4		! ^= rk[0]
1659	xor		%g5, %o1, %o5
1660	.word	0x81b0230c !movxtod	%o4,%f0
1661	.word	0x85b0230d !movxtod	%o5,%f2
1662	xor		%g4, %o2, %o4
1663	xor		%g5, %o3, %o5
1664	.word	0x89b0230c !movxtod	%o4,%f4
1665	.word	0x8db0230d !movxtod	%o5,%f6
1666
1667	prefetch	[%i1 + 63], 22
1668	prefetch	[%i0 + 32+63], 20
1669	call		_cmll256_decrypt_2x
1670	add		%i0, 32, %i0
1671
! Block 0 is xored with the incoming chaining value, block 1 with the
! ciphertext of block 0 (%f8:%f10); the ciphertext of block 1 becomes
! the new chaining value.
1672	.word	0x91b02308 !movxtod	%o0,%f8
1673	.word	0x95b02309 !movxtod	%o1,%f10
1674	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1675	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1676	.word	0x99b0230a !movxtod	%o2,%f12
1677	.word	0x9db0230b !movxtod	%o3,%f14
1678	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1679	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1680
1681	brnz,pn		%l2, 2f
1682	sub		%i2, 2, %i2
1683
1684	std		%f0, [%i1 + 0]
1685	std		%f2, [%i1 + 8]
1686	std		%f4, [%i1 + 16]
1687	std		%f6, [%i1 + 24]
1688	brnz,pt		%i2, .L256_cbc_dec_loop2x
1689	add		%i1, 32, %i1
! Done: the last ciphertext block becomes the new ivec.
1690	st		%f12, [%i4 + 0]
1691	st		%f13, [%i4 + 4]
1692	st		%f14, [%i4 + 8]
1693	st		%f15, [%i4 + 12]
1694	ret
1695	restore
1696
! Unaligned-output store for two blocks: five shifted dwords, the first
! (%f8) and last (%f6) written with complementary byte masks.  The ldxa
! pre-loads the next %o0, so the loop is re-entered at +4.
1697.align	16
16982:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1699						! and ~3x deterioration
1700						! in inp==out case
1701	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1702	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1703	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1704	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1705	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1706	stda		%f8, [%i1 + %l3]0xc0	! partial store
1707	std		%f0, [%i1 + 8]
1708	std		%f2, [%i1 + 16]
1709	std		%f4, [%i1 + 24]
1710	add		%i1, 32, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1711	orn		%g0, %l3, %l3
1712	stda		%f6, [%i1 + %l3]0xc0	! partial store
1713
1714	brnz,pt		%i2, .L256_cbc_dec_loop2x+4
1715	orn		%g0, %l3, %l3
1716	st		%f12, [%i4 + 0]
1717	st		%f13, [%i4 + 4]
1718	st		%f14, [%i4 + 8]
1719	st		%f15, [%i4 + 12]
1720	ret
1721	restore
1722
1723!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path (8-byte aligned output, length >= 256, inp != out).
! %l5 = tail block count, %i2 = block count for the ASI 0xe2 loop.  One
! block is moved from %i2 to %l5 so that the "subcc 2 / bgu" loop exit
! below leaves %i2 at 0 or -1, which is then folded back into the tail.
1724.align	32
1725.L256cbc_dec_blk:
1726	add	%i1, %i2, %l5
1727	and	%l5, 63, %l5	! tail
1728	sub	%i2, %l5, %i2
1729	add	%l5, 15, %l5	! round up to 16n
1730	srlx	%i2, 4, %i2
1731	srl	%l5, 4, %l5
1732	sub	%i2, 1, %i2
1733	add	%l5, 1, %l5
1734
1735.L256_cbc_dec_blk_loop2x:
1736	ldx		[%i0 + 0], %o0
1737	ldx		[%i0 + 8], %o1
1738	ldx		[%i0 + 16], %o2
1739	brz,pt		%l0, 5f
1740	ldx		[%i0 + 24], %o3
1741
! Misaligned input: merge five dwords into the four wanted ones.
1742	ldx		[%i0 + 32], %o4
1743	sllx		%o0, %l0, %o0
1744	srlx		%o1, %l1, %g1
1745	or		%g1, %o0, %o0
1746	sllx		%o1, %l0, %o1
1747	srlx		%o2, %l1, %g1
1748	or		%g1, %o1, %o1
1749	sllx		%o2, %l0, %o2
1750	srlx		%o3, %l1, %g1
1751	or		%g1, %o2, %o2
1752	sllx		%o3, %l0, %o3
1753	srlx		%o4, %l1, %o4
1754	or		%o4, %o3, %o3
17555:
1756	xor		%g4, %o0, %o4		! ^= rk[0]
1757	xor		%g5, %o1, %o5
1758	.word	0x81b0230c !movxtod	%o4,%f0
1759	.word	0x85b0230d !movxtod	%o5,%f2
1760	xor		%g4, %o2, %o4
1761	xor		%g5, %o3, %o5
1762	.word	0x89b0230c !movxtod	%o4,%f4
1763	.word	0x8db0230d !movxtod	%o5,%f6
1764
1765	prefetch	[%i0 + 32+63], 20
1766	call		_cmll256_decrypt_2x
1767	add		%i0, 32, %i0
! The condition codes set here are consumed by the bgu at the bottom of
! the loop; nothing in between modifies them.
1768	subcc		%i2, 2, %i2
1769
1770	.word	0x91b02308 !movxtod	%o0,%f8
1771	.word	0x95b02309 !movxtod	%o1,%f10
1772	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
1773	.word	0x85b38d82 !fxor	%f14,%f2,%f2
1774	.word	0x99b0230a !movxtod	%o2,%f12
1775	.word	0x9db0230b !movxtod	%o3,%f14
1776	.word	0x89b20d84 !fxor	%f8,%f4,%f4
1777	.word	0x8db28d86 !fxor	%f10,%f6,%f6
1778
1779	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1780	add		%i1, 8, %i1
1781	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1782	add		%i1, 8, %i1
1783	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1784	add		%i1, 8, %i1
1785	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
1786	bgu,pt		%xcc, .L256_cbc_dec_blk_loop2x
1787	add		%i1, 8, %i1
1788
! Remaining blocks = tail count + leftover (0 or -1).  Odd: one single
! block first; even and non-zero: straight to the 2x loop.  The srl in
! the delay slot clears the upper 32 bits of %i2.
1789	add		%l5, %i2, %i2
1790	andcc		%i2, 1, %g0		! is number of blocks even?
1791	membar		#StoreLoad|#StoreStore
1792	bnz,pt		%icc, .L256_cbc_dec_loop
1793	srl		%i2, 0, %i2
1794	brnz,pn		%i2, .L256_cbc_dec_loop2x
1795	nop
1796	st		%f12, [%i4 + 0]	! write out ivec
1797	st		%f13, [%i4 + 4]
1798	st		%f14, [%i4 + 8]
1799	st		%f15, [%i4 + 12]
1800	ret
1801	restore
1802.type	cmll256_t4_cbc_decrypt,#function
1803.size	cmll256_t4_cbc_decrypt,.-cmll256_t4_cbc_decrypt
1804.globl	cmll128_t4_ctr32_encrypt
1805.align	32
!
! cmll128_t4_ctr32_encrypt: counter-mode encryption with a 32-bit block
! counter, two 16-byte blocks per iteration where possible.
!
! Register roles as used by the code below:
!   %i0	input pointer; rounded down to 8 bytes, misalignment is
!		undone with the %l0/%l1 shift pair
!   %i1	output pointer; misalignment is handled with faligndata
!		and partial stores
!   %i2	number of 16-byte blocks on entry (scaled to bytes for the
!		path selection, then back to blocks)
!   %i3	not referenced directly here; presumably the key schedule
!		used by the _cmll128_* helpers -- confirm in those helpers
!   %i4	16-byte counter block, read as four 32-bit words; this
!		routine never stores to it, so the advanced counter is
!		not written back here
!   %f14	(counter word 0 : word 1) ^ %g4, constant for the call
!   %g5	(counter word 2 << 32) ^ rk[0] low half; xored with %l7
!		per block
!   %l7	counter word 3, incremented per block and truncated to
!		32 bits
!   %l0,%l1	input realignment shifts, (%i0 & 7) * 8 and 64 - that
!   %l2	%i1 & 7, non-zero when the output is misaligned
!   %l3	byte mask for partial stores, 0xff >> %l2
!   %l5	block-store path selector, later the tail block count
!
! NOTE(review): unlike the CBC routines there is no zero-length check
! here; callers presumably always pass at least one block -- confirm.
!
1806cmll128_t4_ctr32_encrypt:
1807	save		%sp, -192, %sp
1808	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
1809
1810	prefetch	[%i0], 20
1811	prefetch	[%i0 + 63], 20
! Helper is defined outside this chunk; the sllx in the call delay slot
! converts the block count to a byte length.
1812	call		_cmll128_load_enckey
1813	sllx		%i2, 4, %i2
1814
1815	ld		[%i4 + 0], %l4	! counter
1816	ld		[%i4 + 4], %l5
1817	ld		[%i4 + 8], %l6
1818	ld		[%i4 + 12], %l7
1819
! Fold the three fixed counter words into rk[0] once; only %l7 changes
! from block to block.
1820	sllx		%l4, 32, %o5
1821	or		%l5, %o5, %o5
1822	sllx		%l6, 32, %g1
1823	xor		%o5, %g4, %g4		! ^= rk[0]
1824	xor		%g1, %g5, %g5
1825	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
1826
1827	sub		%i0, %i1, %l5	! %i0!=%i1
1828	and		%i0, 7, %l0
1829	andn		%i0, 7, %i0
1830	sll		%l0, 3, %l0
1831	mov		64, %l1
1832	mov		0xff, %l3
1833	sub		%l1, %l0, %l1
1834	and		%i1, 7, %l2
! %l5 (= inp - out) stays non-zero only when the output is 8-byte
! aligned, the length is at least 256 and inp != out; only then is the
! block-initializing-store path taken.
1835	cmp		%i2, 255
1836	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
1837	movleu		%xcc, 0, %l5	!	%i2<256 ||
1838	brnz,pn		%l5, .L128_ctr32_blk	!	%i0==%i1)
1839	srl		%l3, %l2, %l3
1840
! Ordinary-store path.  Bit 4 of the byte length is the parity of the
! block count: even goes straight to the 2x loop, odd falls through to
! handle a single block first.  alignaddrl rounds %i1 down to 8 bytes
! and, per the VIS definition, programs the GSR alignment for
! faligndata.
1841	andcc		%i2, 16, %g0		! is number of blocks even?
1842	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
1843	bz		%icc, .L128_ctr32_loop2x
1844	srlx		%i2, 4, %i2
! Single-block step.
1845.L128_ctr32_loop:
1846	ldx		[%i0 + 0], %o0
1847	brz,pt		%l0, 4f
1848	ldx		[%i0 + 8], %o1
1849
! Misaligned input: merge three dwords into the two wanted ones.
1850	ldx		[%i0 + 16], %o2
1851	sllx		%o0, %l0, %o0
1852	srlx		%o1, %l1, %g1
1853	sllx		%o1, %l0, %o1
1854	or		%g1, %o0, %o0
1855	srlx		%o2, %l1, %o2
1856	or		%o2, %o1, %o1
18574:
! %f2 = low half of (counter ^ rk[0]) for this block; the counter word
! is then bumped and wrapped to 32 bits.
1858	xor		%g5, %l7, %g1		! ^= rk[0]
1859	add		%l7, 1, %l7
1860	.word	0x85b02301 !movxtod	%g1,%f2
1861	srl		%l7, 0, %l7		! clruw
1862	prefetch	[%i1 + 63], 22
1863	prefetch	[%i0 + 16+63], 20
! Two camellia_f steps are issued here with %f14 as the high half, and
! the helper is entered 8 bytes (two instructions) past its label --
! presumably skipping its own copies of these steps; confirm against
! _cmll128_encrypt_1x.
1864	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1865	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1866	call		_cmll128_encrypt_1x+8
1867	add		%i0, 16, %i0
1868
! The keystream in %f0:%f2 is xored with the input block from %o0/%o1.
1869	.word	0x95b02308 !movxtod	%o0,%f10
1870	.word	0x99b02309 !movxtod	%o1,%f12
1871	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
1872	.word	0x85b30d82 !fxor	%f12,%f2,%f2
1873
1874	brnz,pn		%l2, 2f
1875	sub		%i2, 1, %i2
1876
1877	std		%f0, [%i1 + 0]
1878	std		%f2, [%i1 + 8]
! Whatever remains is processed by the 2x loop.
1879	brnz,pt		%i2, .L128_ctr32_loop2x
1880	add		%i1, 16, %i1
1881
1882	ret
1883	restore
1884
! Unaligned-output store for the single-block step.  The ldxa pre-loads
! the first dword of the next block, so the 2x loop is re-entered at +4
! (past its first ldx).  ASI 0x82 is the non-faulting primary ASI in
! SPARC V9.
1885.align	16
18862:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1887						! and ~3x deterioration
1888						! in inp==out case
1889	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
1890	.word	0x8db00902 !faligndata	%f0,%f2,%f6
1891	.word	0x91b08902 !faligndata	%f2,%f2,%f8
1892	stda		%f4, [%i1 + %l3]0xc0	! partial store
1893	std		%f6, [%i1 + 8]
1894	add		%i1, 16, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1895	orn		%g0, %l3, %l3
1896	stda		%f8, [%i1 + %l3]0xc0	! partial store
1897
1898	brnz,pt		%i2, .L128_ctr32_loop2x+4
1899	orn		%g0, %l3, %l3
1900
1901	ret
1902	restore
1903
1904!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration loop (ordinary stores).
1905.align	32
1906.L128_ctr32_loop2x:
1907	ldx		[%i0 + 0], %o0
1908	ldx		[%i0 + 8], %o1
1909	ldx		[%i0 + 16], %o2
1910	brz,pt		%l0, 4f
1911	ldx		[%i0 + 24], %o3
1912
! Misaligned input: merge five dwords into the four wanted ones.
1913	ldx		[%i0 + 32], %o4
1914	sllx		%o0, %l0, %o0
1915	srlx		%o1, %l1, %g1
1916	or		%g1, %o0, %o0
1917	sllx		%o1, %l0, %o1
1918	srlx		%o2, %l1, %g1
1919	or		%g1, %o1, %o1
1920	sllx		%o2, %l0, %o2
1921	srlx		%o3, %l1, %g1
1922	or		%g1, %o2, %o2
1923	sllx		%o3, %l0, %o3
1924	srlx		%o4, %l1, %o4
1925	or		%o4, %o3, %o3
19264:
! %f2 and %f6 = low halves of (counter ^ rk[0]) for two consecutive
! counter values.
1927	xor		%g5, %l7, %g1		! ^= rk[0]
1928	add		%l7, 1, %l7
1929	.word	0x85b02301 !movxtod	%g1,%f2
1930	srl		%l7, 0, %l7		! clruw
1931	xor		%g5, %l7, %g1
1932	add		%l7, 1, %l7
1933	.word	0x8db02301 !movxtod	%g1,%f6
1934	srl		%l7, 0, %l7		! clruw
1935	prefetch	[%i1 + 63], 22
1936	prefetch	[%i0 + 32+63], 20
! Four camellia_f steps are issued here with %f14 as the shared high
! half, and the helper is entered 16 bytes (four instructions) past its
! label -- presumably skipping its own copies; confirm against
! _cmll128_encrypt_2x.
1937	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
1938	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
1939	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
1940	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
1941	call		_cmll128_encrypt_2x+16
1942	add		%i0, 32, %i0
1943
! The keystream in %f0-%f6 is xored with the four input dwords; %f8 is
! reused for the fourth one.
1944	.word	0x91b02308 !movxtod	%o0,%f8
1945	.word	0x95b02309 !movxtod	%o1,%f10
1946	.word	0x99b0230a !movxtod	%o2,%f12
1947	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
1948	.word	0x91b0230b !movxtod	%o3,%f8
1949	.word	0x85b28d82 !fxor	%f10,%f2,%f2
1950	.word	0x89b30d84 !fxor	%f12,%f4,%f4
1951	.word	0x8db20d86 !fxor	%f8,%f6,%f6
1952
1953	brnz,pn		%l2, 2f
1954	sub		%i2, 2, %i2
1955
1956	std		%f0, [%i1 + 0]
1957	std		%f2, [%i1 + 8]
1958	std		%f4, [%i1 + 16]
1959	std		%f6, [%i1 + 24]
1960	brnz,pt		%i2, .L128_ctr32_loop2x
1961	add		%i1, 32, %i1
1962
1963	ret
1964	restore
1965
! Unaligned-output store for two blocks: five shifted dwords, the first
! (%f8) and last (%f6) written with complementary byte masks.  The ldxa
! pre-loads the next %o0, so the loop is re-entered at +4.
1966.align	16
19672:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
1968						! and ~3x deterioration
1969						! in inp==out case
1970	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
1971	.word	0x81b00902 !faligndata	%f0,%f2,%f0
1972	.word	0x85b08904 !faligndata	%f2,%f4,%f2
1973	.word	0x89b10906 !faligndata	%f4,%f6,%f4
1974	.word	0x8db18906 !faligndata	%f6,%f6,%f6
1975
1976	stda		%f8, [%i1 + %l3]0xc0	! partial store
1977	std		%f0, [%i1 + 8]
1978	std		%f2, [%i1 + 16]
1979	std		%f4, [%i1 + 24]
1980	add		%i1, 32, %i1
! %l3 = ~%l3 for the trailing dword; restored in the branch delay slot.
1981	orn		%g0, %l3, %l3
1982	stda		%f6, [%i1 + %l3]0xc0	! partial store
1983
1984	brnz,pt		%i2, .L128_ctr32_loop2x+4
1985	orn		%g0, %l3, %l3
1986
1987	ret
1988	restore
1989
1990!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Block-store path (8-byte aligned output, length >= 256, inp != out).
! %l5 = tail block count, %i2 = block count for the ASI 0xe2 loop.  One
! block is moved from %i2 to %l5 so that the "subcc 2 / bgu" loop exit
! below leaves %i2 at 0 or -1, which is then folded back into the tail.
1991.align	32
1992.L128_ctr32_blk:
1993	add	%i1, %i2, %l5
1994	and	%l5, 63, %l5	! tail
1995	sub	%i2, %l5, %i2
1996	add	%l5, 15, %l5	! round up to 16n
1997	srlx	%i2, 4, %i2
1998	srl	%l5, 4, %l5
1999	sub	%i2, 1, %i2
2000	add	%l5, 1, %l5
2001
2002.L128_ctr32_blk_loop2x:
2003	ldx		[%i0 + 0], %o0
2004	ldx		[%i0 + 8], %o1
2005	ldx		[%i0 + 16], %o2
2006	brz,pt		%l0, 5f
2007	ldx		[%i0 + 24], %o3
2008
! Misaligned input: merge five dwords into the four wanted ones.
2009	ldx		[%i0 + 32], %o4
2010	sllx		%o0, %l0, %o0
2011	srlx		%o1, %l1, %g1
2012	or		%g1, %o0, %o0
2013	sllx		%o1, %l0, %o1
2014	srlx		%o2, %l1, %g1
2015	or		%g1, %o1, %o1
2016	sllx		%o2, %l0, %o2
2017	srlx		%o3, %l1, %g1
2018	or		%g1, %o2, %o2
2019	sllx		%o3, %l0, %o3
2020	srlx		%o4, %l1, %o4
2021	or		%o4, %o3, %o3
20225:
2023	xor		%g5, %l7, %g1		! ^= rk[0]
2024	add		%l7, 1, %l7
2025	.word	0x85b02301 !movxtod	%g1,%f2
2026	srl		%l7, 0, %l7		! clruw
2027	xor		%g5, %l7, %g1
2028	add		%l7, 1, %l7
2029	.word	0x8db02301 !movxtod	%g1,%f6
2030	srl		%l7, 0, %l7		! clruw
2031	prefetch	[%i0 + 32+63], 20
! Same inlined steps and +16 helper entry as in .L128_ctr32_loop2x.
2032	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2033	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2034	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2035	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2036	call		_cmll128_encrypt_2x+16
2037	add		%i0, 32, %i0
! The condition codes set here are consumed by the bgu at the bottom of
! the loop; nothing in between modifies them.
2038	subcc		%i2, 2, %i2
2039
2040	.word	0x91b02308 !movxtod	%o0,%f8
2041	.word	0x95b02309 !movxtod	%o1,%f10
2042	.word	0x99b0230a !movxtod	%o2,%f12
2043	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2044	.word	0x91b0230b !movxtod	%o3,%f8
2045	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2046	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2047	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2048
2049	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2050	add		%i1, 8, %i1
2051	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2052	add		%i1, 8, %i1
2053	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2054	add		%i1, 8, %i1
2055	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2056	bgu,pt		%xcc, .L128_ctr32_blk_loop2x
2057	add		%i1, 8, %i1
2058
! Remaining blocks = tail count + leftover (0 or -1).  Odd: one single
! block first; even and non-zero: straight to the 2x loop.  The srl in
! the delay slot clears the upper 32 bits of %i2.
2059	add		%l5, %i2, %i2
2060	andcc		%i2, 1, %g0		! is number of blocks even?
2061	membar		#StoreLoad|#StoreStore
2062	bnz,pt		%icc, .L128_ctr32_loop
2063	srl		%i2, 0, %i2
2064	brnz,pn		%i2, .L128_ctr32_loop2x
2065	nop
2066
2067	ret
2068	restore
2069.type	cmll128_t4_ctr32_encrypt,#function
2070.size	cmll128_t4_ctr32_encrypt,.-cmll128_t4_ctr32_encrypt
2071.globl	cmll256_t4_ctr32_encrypt
2072.align	32
2073cmll256_t4_ctr32_encrypt:
2074	save		%sp, -192, %sp
2075	srln		%i2, 0, %i2		! needed on v8+, "nop" on v9
2076
2077	prefetch	[%i0], 20
2078	prefetch	[%i0 + 63], 20
2079	call		_cmll256_load_enckey
2080	sllx		%i2, 4, %i2
2081
2082	ld		[%i4 + 0], %l4	! counter
2083	ld		[%i4 + 4], %l5
2084	ld		[%i4 + 8], %l6
2085	ld		[%i4 + 12], %l7
2086
2087	sllx		%l4, 32, %o5
2088	or		%l5, %o5, %o5
2089	sllx		%l6, 32, %g1
2090	xor		%o5, %g4, %g4		! ^= rk[0]
2091	xor		%g1, %g5, %g5
2092	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
2093
2094	sub		%i0, %i1, %l5	! %i0!=%i1
2095	and		%i0, 7, %l0
2096	andn		%i0, 7, %i0
2097	sll		%l0, 3, %l0
2098	mov		64, %l1
2099	mov		0xff, %l3
2100	sub		%l1, %l0, %l1
2101	and		%i1, 7, %l2
2102	cmp		%i2, 255
2103	movrnz		%l2, 0, %l5		! if (	%i1&7 ||
2104	movleu		%xcc, 0, %l5	!	%i2<256 ||
2105	brnz,pn		%l5, .L256_ctr32_blk	!	%i0==%i1)
2106	srl		%l3, %l2, %l3
2107
2108	andcc		%i2, 16, %g0		! is number of blocks even?
2109	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
2110	bz		%icc, .L256_ctr32_loop2x
2111	srlx		%i2, 4, %i2
! Single-block CTR step: builds one counter block, encrypts it and XORs the
! result with 16 bytes of input.  Reached when the remaining block count is
! odd; afterwards the rest is handled by .L256_ctr32_loop2x (or we return).
! Register roles, as established by the setup code earlier in this function:
!   %i0  input pointer rounded down to 8 bytes    %i1  output pointer
!   %i2  blocks still to process                  %l7  low 32-bit counter word
!   %l0  input misalignment in bits               %l1  64 - %l0
!   %l2  output address & 7                       %l3  byte mask for partial stores
!   %g5  low 64 bits of rk[0] ^ counter with the bottom 32 bits still open,
!        %f14 the corresponding high 64 bits.
2112.L256_ctr32_loop:
2113	ldx		[%i0 + 0], %o0		! input bytes 0..7 (from the aligned address)
2114	brz,pt		%l0, 4f			! %l0 == 0: input is 8-byte aligned, nothing to merge
2115	ldx		[%i0 + 8], %o1		! (delay slot) next 8 input bytes
2116
2117	ldx		[%i0 + 16], %o2		! misaligned input spans three 8-byte words
2118	sllx		%o0, %l0, %o0		! shift/merge the three words into two:
2119	srlx		%o1, %l1, %g1		!   %o0 = %o0<<%l0 | %o1>>%l1
2120	sllx		%o1, %l0, %o1
2121	or		%g1, %o0, %o0
2122	srlx		%o2, %l1, %o2		!   %o1 = %o1<<%l0 | %o2>>%l1
2123	or		%o2, %o1, %o1
21244:
2125	xor		%g5, %l7, %g1		! ^= rk[0]
2126	add		%l7, 1, %l7		! counter for the next block
2127	.word	0x85b02301 !movxtod	%g1,%f2
2128	srl		%l7, 0, %l7		! clruw
2129	prefetch	[%i1 + 63], 22
2130	prefetch	[%i0 + 16+63], 20
2131	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2132	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2133	call		_cmll256_encrypt_1x+8	! enters the helper 8 bytes past its label; presumably the two camellia_f above replace the skipped instructions - helper body not visible here
2134	add		%i0, 16, %i0		! (delay slot) advance input by one block
2135
2136	.word	0x95b02308 !movxtod	%o0,%f10
2137	.word	0x99b02309 !movxtod	%o1,%f12
2138	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
2139	.word	0x85b30d82 !fxor	%f12,%f2,%f2
2140
2141	brnz,pn		%l2, 2f			! output not 8-byte aligned: go use partial stores
2142	sub		%i2, 1, %i2		! (delay slot) one block consumed
2143
2144	std		%f0, [%i1 + 0]		! aligned output: two plain 8-byte stores
2145	std		%f2, [%i1 + 8]
2146	brnz,pt		%i2, .L256_ctr32_loop2x	! anything left is processed two blocks at a time
2147	add		%i1, 16, %i1		! (delay slot) advance output by one block
2148
2149	ret
2150	restore
2151
2152.align	16
21532:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2154						! and ~3x deterioration
2155						! in inp==out case
2156	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
2157	.word	0x8db00902 !faligndata	%f0,%f2,%f6
2158	.word	0x91b08902 !faligndata	%f2,%f2,%f8
2159	stda		%f4, [%i1 + %l3]0xc0	! partial store
2160	std		%f6, [%i1 + 8]		! middle 8 bytes go out as a full store
2161	add		%i1, 16, %i1
2162	orn		%g0, %l3, %l3		! complement the mask for the trailing piece
2163	stda		%f8, [%i1 + %l3]0xc0	! partial store
2164
2165	brnz,pt		%i2, .L256_ctr32_loop2x+4	! +4 skips that loop's first ldx: %o0 was already loaded by the ldxa above
2166	orn		%g0, %l3, %l3		! (delay slot) complement again to restore the leading mask
2167
2168	ret
2169	restore
2170
2171!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2172.align	32
! Two-block CTR step: builds two consecutive counter blocks, encrypts both and
! XORs them with 32 bytes of input.  Register roles are those established by
! the setup code earlier in this function (%i0 aligned input pointer, %i1
! output pointer, %i2 remaining blocks, %l0/%l1 input shift amounts, %l2
! output misalignment, %l3 partial-store mask, %l7 low counter word).
! NOTE: the unaligned-output tails branch to this label + 4 with %o0 already
! loaded, so the first instruction here must remain exactly the one ldx into %o0.
2173.L256_ctr32_loop2x:
2174	ldx		[%i0 + 0], %o0		! skipped when entered at +4 (%o0 preloaded by ldxa)
2175	ldx		[%i0 + 8], %o1
2176	ldx		[%i0 + 16], %o2
2177	brz,pt		%l0, 4f			! %l0 == 0: input is 8-byte aligned, nothing to merge
2178	ldx		[%i0 + 24], %o3		! (delay slot) fourth input word
2179
2180	ldx		[%i0 + 32], %o4		! misaligned input spans five 8-byte words
2181	sllx		%o0, %l0, %o0		! each output word = word[n]<<%l0 | word[n+1]>>%l1
2182	srlx		%o1, %l1, %g1
2183	or		%g1, %o0, %o0
2184	sllx		%o1, %l0, %o1
2185	srlx		%o2, %l1, %g1
2186	or		%g1, %o1, %o1
2187	sllx		%o2, %l0, %o2
2188	srlx		%o3, %l1, %g1
2189	or		%g1, %o2, %o2
2190	sllx		%o3, %l0, %o3
2191	srlx		%o4, %l1, %o4
2192	or		%o4, %o3, %o3
21934:
2194	xor		%g5, %l7, %g1		! ^= rk[0]
2195	add		%l7, 1, %l7		! counter for the second block
2196	.word	0x85b02301 !movxtod	%g1,%f2
2197	srl		%l7, 0, %l7		! clruw
2198	xor		%g5, %l7, %g1		! same whitening for the second counter value
2199	add		%l7, 1, %l7		! counter for the next iteration
2200	.word	0x8db02301 !movxtod	%g1,%f6
2201	srl		%l7, 0, %l7		! clruw
2202	prefetch	[%i1 + 63], 22
2203	prefetch	[%i0 + 32+63], 20
2204	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2205	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2206	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2207	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2208	call		_cmll256_encrypt_2x+16	! enters the helper 16 bytes past its label; presumably the four camellia_f above replace the skipped instructions - helper body not visible here
2209	add		%i0, 32, %i0		! (delay slot) advance input by two blocks
2210
2211	.word	0x91b02308 !movxtod	%o0,%f8
2212	.word	0x95b02309 !movxtod	%o1,%f10
2213	.word	0x99b0230a !movxtod	%o2,%f12
2214	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2215	.word	0x91b0230b !movxtod	%o3,%f8
2216	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2217	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2218	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2219
2220	brnz,pn		%l2, 2f			! output not 8-byte aligned: go use partial stores
2221	sub		%i2, 2, %i2		! (delay slot) two blocks consumed
2222
2223	std		%f0, [%i1 + 0]		! aligned output: four plain 8-byte stores
2224	std		%f2, [%i1 + 8]
2225	std		%f4, [%i1 + 16]
2226	std		%f6, [%i1 + 24]
2227	brnz,pt		%i2, .L256_ctr32_loop2x	! more blocks: go round again
2228	add		%i1, 32, %i1		! (delay slot) advance output by two blocks
2229
2230	ret
2231	restore
2232
2233.align	16
22342:	ldxa		[%i0]0x82, %o0		! avoid read-after-write hazard
2235						! and ~3x deterioration
2236						! in inp==out case
2237	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
2238	.word	0x81b00902 !faligndata	%f0,%f2,%f0
2239	.word	0x85b08904 !faligndata	%f2,%f4,%f2
2240	.word	0x89b10906 !faligndata	%f4,%f6,%f4
2241	.word	0x8db18906 !faligndata	%f6,%f6,%f6
2242
2243	stda		%f8, [%i1 + %l3]0xc0	! partial store
2244	std		%f0, [%i1 + 8]		! the three interior words go out as full stores
2245	std		%f2, [%i1 + 16]
2246	std		%f4, [%i1 + 24]
2247	add		%i1, 32, %i1
2248	orn		%g0, %l3, %l3		! complement the mask for the trailing piece
2249	stda		%f6, [%i1 + %l3]0xc0	! partial store
2250
2251	brnz,pt		%i2, .L256_ctr32_loop2x+4	! +4 skips the first ldx: %o0 was already loaded by the ldxa above
2252	orn		%g0, %l3, %l3		! (delay slot) complement again to restore the leading mask
2253
2254	ret
2255	restore
2256
2257!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2258.align	32
! Bulk path using block-init stores.  The setup code branches here only when
! %l5 is still non-zero, i.e. the output is 8-byte aligned, the length exceeds
! 255 bytes and the input and output pointers differ.  On entry %i2 is still
! the length in bytes.  The bulk part is sized so that its output ends on a
! 64-byte boundary; the remaining tail blocks are finished by the ordinary
! .L256_ctr32_loop / .L256_ctr32_loop2x code.
2259.L256_ctr32_blk:
2260	add	%i1, %i2, %l5	! address one past the end of the output
2261	and	%l5, 63, %l5	! tail
2262	sub	%i2, %l5, %i2	! bulk byte count (output then ends 64-byte aligned)
2263	add	%l5, 15, %l5	! round up to 16n
2264	srlx	%i2, 4, %i2	! bulk count in blocks
2265	srl	%l5, 4, %l5	! tail count in blocks
2266	sub	%i2, 1, %i2	! bias by one block so the subcc/bgu loop below exits
2267	add	%l5, 1, %l5	! after whole pairs; %l5 + %i2 then gives what is left
2268
2269.L256_ctr32_blk_loop2x:
2270	ldx		[%i0 + 0], %o0
2271	ldx		[%i0 + 8], %o1
2272	ldx		[%i0 + 16], %o2
2273	brz,pt		%l0, 5f			! %l0 == 0: input is 8-byte aligned, nothing to merge
2274	ldx		[%i0 + 24], %o3		! (delay slot) fourth input word
2275
2276	ldx		[%i0 + 32], %o4		! misaligned input spans five 8-byte words
2277	sllx		%o0, %l0, %o0		! each output word = word[n]<<%l0 | word[n+1]>>%l1
2278	srlx		%o1, %l1, %g1
2279	or		%g1, %o0, %o0
2280	sllx		%o1, %l0, %o1
2281	srlx		%o2, %l1, %g1
2282	or		%g1, %o1, %o1
2283	sllx		%o2, %l0, %o2
2284	srlx		%o3, %l1, %g1
2285	or		%g1, %o2, %o2
2286	sllx		%o3, %l0, %o3
2287	srlx		%o4, %l1, %o4
2288	or		%o4, %o3, %o3
22895:
2290	xor		%g5, %l7, %g1		! ^= rk[0]
2291	add		%l7, 1, %l7		! counter for the second block
2292	.word	0x85b02301 !movxtod	%g1,%f2
2293	srl		%l7, 0, %l7		! clruw
2294	xor		%g5, %l7, %g1		! same whitening for the second counter value
2295	add		%l7, 1, %l7		! counter for the next iteration
2296	.word	0x8db02301 !movxtod	%g1,%f6
2297	srl		%l7, 0, %l7		! clruw
2298	prefetch	[%i0 + 32+63], 20
2299	.word	0x84cc1d82 !camellia_f	%f16,%f2,%f14,%f2
2300	.word	0x8ccc1d86 !camellia_f	%f16,%f6,%f14,%f6
2301	.word	0x80cc858e !camellia_f	%f18,%f14,%f2,%f0
2302	.word	0x88cc8d8e !camellia_f	%f18,%f14,%f6,%f4
2303	call		_cmll256_encrypt_2x+16	! enters the helper 16 bytes past its label - helper body not visible here
2304	add		%i0, 32, %i0		! (delay slot) advance input by two blocks
2305	subcc		%i2, 2, %i2		! runs after the helper returns; flags are consumed by the bgu below
2306
2307	.word	0x91b02308 !movxtod	%o0,%f8
2308	.word	0x95b02309 !movxtod	%o1,%f10
2309	.word	0x99b0230a !movxtod	%o2,%f12
2310	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
2311	.word	0x91b0230b !movxtod	%o3,%f8
2312	.word	0x85b28d82 !fxor	%f10,%f2,%f2
2313	.word	0x89b30d84 !fxor	%f12,%f4,%f4
2314	.word	0x8db20d86 !fxor	%f8,%f6,%f6
2315
2316	stda		%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2317	add		%i1, 8, %i1
2318	stda		%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2319	add		%i1, 8, %i1
2320	stda		%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2321	add		%i1, 8, %i1
2322	stda		%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
2323	bgu,pt		%xcc, .L256_ctr32_blk_loop2x	! loop while the biased count stayed above zero (unsigned)
2324	add		%i1, 8, %i1		! (delay slot) output now advanced by 32 bytes in total
2325
2326	add		%l5, %i2, %i2		! blocks still to do: tail plus whatever the bulk loop left
2327	andcc		%i2, 1, %g0		! is number of blocks even?
2328	membar		#StoreLoad|#StoreStore	! order the stores above ahead of later loads and stores
2329	bnz,pt		%icc, .L256_ctr32_loop	! odd count: do a single block first
2330	srl		%i2, 0, %i2		! (delay slot) keep only the low 32 bits of the count
2331	brnz,pn		%i2, .L256_ctr32_loop2x	! even and non-zero: finish in pairs
2332	nop
2333
2334	ret
2335	restore
2336.type	cmll256_t4_ctr32_encrypt,#function
2337.size	cmll256_t4_ctr32_encrypt,.-cmll256_t4_ctr32_encrypt
2338