xref: /freebsd/sys/crypto/openssl/arm/aesv8-armx.S (revision d0b2dbfa)
1/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
2#include "arm_arch.h"
3
4#if __ARM_MAX_ARCH__>=7
@ Assembler setup: ARM-state (.code 32) code with NEON enabled.  The AES
@ instructions further down are emitted as raw .byte words, presumably so
@ that assemblers without AES support can still build this file.
5.text
6.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
7.fpu	neon
8.code	32
9#undef	__thumb2__
10.align	5
@ Constants for key expansion, addressed with adr r3,.Lrcon:
@   row 0 - starting round constant, loaded into q1
@   row 1 - vtbl index pattern, loaded into q2; byte lanes 0..3 of each
@           word select source bytes 13,14,15,12
@   row 2 - round constant 0x1b, reloaded into q1 after the eight looped
@           rounds of the 128-bit expansion
11.Lrcon:
12.long	0x01,0x01,0x01,0x01
13.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
14.long	0x1b,0x1b,0x1b,0x1b
15
@ aes_v8_set_encrypt_key - expand a user key into an AES encryption
@ key schedule.
@
@ In:   r0 = pointer to the raw key bytes
@       r1 = key size in bits; only 128, 192 and 256 pass the checks
@       r2 = pointer to the key schedule to fill
@ Out:  r0 = 0 on success
@            -1 when r0 or r2 is zero
@            -2 when r1 is rejected
@       On success the round count (10, 12 or 14) is stored at byte
@       offset 240 from the incoming r2.
@ Uses: r1 (reused as a loop counter), r3, r12, q0-q3, q8-q10, flags.
@
@ .Lenc_key is a second entry used by aes_v8_set_decrypt_key, which
@ depends on the success-path exit state: r2 = incoming r2 + 240 and
@ r12 = round count.
@ NOTE(review): presumably called from C as
@ int f(const unsigned char *key, int bits, AES_KEY *out) - confirm
@ against the declaring header.
16.globl	aes_v8_set_encrypt_key
17.type	aes_v8_set_encrypt_key,%function
18.align	5
19aes_v8_set_encrypt_key:
20.Lenc_key:
@ Argument checks; r3 carries the pending return code.
21	mov	r3,#-1
22	cmp	r0,#0
23	beq	.Lenc_key_abort
24	cmp	r2,#0
25	beq	.Lenc_key_abort
26	mov	r3,#-2
27	cmp	r1,#128
28	blt	.Lenc_key_abort
29	cmp	r1,#256
30	bgt	.Lenc_key_abort
31	tst	r1,#0x3f
32	bne	.Lenc_key_abort
33
@ Common setup.  The flags from cmp r1,#192 stay live until the
@ three-way dispatch below; nothing in between writes them.
@   q0 = 0, q3 = first 16 key bytes, q1 = round constant, q2 = vtbl mask
34	adr	r3,.Lrcon
35	cmp	r1,#192
36
37	veor	q0,q0,q0
38	vld1.8	{q3},[r0]!
39	mov	r1,#8		@ reuse r1
40	vld1.32	{q1,q2},[r3]!
41
42	blt	.Loop128
43	beq	.L192
44	b	.L256
45
@ 128-bit key: eight looped rounds, then two unrolled rounds that start
@ from the 0x1b row of .Lrcon.  Per round:
@   vtbl  - q10 = last word of q3 rotated by one byte, in every lane
@   aese  - against the zero key in q0, used for its byte substitution
@   vext/veor chain - xors each word of q3 with all lower words
@   q1    - round constant, xored into q10 and then doubled by vshl
46.align	4
47.Loop128:
48	vtbl.8	d20,{q3},d4
49	vtbl.8	d21,{q3},d5
50	vext.8	q9,q0,q3,#12
51	vst1.32	{q3},[r2]!
52.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
53	subs	r1,r1,#1
54
55	veor	q3,q3,q9
56	vext.8	q9,q0,q9,#12
57	veor	q3,q3,q9
58	vext.8	q9,q0,q9,#12
59	veor	q10,q10,q1
60	veor	q3,q3,q9
61	vshl.u8	q1,q1,#1
62	veor	q3,q3,q10
63	bne	.Loop128
64
@ r3 was advanced past the first two rows, so this loads the 0x1b row.
65	vld1.32	{q1},[r3]
66
67	vtbl.8	d20,{q3},d4
68	vtbl.8	d21,{q3},d5
69	vext.8	q9,q0,q3,#12
70	vst1.32	{q3},[r2]!
71.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
72
73	veor	q3,q3,q9
74	vext.8	q9,q0,q9,#12
75	veor	q3,q3,q9
76	vext.8	q9,q0,q9,#12
77	veor	q10,q10,q1
78	veor	q3,q3,q9
79	vshl.u8	q1,q1,#1
80	veor	q3,q3,q10
81
82	vtbl.8	d20,{q3},d4
83	vtbl.8	d21,{q3},d5
84	vext.8	q9,q0,q3,#12
85	vst1.32	{q3},[r2]!
86.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
87
88	veor	q3,q3,q9
89	vext.8	q9,q0,q9,#12
90	veor	q3,q3,q9
91	vext.8	q9,q0,q9,#12
92	veor	q10,q10,q1
93	veor	q3,q3,q9
94	veor	q3,q3,q10
@ Last round key is stored without writeback (r2 = start + 160 here);
@ adding 0x50 moves r2 to start + 240 for the round-count store.
95	vst1.32	{q3},[r2]
96	add	r2,r2,#0x50
97
98	mov	r12,#10
99	b	.Ldone
100
@ 192-bit key: the extra 8 key bytes live in d16 (low half of q8).
@ Subtracting 8 from every mask byte makes vtbl pick bytes 5,6,7,4,
@ that is the upper word of d16.  r1 is still 8, so the loop runs eight
@ times and emits 24 bytes per pass.
101.align	4
102.L192:
103	vld1.8	{d16},[r0]!
104	vmov.i8	q10,#8			@ borrow q10
105	vst1.32	{q3},[r2]!
106	vsub.i8	q2,q2,q10	@ adjust the mask
107
108.Loop192:
109	vtbl.8	d20,{q8},d4
110	vtbl.8	d21,{q8},d5
111	vext.8	q9,q0,q3,#12
@ Either variant leaves r2 advanced by 8 bytes.
112#ifdef __ARMEB__
113	vst1.32	{q8},[r2]!
114	sub	r2,r2,#8
115#else
116	vst1.32	{d16},[r2]!
117#endif
118.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
119	subs	r1,r1,#1
120
121	veor	q3,q3,q9
122	vext.8	q9,q0,q9,#12
123	veor	q3,q3,q9
124	vext.8	q9,q0,q9,#12
125	veor	q3,q3,q9
126
@ Continue the xor chain into q8, seeded with the last word of q3.
127	vdup.32	q9,d7[1]
128	veor	q9,q9,q8
129	veor	q10,q10,q1
130	vext.8	q8,q0,q8,#12
131	vshl.u8	q1,q1,#1
132	veor	q8,q8,q9
133	veor	q3,q3,q10
134	veor	q8,q8,q10
135	vst1.32	{q3},[r2]!
136	bne	.Loop192
137
@ r2 = start + 208 here; +0x20 gives start + 240.
138	mov	r12,#12
139	add	r2,r2,#0x20
140	b	.Ldone
141
@ 256-bit key: q3 holds the first and q8 the second 16 key bytes.
@ Seven passes; each stores q8 and the new q3.  The loop leaves through
@ the beq in the middle of the seventh pass with r2 = start + 240.
142.align	4
143.L256:
144	vld1.8	{q8},[r0]
145	mov	r1,#7
146	mov	r12,#14
147	vst1.32	{q3},[r2]!
148
149.Loop256:
150	vtbl.8	d20,{q8},d4
151	vtbl.8	d21,{q8},d5
152	vext.8	q9,q0,q3,#12
153	vst1.32	{q8},[r2]!
154.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
155	subs	r1,r1,#1
156
157	veor	q3,q3,q9
158	vext.8	q9,q0,q9,#12
159	veor	q3,q3,q9
160	vext.8	q9,q0,q9,#12
161	veor	q10,q10,q1
162	veor	q3,q3,q9
163	vshl.u8	q1,q1,#1
164	veor	q3,q3,q10
165	vst1.32	{q3},[r2]!
166	beq	.Ldone
167
@ Second half of the pass: last word of q3 copied to all lanes and
@ substituted (no rotate, no round constant), then chained into q8.
168	vdup.32	q10,d7[1]
169	vext.8	q9,q0,q8,#12
170.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
171
172	veor	q8,q8,q9
173	vext.8	q9,q0,q9,#12
174	veor	q8,q8,q9
175	vext.8	q9,q0,q9,#12
176	veor	q8,q8,q9
177
178	veor	q8,q8,q10
179	b	.Loop256
180
@ Success exit: record the round count and clear the return code.
181.Ldone:
182	str	r12,[r2]
183	mov	r3,#0
184
185.Lenc_key_abort:
186	mov	r0,r3			@ return value
187
188	bx	lr
189.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
190
@ aes_v8_set_decrypt_key - build an AES decryption key schedule.
@
@ In:   r0, r1, r2 are passed unchanged to .Lenc_key (key bytes, size in
@       bits, schedule pointer)
@ Out:  r0 = 0 on success, otherwise the nonzero code produced by
@       .Lenc_key (the schedule is not post-processed in that case)
@ Uses: r4 and lr are saved and restored; q0, q1, r12 and flags are
@       clobbered, plus whatever .Lenc_key uses.
@
@ After the encryption schedule is built, the round keys are reversed
@ in place.  The first and last keys are only swapped; every other key
@ additionally goes through AESIMC.
191.globl	aes_v8_set_decrypt_key
192.type	aes_v8_set_decrypt_key,%function
193.align	5
194aes_v8_set_decrypt_key:
195	stmdb	sp!,{r4,lr}
196	bl	.Lenc_key
197
198	cmp	r0,#0
199	bne	.Ldec_key_abort
200
@ .Lenc_key leaves r2 at start + 240 and the round count in r12.
@ r2 walks up from the first key, r0 walks down from the last one
@ (r4 = -16 is the post-index step for r0).
201	sub	r2,r2,#240		@ restore original r2
202	mov	r4,#-16
203	add	r0,r2,r12,lsl#4	@ end of key schedule
204
@ Swap the outermost pair without AESIMC.
205	vld1.32	{q0},[r2]
206	vld1.32	{q1},[r0]
207	vst1.32	{q0},[r0],r4
208	vst1.32	{q1},[r2]!
209
@ Swap the inner pairs, applying AESIMC to both, until the pointers
@ meet (unsigned compare).
210.Loop_imc:
211	vld1.32	{q0},[r2]
212	vld1.32	{q1},[r0]
213.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
214.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
215	vst1.32	{q0},[r0],r4
216	vst1.32	{q1},[r2]!
217	cmp	r0,r2
218	bhi	.Loop_imc
219
@ Middle key: transformed and written back through r0.
220	vld1.32	{q0},[r2]
221.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
222	vst1.32	{q0},[r0]
223
224	eor	r0,r0,r0		@ return value
225.Ldec_key_abort:
226	ldmia	sp!,{r4,pc}
227.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@ aes_v8_encrypt - encrypt a single 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = pointer to the key schedule; the round count is read from
@            byte offset 240
@ Out:  16 bytes written at r1; no value is placed in r0.
@ Uses: r2 (advanced through the schedule), r3, q0-q2, flags.
228.globl	aes_v8_encrypt
229.type	aes_v8_encrypt,%function
230.align	5
231aes_v8_encrypt:
232	ldr	r3,[r2,#240]
233	vld1.32	{q0},[r2]!
234	vld1.8	{q2},[r0]
235	sub	r3,r3,#2
236	vld1.32	{q1},[r2]!
237
@ Two rounds per pass; q0 and q1 alternate as the current round key and
@ are reloaded as soon as they have been consumed.  r3 counts the rounds
@ still to be done by the loop.
238.Loop_enc:
239.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
240.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
241	vld1.32	{q0},[r2]!
242	subs	r3,r3,#2
243.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
244.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
245	vld1.32	{q1},[r2]!
246	bgt	.Loop_enc
247
@ Final two rounds: the last aese is not followed by aesmc, and the
@ last round key is applied with a plain veor.
248.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
249.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
250	vld1.32	{q0},[r2]
251.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
252	veor	q2,q2,q0
253
254	vst1.8	{q2},[r1]
255	bx	lr
256.size	aes_v8_encrypt,.-aes_v8_encrypt
@ aes_v8_decrypt - decrypt a single 16-byte block.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = pointer to the key schedule; the round count is read from
@            byte offset 240
@ Out:  16 bytes written at r1; no value is placed in r0.
@ Uses: r2 (advanced through the schedule), r3, q0-q2, flags.
@ NOTE(review): presumably expects a schedule prepared by
@ aes_v8_set_decrypt_key - confirm with the callers.
257.globl	aes_v8_decrypt
258.type	aes_v8_decrypt,%function
259.align	5
260aes_v8_decrypt:
261	ldr	r3,[r2,#240]
262	vld1.32	{q0},[r2]!
263	vld1.8	{q2},[r0]
264	sub	r3,r3,#2
265	vld1.32	{q1},[r2]!
266
@ Same structure as the encrypt loop: two rounds per pass with q0 and
@ q1 alternating as round keys, using aesd and aesimc.
267.Loop_dec:
268.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
269.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
270	vld1.32	{q0},[r2]!
271	subs	r3,r3,#2
272.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
273.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
274	vld1.32	{q1},[r2]!
275	bgt	.Loop_dec
276
@ Final two rounds: no aesimc after the last aesd; the last round key
@ is applied with veor.
277.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
278.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
279	vld1.32	{q0},[r2]
280.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
281	veor	q2,q2,q0
282
283	vst1.8	{q2},[r1]
284	bx	lr
285.size	aes_v8_decrypt,.-aes_v8_decrypt
@ aes_v8_cbc_encrypt - CBC-mode encryption or decryption.
@
@ In:   r0 = input pointer
@       r1 = output pointer
@       r2 = length in bytes
@       r3 = key schedule pointer (round count read from offset 240)
@       [sp]   = pointer to the 16-byte IV, loaded into r4
@       [sp+4] = direction, loaded into r5: zero selects the decrypt
@                path, anything else the encrypt path
@ Out:  output written at r1; the IV buffer at r4 is overwritten with
@       the chaining value for a following call.
@ A length below 16 returns without touching output or IV.  Otherwise
@ (length - 16) is masked with -16, so a trailing partial block is
@ ignored.
@ Saved and restored: r4-r8, lr, d8-d15.  Also clobbers r0-r3, r12,
@ q0-q3, q8-q15 and flags.
@
@ Register roles set up in the prologue:
@   q6      = IV / running chaining value
@   q8,q9   = round keys 0 and 1
@   q10-q15 = six round keys preceding the last, q7 = last round key
@   r5      = round count - 8 (2, 4 or 6), r7 = pointer to round key 2
@   r8      = post-index step for input loads: 16, or 0 once the block
@             just fetched is the final one
286.globl	aes_v8_cbc_encrypt
287.type	aes_v8_cbc_encrypt,%function
288.align	5
289aes_v8_cbc_encrypt:
290	mov	ip,sp
291	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
292	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
293	ldmia	ip,{r4,r5}		@ load remaining args
294	subs	r2,r2,#16
295	mov	r8,#16
296	blo	.Lcbc_abort
297	moveq	r8,#0
298
@ The flags from this cmp survive until beq .Lcbc_dec below; none of
@ the instructions in between write them.
299	cmp	r5,#0			@ en- or decrypting?
300	ldr	r5,[r3,#240]
301	and	r2,r2,#-16
302	vld1.8	{q6},[r4]
303	vld1.8	{q0},[r0],r8
304
305	vld1.32	{q8,q9},[r3]		@ load key schedule...
306	sub	r5,r5,#6
307	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
308	sub	r5,r5,#2
309	vld1.32	{q10,q11},[r7]!
310	vld1.32	{q12,q13},[r7]!
311	vld1.32	{q14,q15},[r7]!
312	vld1.32	{q7},[r7]
313
314	add	r7,r3,#32
315	mov	r6,r5
316	beq	.Lcbc_dec
317
@ ---- Encryption ----
@ q0 = first plaintext block xor IV.  q5 = round key 0 xor last round
@ key; it is xored into every prefetched plaintext block so that the
@ aese q0,q8 at the top of the loop applies the final round key of the
@ previous block, the CBC chaining and round key 0 in one step.
@ r5 == 2 selects the dedicated 128-bit loop.
318	cmp	r5,#2
319	veor	q0,q0,q6
320	veor	q5,q8,q7
321	beq	.Lcbc_enc128
322
@ 192/256-bit setup: q2,q3 = round keys 2,3; r7, r6, r12, r14 and r3
@ become pointers to round keys 1, 4, 5, 6 and 7 respectively.
323	vld1.32	{q2,q3},[r7]
324	add	r7,r3,#16
325	add	r6,r3,#16*4
326	add	r12,r3,#16*5
327.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
328.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
329	add	r14,r3,#16*6
330	add	r3,r3,#16*7
331	b	.Lenter_cbc_enc
332
333.align	4
334.Loop_cbc_enc:
335.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
336.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
337	vst1.8	{q6},[r1]!
338.Lenter_cbc_enc:
339.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
340.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
341.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
342.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
343	vld1.32	{q8},[r6]
344	cmp	r5,#4
345.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
346.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
347	vld1.32	{q9},[r12]
348	beq	.Lcbc_enc192
349
@ Two extra rounds taken only when r5 != 4 (the 256-bit case).
350.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
351.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
352	vld1.32	{q8},[r14]
353.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
354.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
355	vld1.32	{q9},[r3]
356	nop
357
@ Common remainder of the block.  The subs result decides both the r8
@ update (moveq) and the loop branch (bhs) at the bottom.
358.Lcbc_enc192:
359.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
360.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
361	subs	r2,r2,#16
362.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
363.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
364	moveq	r8,#0
365.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
366.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
367.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
368.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
369	vld1.8	{q8},[r0],r8
370.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
371.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
372	veor	q8,q8,q5
373.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
374.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
375	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
376.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
377.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
378.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
@ q6 = finished ciphertext block; q0 keeps the state without the last
@ round key for the next pass.
379	veor	q6,q0,q7
380	bhs	.Loop_cbc_enc
381
382	vst1.8	{q6},[r1]!
383	b	.Lcbc_done
384
@ 128-bit encryption loop: all round keys stay in registers
@ (q8, q9, q2, q3, q10-q15, q7), so no key reloads are needed.
385.align	5
386.Lcbc_enc128:
387	vld1.32	{q2,q3},[r7]
388.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
389.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
390	b	.Lenter_cbc_enc128
391.Loop_cbc_enc128:
392.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
394	vst1.8	{q6},[r1]!
395.Lenter_cbc_enc128:
396.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
397.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
398	subs	r2,r2,#16
399.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
400.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
401	moveq	r8,#0
402.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
403.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
404.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
405.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
406.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
407.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
408	vld1.8	{q8},[r0],r8
409.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
410.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
411.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
412.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
413.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
414.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
415	veor	q8,q8,q5
416.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
417	veor	q6,q0,q7
418	bhs	.Loop_cbc_enc128
419
420	vst1.8	{q6},[r1]!
421	b	.Lcbc_done
@ ---- Decryption ----
@ Three blocks are processed per pass while at least 48 bytes remain;
@ .Lcbc_dec_tail handles a final one or two blocks.
@   q0, q1, q10 = blocks being decrypted
@   q2, q3, q11 = untouched copies of the same ciphertext for chaining
@   r6          = round counter for the inner key loop
@ After the bias r2 holds (bytes remaining - 48); a borrow means fewer
@ than three blocks are available.
422.align	5
423.Lcbc_dec:
424	vld1.8	{q10},[r0]!
425	subs	r2,r2,#32		@ bias
426	add	r6,r5,#2
427	vorr	q3,q0,q0
428	vorr	q1,q0,q0
429	vorr	q11,q10,q10
430	blo	.Lcbc_dec_tail
431
432	vorr	q1,q10,q10
433	vld1.8	{q10},[r0]!
434	vorr	q2,q0,q0
435	vorr	q3,q1,q1
436	vorr	q11,q10,q10
437
@ Inner key loop: two rounds per pass with keys streamed through q8/q9.
438.Loop3x_cbc_dec:
439.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
440.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
441.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
442.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
443.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
444.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
445	vld1.32	{q8},[r7]!
446	subs	r6,r6,#2
447.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
448.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
449.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
450.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
451.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
452.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
453	vld1.32	{q9},[r7]!
454	bgt	.Loop3x_cbc_dec
455
@ Remaining rounds use the keys kept in q12-q15.  Meanwhile the last
@ round key q7 is folded into the three chaining values (q4, q5, q9) so
@ a single veor per block finishes both AES and the CBC xor.  The flags
@ from subs r2 are consumed by movlo here and by bhs at the bottom.
456.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
457.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
458.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
459.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
460.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
461.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
462	veor	q4,q6,q7
463	subs	r2,r2,#0x30
464	veor	q5,q2,q7
465	movlo	r6,r2			@ r6, r6, is zero at this point
466.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
467.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
468.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
469.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
470.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
471.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
472	veor	q9,q3,q7
473	add	r0,r0,r6		@ r0 is adjusted in such way that
474					@ at exit from the loop q1-q10
475					@ are loaded with last "words"
476	vorr	q6,q11,q11
477	mov	r7,r3
478.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
479.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
480.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
481.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
482.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
483.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
484	vld1.8	{q2},[r0]!
485.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
486.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
487.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
488.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
489.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
490.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
491	vld1.8	{q3},[r0]!
492.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
493.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
494.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
495.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
496.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
497.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
498	vld1.8	{q11},[r0]!
499.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
500.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
501.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
502	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
503	add	r6,r5,#2
504	veor	q4,q4,q0
505	veor	q5,q5,q1
506	veor	q10,q10,q9
507	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
508	vst1.8	{q4},[r1]!
509	vorr	q0,q2,q2
510	vst1.8	{q5},[r1]!
511	vorr	q1,q3,q3
512	vst1.8	{q10},[r1]!
513	vorr	q10,q11,q11
514	bhs	.Loop3x_cbc_dec
515
@ r2 == -0x30 means nothing is left after the last triple.
516	cmn	r2,#0x30
517	beq	.Lcbc_done
518	nop
519
@ Tail: one or two blocks, held in q1 and q10 (with a single block both
@ registers are processed but only q10 is used for output).
520.Lcbc_dec_tail:
521.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
522.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
523.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
524.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
525	vld1.32	{q8},[r7]!
526	subs	r6,r6,#2
527.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
528.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
529.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
530.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
531	vld1.32	{q9},[r7]!
532	bgt	.Lcbc_dec_tail
533
534.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
535.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
536.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
537.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
538.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
539.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
540.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
541.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
542.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
543.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
544.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
545.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ r2 == -0x20 means exactly one block is left; the result is consumed
@ by beq .Lcbc_dec_one below.
546	cmn	r2,#0x20
547.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
548.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
549.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
550.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
551	veor	q5,q6,q7
552.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
553.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
554.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
555.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
556	veor	q9,q3,q7
557.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
558.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
559	beq	.Lcbc_dec_one
@ Two blocks: write both, q11 (last ciphertext) becomes the new IV.
560	veor	q5,q5,q1
561	veor	q9,q9,q10
562	vorr	q6,q11,q11
563	vst1.8	{q5},[r1]!
564	vst1.8	{q9},[r1]!
565	b	.Lcbc_done
566
567.Lcbc_dec_one:
568	veor	q5,q5,q10
569	vorr	q6,q11,q11
570	vst1.8	{q5},[r1]!
571
@ Write the chaining value back to the IV buffer, then restore the
@ saved registers and return through the popped lr.
572.Lcbc_done:
573	vst1.8	{q6},[r4]
574.Lcbc_abort:
575	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
576	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
577.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ aes_v8_ctr32_encrypt_blocks - CTR mode over whole blocks with a 32-bit
@ counter.
@
@ In:   r0 = input pointer
@       r1 = output pointer
@       r2 = number of 16-byte blocks
@       r3 = key schedule pointer (round count read from offset 240)
@       [sp] = pointer to the 16-byte counter block, loaded into r4
@ Out:  output written at r1.  Nothing is stored through r4, so the
@       counter block in memory is left as it was.
@ The counter is the word at byte offset 12 of the counter block; it is
@ byte-reversed on little-endian builds before being incremented and
@ reversed again before insertion, i.e. it is handled as big-endian.
@ Only that 32-bit word changes; it wraps without carry into the rest.
@ Saved and restored: r4-r10, lr, d8-d15.  Also clobbers r0-r3, r12,
@ q0-q3, q8-q15 and flags.
@ NOTE(review): no check for r2 == 0 is visible; in that case the tail
@ path below still loads 16 bytes and stores 32.  Confirm callers never
@ pass a zero block count.
@
@ Register roles set up in the prologue:
@   q8,q9   = round keys 0 and 1; q12-q15 = four keys preceding the
@             last; q7 = last round key
@   r5      = round count - 6, r7 = pointer to round key 2
@   r8      = counter value in host order
@   q6      = counter block template; d13[1] is its counter word
@   q0,q1,q10 = counter blocks being encrypted
578.globl	aes_v8_ctr32_encrypt_blocks
579.type	aes_v8_ctr32_encrypt_blocks,%function
580.align	5
581aes_v8_ctr32_encrypt_blocks:
582	mov	ip,sp
583	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
584	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
585	ldr	r4, [ip]		@ load remaining arg
586	ldr	r5,[r3,#240]
587
588	ldr	r8, [r4, #12]
589#ifdef __ARMEB__
590	vld1.8	{q0},[r4]
591#else
592	vld1.32	{q0},[r4]
593#endif
594	vld1.32	{q8,q9},[r3]		@ load key schedule...
595	sub	r5,r5,#4
596	mov	r12,#16
@ Flags from this cmp are used by movlo and by bls .Lctr32_tail below;
@ nothing in between writes them.
597	cmp	r2,#2
598	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
599	sub	r5,r5,#2
600	vld1.32	{q12,q13},[r7]!
601	vld1.32	{q14,q15},[r7]!
602	vld1.32	{q7},[r7]
603	add	r7,r3,#32
604	mov	r6,r5
@ r12 = input step used by the tail: 0 when fewer than two blocks.
605	movlo	r12,#0
606#ifndef __ARMEB__
607	rev	r8, r8
608#endif
@ Build counter+1 in q1 (q0 already holds the initial counter block).
609	add	r10, r8, #1
610	vorr	q6,q0,q0
611	rev	r10, r10
612	vmov.32	d13[1],r10
613	add	r8, r8, #2
614	vorr	q1,q6,q6
615	bls	.Lctr32_tail
@ Three or more blocks: build counter+2 in q10 and bias r2 by -3.
616	rev	r12, r8
617	vmov.32	d13[1],r12
618	sub	r2,r2,#3		@ bias
619	vorr	q10,q6,q6
620	b	.Loop3x_ctr32
621
@ Inner key loop: two rounds per pass on three counter blocks, with
@ round keys streamed through q8/q9 and r6 as the round counter.
622.align	4
623.Loop3x_ctr32:
624.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
625.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
626.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
627.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
628.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
629.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
630	vld1.32	{q8},[r7]!
631	subs	r6,r6,#2
632.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
633.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
634.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
635.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
636.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
637.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
638	vld1.32	{q9},[r7]!
639	bgt	.Loop3x_ctr32
640
@ Final rounds.  The three states move to q4, q5 and q9 so that q0, q1
@ and q10 can be refilled with the next three counter blocks while the
@ last rounds are still in flight.  Input blocks are loaded into q2, q3
@ and q11 and pre-xored with the last round key q7.
641.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
642.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
643.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
644.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
645	vld1.8	{q2},[r0]!
646	add	r9,r8,#1
647.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
648.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
649	vld1.8	{q3},[r0]!
650	rev	r9,r9
651.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
652.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
653.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
654.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
655	vld1.8	{q11},[r0]!
656	mov	r7,r3
657.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
658.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
659.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
660.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
661.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
662.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
663	veor	q2,q2,q7
664	add	r10,r8,#2
665.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
666.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
667	veor	q3,q3,q7
668	add	r8,r8,#3
669.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
670.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
671.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
672.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
673	veor	q11,q11,q7
@ Each new counter value is inserted into q6 and the whole of q6 is
@ then copied to its destination register.
674	vmov.32	d13[1], r9
675.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
676.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
677	vorr	q0,q6,q6
678	rev	r10,r10
679.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
680.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
681	vmov.32	d13[1], r10
682	rev	r12,r8
683.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
684.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
685	vorr	q1,q6,q6
686	vmov.32	d13[1], r12
687.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
688.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
689	vorr	q10,q6,q6
@ Flags from this subs feed bhs .Loop3x_ctr32 at the bottom.
690	subs	r2,r2,#3
691.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
692.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
693.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
694
695	veor	q2,q2,q4
696	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
697	vst1.8	{q2},[r1]!
698	veor	q3,q3,q5
699	mov	r6,r5
700	vst1.8	{q3},[r1]!
701	veor	q11,q11,q9
702	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
703	vst1.8	{q11},[r1]!
704	bhs	.Loop3x_ctr32
705
@ Undo the bias: r2 = blocks left (0, 1 or 2).  r12 becomes the input
@ step for the tail: 0 for a single block, 16 for two.
706	adds	r2,r2,#3
707	beq	.Lctr32_done
708	cmp	r2,#1
709	mov	r12,#16
710	moveq	r12,#0
711
@ Tail: encrypts the two counter blocks in q0 and q1; the second
@ result is stored only when r2 is not 1.
712.Lctr32_tail:
713.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
714.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
715.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
716.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
717	vld1.32	{q8},[r7]!
718	subs	r6,r6,#2
719.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
720.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
721.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
722.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
723	vld1.32	{q9},[r7]!
724	bgt	.Lctr32_tail
725
726.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
727.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
728.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
729.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
730.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
731.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
732.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
733.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
@ With r12 == 0 the second load below re-reads the same input block
@ instead of reading past it.
734	vld1.8	{q2},[r0],r12
735.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
736.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
737.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
738.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
739	vld1.8	{q3},[r0]
740.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
741.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
742.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
743.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
744	veor	q2,q2,q7
745.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
746.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
747.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
748.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
749	veor	q3,q3,q7
750.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
751.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
752
753	cmp	r2,#1
754	veor	q2,q2,q0
755	veor	q3,q3,q1
756	vst1.8	{q2},[r1]!
757	beq	.Lctr32_done
758	vst1.8	{q3},[r1]
759
@ Restore the saved registers and return through the popped lr.
760.Lctr32_done:
761	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
762	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
763.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
764#endif
765