1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(BORINGSSL_PREFIX)
13#include <boringssl_prefix_symbols_asm.h>
14#endif
15#include <openssl/arm_arch.h>
16
17#if __ARM_MAX_ARCH__>=7
18.text
19
20
// Assemble the rest of the file as 32-bit ARM (not Thumb) code; undefining
// __thumb2__ keeps the .thumb_func guards on the entry points below inactive.
21.code	32
22#undef	__thumb2__
23.align	5
// Constant table for key expansion, addressed PC-relatively (adr r3,Lrcon)
// by the set-encrypt-key code.
24Lrcon:
// Row 0: initial AES round constant 0x01 in every 32-bit lane; loaded into q1
// and doubled with vshl.u8 after each expansion step.
25.long	0x01,0x01,0x01,0x01
// Row 1: vtbl.8 index mask, loaded into q2. Per 32-bit lane it selects source
// bytes 13,14,15,12, i.e. the last word rotated by one byte and replicated.
26.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
// Row 2: round constant 0x1b, loaded into q1 by the 128-bit path once its
// loop has consumed 0x01..0x80.
27.long	0x1b,0x1b,0x1b,0x1b
28
29.text
30
// aes_hw_set_encrypt_key: expand a user key into an AES encryption key
// schedule using the ARMv8 AES instructions.
//
// In:   r0 = pointer to the user key bytes
//       r1 = key length in bits (128, 192 or 256)
//       r2 = pointer to the output key schedule
// Out:  r0 =  0 on success
//            -1 if r0 or r2 is null
//            -2 if r1 is below 128, above 256, or not a multiple of 64
// On success the round keys are written from [r2] upward and the round count
// (10, 12 or 14) is stored as a 32-bit word at byte offset 240.
// Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags. On the success path r2 is
// left pointing at offset 240 and r12 holds the round count; the decrypt-key
// routine, which enters at Lenc_key, relies on both.
// AES instructions are emitted as raw .byte encodings; the trailing comment on
// each such line gives the mnemonic.
// NOTE(review): presumably matches a C prototype of the form
//   int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key)
// -- confirm against the C header.
31.globl	_aes_hw_set_encrypt_key
32.private_extern	_aes_hw_set_encrypt_key
33#ifdef __thumb2__
34.thumb_func	_aes_hw_set_encrypt_key
35#endif
36.align	5
37_aes_hw_set_encrypt_key:
38Lenc_key:
// Argument validation. r3 carries the eventual return value: -1 for a null
// pointer, -2 for an unsupported key length.
39	mov	r3,#-1
40	cmp	r0,#0
41	beq	Lenc_key_abort
42	cmp	r2,#0
43	beq	Lenc_key_abort
44	mov	r3,#-2
45	cmp	r1,#128
46	blt	Lenc_key_abort
47	cmp	r1,#256
48	bgt	Lenc_key_abort
49	tst	r1,#0x3f
50	bne	Lenc_key_abort
51
// r3 is reused as the constant-table pointer. The flags from the compare with
// 192 are consumed by the blt/beq dispatch below; none of the instructions in
// between modify them.
52	adr	r3,Lrcon
53	cmp	r1,#192
54
// q0 = 0 (zero source for vext and zero round key for aese),
// q3 = first 16 key bytes, r1 = loop counter,
// q1 = round constant, q2 = rotate-n-splat mask; r3 advances to the 0x1b row.
55	veor	q0,q0,q0
56	vld1.8	{q3},[r0]!
57	mov	r1,#8		@ reuse r1
58	vld1.32	{q1,q2},[r3]!
59
60	blt	Loop128
61	beq	L192
62	b	L256
63
// 128-bit key. Each pass stores the current round key (q3) and derives the
// next: q10 = rotated last word replicated to all lanes, substituted via aese
// with a zero key, then XORed with the round constant; q9 holds q3 shifted up
// by one word (zero filled) so the three veor steps accumulate the running
// XOR of the words. The round constant is doubled for the next pass.
64.align	4
65Loop128:
66	vtbl.8	d20,{q3},d4
67	vtbl.8	d21,{q3},d5
68	vext.8	q9,q0,q3,#12
69	vst1.32	{q3},[r2]!
70.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
71	subs	r1,r1,#1
72
73	veor	q3,q3,q9
74	vext.8	q9,q0,q9,#12
75	veor	q3,q3,q9
76	vext.8	q9,q0,q9,#12
77	veor	q10,q10,q1
78	veor	q3,q3,q9
79	vshl.u8	q1,q1,#1
80	veor	q3,q3,q10
81	bne	Loop128
82
// Eight passes done; switch the round constant to 0x1b for the next step.
83	vld1.32	{q1},[r3]
84
// Same expansion step, unrolled (constant 0x1b, then doubled to 0x36).
85	vtbl.8	d20,{q3},d4
86	vtbl.8	d21,{q3},d5
87	vext.8	q9,q0,q3,#12
88	vst1.32	{q3},[r2]!
89.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
90
91	veor	q3,q3,q9
92	vext.8	q9,q0,q9,#12
93	veor	q3,q3,q9
94	vext.8	q9,q0,q9,#12
95	veor	q10,q10,q1
96	veor	q3,q3,q9
97	vshl.u8	q1,q1,#1
98	veor	q3,q3,q10
99
// Final expansion step; the last round key is stored without advancing r2.
100	vtbl.8	d20,{q3},d4
101	vtbl.8	d21,{q3},d5
102	vext.8	q9,q0,q3,#12
103	vst1.32	{q3},[r2]!
104.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
105
106	veor	q3,q3,q9
107	vext.8	q9,q0,q9,#12
108	veor	q3,q3,q9
109	vext.8	q9,q0,q9,#12
110	veor	q10,q10,q1
111	veor	q3,q3,q9
112	veor	q3,q3,q10
113	vst1.32	{q3},[r2]
// r2 is at offset 160 here; advance by 0x50 to offset 240 (round-count slot).
114	add	r2,r2,#0x50
115
116	mov	r12,#10
117	b	Ldone
118
// 192-bit key. d16 receives the extra 8 key bytes. The vtbl mask is lowered
// by 8 per byte because the word to rotate now lives in d16, the first half
// of the q8 table, instead of the top of q3.
119.align	4
120L192:
121	vld1.8	{d16},[r0]!
122	vmov.i8	q10,#8			@ borrow q10
123	vst1.32	{q3},[r2]!
124	vsub.i8	q2,q2,q10	@ adjust the mask
125
// Each pass stores 8 bytes from d16 and then the freshly derived 16 bytes in
// q3 (24 bytes per pass, 8 passes), updating d16 for the next pass.
126Loop192:
127	vtbl.8	d20,{q8},d4
128	vtbl.8	d21,{q8},d5
129	vext.8	q9,q0,q3,#12
130	vst1.32	{d16},[r2]!
131.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
132	subs	r1,r1,#1
133
134	veor	q3,q3,q9
135	vext.8	q9,q0,q9,#12
136	veor	q3,q3,q9
137	vext.8	q9,q0,q9,#12
138	veor	q3,q3,q9
139
140	vdup.32	q9,d7[1]
141	veor	q9,q9,q8
142	veor	q10,q10,q1
143	vext.8	q8,q0,q8,#12
144	vshl.u8	q1,q1,#1
145	veor	q8,q8,q9
146	veor	q3,q3,q10
147	veor	q8,q8,q10
148	vst1.32	{q3},[r2]!
149	bne	Loop192
150
// r2 is at offset 208 here; advance by 0x20 to offset 240.
151	mov	r12,#12
152	add	r2,r2,#0x20
153	b	Ldone
154
// 256-bit key. q3 holds the first half of the key and q8 the second; r1 is
// reloaded with 7 passes and r12 with the round count.
155.align	4
156L256:
157	vld1.8	{q8},[r0]
158	mov	r1,#7
159	mov	r12,#14
160	vst1.32	{q3},[r2]!
161
// First half of a pass: store q8, derive the next q3 exactly as in Loop128
// (rotated word of q8, substitution, round constant) and store it. The loop
// exits from here once the counter reaches zero, leaving r2 at offset 240.
162Loop256:
163	vtbl.8	d20,{q8},d4
164	vtbl.8	d21,{q8},d5
165	vext.8	q9,q0,q3,#12
166	vst1.32	{q8},[r2]!
167.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
168	subs	r1,r1,#1
169
170	veor	q3,q3,q9
171	vext.8	q9,q0,q9,#12
172	veor	q3,q3,q9
173	vext.8	q9,q0,q9,#12
174	veor	q10,q10,q1
175	veor	q3,q3,q9
176	vshl.u8	q1,q1,#1
177	veor	q3,q3,q10
178	vst1.32	{q3},[r2]!
179	beq	Ldone
180
// Second half: derive the next q8 from the last word of the new q3. The word
// is replicated without rotation and no round constant is applied.
181	vdup.32	q10,d7[1]
182	vext.8	q9,q0,q8,#12
183.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
184
185	veor	q8,q8,q9
186	vext.8	q9,q0,q9,#12
187	veor	q8,q8,q9
188	vext.8	q9,q0,q9,#12
189	veor	q8,q8,q9
190
191	veor	q8,q8,q10
192	b	Loop256
193
// Common success exit: store the round count at offset 240 and return 0.
194Ldone:
195	str	r12,[r2]
196	mov	r3,#0
197
// Shared return path; r3 holds 0, -1 or -2.
198Lenc_key_abort:
199	mov	r0,r3			@ return value
200
201	bx	lr
202
203
// aes_hw_set_decrypt_key: build an AES decryption key schedule.
//
// Takes the same arguments in r0, r1, r2 as the encrypt-key routine and calls
// its Lenc_key entry to build the encryption schedule first. A non-zero result
// from that call is returned unchanged. Otherwise the schedule is converted in
// place: the order of the round keys is reversed and aesimc is applied to
// every round key except the first and the last. Returns 0 in r0 on success.
// Saves and restores r4 and lr; clobbers whatever Lenc_key clobbers plus
// q0 and q1.
204.globl	_aes_hw_set_decrypt_key
205.private_extern	_aes_hw_set_decrypt_key
206#ifdef __thumb2__
207.thumb_func	_aes_hw_set_decrypt_key
208#endif
209.align	5
210_aes_hw_set_decrypt_key:
211	stmdb	sp!,{r4,lr}
212	bl	Lenc_key
213
214	cmp	r0,#0
215	bne	Ldec_key_abort
216
// Lenc_key leaves r2 at offset 240 and the round count in r12.
// r2 = first round key, r0 = last round key, r4 = -16 (downward stride).
217	sub	r2,r2,#240		@ restore original r2
218	mov	r4,#-16
219	add	r0,r2,r12,lsl#4	@ end of key schedule
220
// Swap the first and last round keys as they are (no aesimc).
221	vld1.32	{q0},[r2]
222	vld1.32	{q1},[r0]
223	vst1.32	{q0},[r0],r4
224	vst1.32	{q1},[r2]!
225
// Walk inward from both ends, swapping each pair of round keys and applying
// aesimc to both, until the two pointers meet.
226Loop_imc:
227	vld1.32	{q0},[r2]
228	vld1.32	{q1},[r0]
229.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
230.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
231	vst1.32	{q0},[r0],r4
232	vst1.32	{q1},[r2]!
233	cmp	r0,r2
234	bhi	Loop_imc
235
// The middle round key stays in place and only gets aesimc applied.
236	vld1.32	{q0},[r2]
237.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
238	vst1.32	{q0},[r0]
239
240	eor	r0,r0,r0		@ return value
241Ldec_key_abort:
242	ldmia	sp!,{r4,pc}
243
// aes_hw_encrypt: encrypt one 16-byte block.
//
// In:   r0 = pointer to the input block
//       r1 = pointer to the output block
//       r2 = pointer to the key schedule (round count read from offset 240)
// No return value is set. Clobbers r2, r3, q0-q2 and the flags.
244.globl	_aes_hw_encrypt
245.private_extern	_aes_hw_encrypt
246#ifdef __thumb2__
247.thumb_func	_aes_hw_encrypt
248#endif
249.align	5
250_aes_hw_encrypt:
// r3 = round count - 2 (loop counter), q0/q1 = first two round keys,
// q2 = the block being transformed.
251	ldr	r3,[r2,#240]
252	vld1.32	{q0},[r2]!
253	vld1.8	{q2},[r0]
254	sub	r3,r3,#2
255	vld1.32	{q1},[r2]!
256
// Two rounds per pass; the next two round keys are loaded while the current
// ones are in use.
257Loop_enc:
258.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
259.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
260	vld1.32	{q0},[r2]!
261	subs	r3,r3,#2
262.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
263.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
264	vld1.32	{q1},[r2]!
265	bgt	Loop_enc
266
// Last two rounds: the final round has no aesmc and ends with an XOR of the
// last round key, which is loaded into q0 here.
267.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
268.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
269	vld1.32	{q0},[r2]
270.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
271	veor	q2,q2,q0
272
273	vst1.8	{q2},[r1]
274	bx	lr
275
// aes_hw_decrypt: decrypt one 16-byte block.
//
// In:   r0 = pointer to the input block
//       r1 = pointer to the output block
//       r2 = pointer to the key schedule (round count read from offset 240)
// No return value is set. Clobbers r2, r3, q0-q2 and the flags.
// Same structure as the single-block encrypt routine, with aesd/aesimc in
// place of aese/aesmc.
// NOTE(review): presumably expects a schedule produced by the decrypt-key
// routine -- confirm against callers.
276.globl	_aes_hw_decrypt
277.private_extern	_aes_hw_decrypt
278#ifdef __thumb2__
279.thumb_func	_aes_hw_decrypt
280#endif
281.align	5
282_aes_hw_decrypt:
// r3 = round count - 2 (loop counter), q0/q1 = first two round keys,
// q2 = the block being transformed.
283	ldr	r3,[r2,#240]
284	vld1.32	{q0},[r2]!
285	vld1.8	{q2},[r0]
286	sub	r3,r3,#2
287	vld1.32	{q1},[r2]!
288
// Two rounds per pass, loading the following two round keys as it goes.
289Loop_dec:
290.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
291.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
292	vld1.32	{q0},[r2]!
293	subs	r3,r3,#2
294.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
295.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
296	vld1.32	{q1},[r2]!
297	bgt	Loop_dec
298
// Last two rounds: the final round has no aesimc and ends with an XOR of the
// last round key, which is loaded into q0 here.
299.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
300.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
301	vld1.32	{q0},[r2]
302.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
303	veor	q2,q2,q0
304
305	vst1.8	{q2},[r1]
306	bx	lr
307
// aes_hw_cbc_encrypt: AES-CBC encrypt or decrypt whole 16-byte blocks.
//
// In:   r0 = input pointer
//       r1 = output pointer
//       r2 = length in bytes; a length below 16 returns immediately without
//            touching the output or the IV, otherwise the length is rounded
//            down to a multiple of 16
//       r3 = key schedule (round count read from byte offset 240)
//       [sp]    = pointer to the 16-byte IV; read on entry and overwritten on
//                 exit with the chaining value (the last ciphertext block)
//       [sp,#4] = direction flag: zero selects decryption, non-zero encryption
// No return value is set. Saves and restores r4-r8, lr and d8-d15.
//
// Register roles: q6 = IV / previous ciphertext, q7 = last round key,
// q8/q9 = round keys that are reloaded as the rounds progress,
// q12-q15 = the four round keys before the last one. On the encrypt path
// q10/q11 hold the two round keys before q12; the decrypt path reuses q10/q11
// for data. r8 is the post-increment applied to the look-ahead input load and
// is forced to 0 for the final block so that load never runs past the input.
308.globl	_aes_hw_cbc_encrypt
309.private_extern	_aes_hw_cbc_encrypt
310#ifdef __thumb2__
311.thumb_func	_aes_hw_cbc_encrypt
312#endif
313.align	5
314_aes_hw_cbc_encrypt:
// ip keeps the incoming sp so the two stack arguments can be fetched after
// the register saves: r4 = IV pointer, r5 = direction flag.
315	mov	ip,sp
316	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
317	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
318	ldmia	ip,{r4,r5}		@ load remaining args
319	subs	r2,r2,#16
320	mov	r8,#16
321	blo	Lcbc_abort
322	moveq	r8,#0
323
// The flags from this compare survive until the beq to Lcbc_dec below; none
// of the instructions in between modify them. r5 is then reused for the
// round count.
324	cmp	r5,#0			@ en- or decrypting?
325	ldr	r5,[r3,#240]
326	and	r2,r2,#-16
327	vld1.8	{q6},[r4]
328	vld1.8	{q0},[r0],r8
329
// q8/q9 = round keys 0 and 1; q10-q15 and q7 = the last seven round keys.
// Afterwards r5 = round count - 8, r7 points at round key 2, r6 = r5.
330	vld1.32	{q8,q9},[r3]		@ load key schedule...
331	sub	r5,r5,#6
332	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
333	sub	r5,r5,#2
334	vld1.32	{q10,q11},[r7]!
335	vld1.32	{q12,q13},[r7]!
336	vld1.32	{q14,q15},[r7]!
337	vld1.32	{q7},[r7]
338
339	add	r7,r3,#32
340	mov	r6,r5
341	beq	Lcbc_dec
342
// Encrypt path. r5 == 2 means a 10-round (128-bit) key, handled separately.
// q0 = first plaintext block XOR IV. q5 = round key 0 XOR last round key: the
// state kept in q0 at the end of a block lacks the last round key, so XORing
// the next plaintext with q5 folds the CBC chaining and the first
// AddRoundKey of the next block into the leading aese.
343	cmp	r5,#2
344	veor	q0,q0,q6
345	veor	q5,q8,q7
346	beq	Lcbc_enc128
347
// 192/256-bit keys: q2/q3 = round keys 2 and 3; r7, r6, r12, r14 and r3 are
// turned into pointers to round keys 1, 4, 5, 6 and 7 so they can be reloaded
// into q8/q9 inside the loop.
348	vld1.32	{q2,q3},[r7]
349	add	r7,r3,#16
350	add	r6,r3,#16*4
351	add	r12,r3,#16*5
352.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
353.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
354	add	r14,r3,#16*6
355	add	r3,r3,#16*7
356	b	Lenter_cbc_enc
357
// One block per pass. On re-entry q8 holds the next plaintext XOR q5 and q6
// the ciphertext just produced, which is stored here.
358.align	4
359Loop_cbc_enc:
360.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
361.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
362	vst1.8	{q6},[r1]!
363Lenter_cbc_enc:
364.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
365.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
366.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
367.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
368	vld1.32	{q8},[r6]
// r5 == 4 means a 12-round (192-bit) key, which skips the two extra rounds.
369	cmp	r5,#4
370.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
371.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
372	vld1.32	{q9},[r12]
373	beq	Lcbc_enc192
374
375.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
376.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
377	vld1.32	{q8},[r14]
378.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
379.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
380	vld1.32	{q9},[r3]
381	nop
382
// Remaining rounds. The subs result drives both the moveq (stop advancing the
// input pointer on the final block) and the bhs at the bottom of the loop.
383Lcbc_enc192:
384.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
385.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
386	subs	r2,r2,#16
387.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
388.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
389	moveq	r8,#0
390.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
391.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
392.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
// Look-ahead load of the next plaintext block, pre-XORed with q5.
394	vld1.8	{q8},[r0],r8
395.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
396.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
397	veor	q8,q8,q5
398.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
399.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
400	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
401.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
402.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
403.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
// q6 = ciphertext block; q0 keeps the state without the last round key.
404	veor	q6,q0,q7
405	bhs	Loop_cbc_enc
406
407	vst1.8	{q6},[r1]!
408	b	Lcbc_done
409
// 128-bit key encrypt loop: same scheme, with all round keys resident in
// registers (q8, q9, q2, q3, q10-q15, q7).
410.align	5
411Lcbc_enc128:
412	vld1.32	{q2,q3},[r7]
413.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
414.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
415	b	Lenter_cbc_enc128
416Loop_cbc_enc128:
417.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
418.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
419	vst1.8	{q6},[r1]!
420Lenter_cbc_enc128:
421.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
422.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
423	subs	r2,r2,#16
424.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
425.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
426	moveq	r8,#0
427.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
428.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
429.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
430.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
431.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
432.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
433	vld1.8	{q8},[r0],r8
434.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
435.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
436.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
437.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
438.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
439.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
440	veor	q8,q8,q5
441.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
442	veor	q6,q0,q7
443	bhs	Loop_cbc_enc128
444
445	vst1.8	{q6},[r1]!
446	b	Lcbc_done
// Decrypt path. q0 already holds the first ciphertext block and q6 the IV.
// Up to three blocks are decrypted in parallel in q0, q1 and q10, while q2,
// q3 and q11 keep untouched ciphertext copies that serve as chaining values.
// r2 is biased by a further 32 so the blo below sends inputs of fewer than
// three blocks straight to the tail; r6 = round count - 6 is the round-loop
// counter.
447.align	5
448Lcbc_dec:
449	vld1.8	{q10},[r0]!
450	subs	r2,r2,#32		@ bias
451	add	r6,r5,#2
452	vorr	q3,q0,q0
453	vorr	q1,q0,q0
454	vorr	q11,q10,q10
455	blo	Lcbc_dec_tail
456
// At least three blocks: q0, q1, q10 = blocks 0, 1, 2 with copies in q2, q3
// and q11.
457	vorr	q1,q10,q10
458	vld1.8	{q10},[r0]!
459	vorr	q2,q0,q0
460	vorr	q3,q1,q1
461	vorr	q11,q10,q10
462
// Round loop for three blocks, two rounds per pass; q8/q9 are refilled from
// the schedule through r7.
463Loop3x_cbc_dec:
464.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
465.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
466.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
467.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
468.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
469.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
470	vld1.32	{q8},[r7]!
471	subs	r6,r6,#2
472.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
473.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
474.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
475.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
476.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
477.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
478	vld1.32	{q9},[r7]!
479	bgt	Loop3x_cbc_dec
480
// Final six rounds for the three blocks, interleaved with bookkeeping:
// q4, q5, q9 = chaining value XOR last round key for blocks 0, 1, 2
// (q9 is only overwritten after its last use as a round key).
// The subs result is consumed by the movlo here and by the bhs at the bottom;
// nothing in between modifies the flags.
481.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
482.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
483.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
484.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
485.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
486.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
487	veor	q4,q6,q7
488	subs	r2,r2,#0x30
489	veor	q5,q2,q7
490	movlo	r6,r2			@ r6, r6, is zero at this point
491.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
492.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
493.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
494.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
495.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
496.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
497	veor	q9,q3,q7
// When fewer than three blocks remain r6 is negative, so r0 moves back and
// the three look-ahead loads below end exactly at the end of the input.
498	add	r0,r0,r6		@ r0 is adjusted in such way that
499					@ at exit from the loop q1-q10
500					@ are loaded with last "words"
// The last ciphertext block of this batch becomes the next chaining value;
// r7 is reset to the start of the key schedule.
501	vorr	q6,q11,q11
502	mov	r7,r3
503.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
504.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
505.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
506.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
507.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
508.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
509	vld1.8	{q2},[r0]!
510.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
511.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
512.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
513.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
514.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
515.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
516	vld1.8	{q3},[r0]!
517.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
518.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
519.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
520.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
521.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
522.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
523	vld1.8	{q11},[r0]!
524.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
525.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
526.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
527	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
528	add	r6,r5,#2
// Plaintext = decrypted state XOR (chaining value XOR last round key).
529	veor	q4,q4,q0
530	veor	q5,q5,q1
531	veor	q10,q10,q9
532	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
// Store three plaintext blocks and move the pre-loaded ciphertext into the
// working registers for the next batch.
533	vst1.8	{q4},[r1]!
534	vorr	q0,q2,q2
535	vst1.8	{q5},[r1]!
536	vorr	q1,q3,q3
537	vst1.8	{q10},[r1]!
538	vorr	q10,q11,q11
539	bhs	Loop3x_cbc_dec
540
// r2 == -0x30 means nothing is left; otherwise one or two blocks remain and
// are handled by the tail below.
541	cmn	r2,#0x30
542	beq	Lcbc_done
543	nop
544
// Tail: decrypt the blocks in q1 and q10 (two-way interleave of the same
// round sequence).
545Lcbc_dec_tail:
546.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
547.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
548.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
549.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
550	vld1.32	{q8},[r7]!
551	subs	r6,r6,#2
552.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
553.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
554.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
555.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
556	vld1.32	{q9},[r7]!
557	bgt	Lcbc_dec_tail
558
559.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
560.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
561.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
562.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
563.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
564.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
565.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
566.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
567.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
568.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
569.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
570.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
// Equal when exactly one block remains; the flags are consumed by the beq to
// Lcbc_dec_one below.
571	cmn	r2,#0x20
572.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
573.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
574.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
575.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
576	veor	q5,q6,q7
577.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
578.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
579.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
580.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
581	veor	q9,q3,q7
582.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
583.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
584	beq	Lcbc_dec_one
// Two blocks: q1 chains with q6 and q10 with the ciphertext copy in q3.
585	veor	q5,q5,q1
586	veor	q9,q9,q10
587	vorr	q6,q11,q11
588	vst1.8	{q5},[r1]!
589	vst1.8	{q9},[r1]!
590	b	Lcbc_done
591
// One block: only q10 carries data; it chains with q6.
592Lcbc_dec_one:
593	veor	q5,q5,q10
594	vorr	q6,q11,q11
595	vst1.8	{q5},[r1]!
596
// Write the updated chaining value back to the IV buffer.
597Lcbc_done:
598	vst1.8	{q6},[r4]
// Early-exit target for inputs shorter than one block: restore and return.
599Lcbc_abort:
600	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
601	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
602
// aes_hw_ctr32_encrypt_blocks: AES in counter mode; the counter is the last
// 32-bit word of the 16-byte counter block.
//
// In:   r0 = input pointer
//       r1 = output pointer
//       r2 = number of 16-byte blocks
//       r3 = key schedule (round count read from byte offset 240)
//       [sp] = pointer to the 16-byte counter block; only read here, the
//              incremented counter is not written back
// No return value is set. Saves and restores r4-r10, lr and d8-d15.
// NOTE(review): no check for a zero block count is visible here; presumably
// callers guarantee at least one block -- confirm.
//
// Register roles: q6 = counter block template, r8 = running counter value,
// q7 = last round key, q12-q15 = the four round keys before it,
// q8/q9 = round keys that are reloaded as the rounds progress,
// q0, q1, q10 = the counter blocks being encrypted.
603.globl	_aes_hw_ctr32_encrypt_blocks
604.private_extern	_aes_hw_ctr32_encrypt_blocks
605#ifdef __thumb2__
606.thumb_func	_aes_hw_ctr32_encrypt_blocks
607#endif
608.align	5
609_aes_hw_ctr32_encrypt_blocks:
// ip keeps the incoming sp so the stack argument can be fetched after the
// register saves.
610	mov	ip,sp
611	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
612	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
613	ldr	r4, [ip]		@ load remaining arg
614	ldr	r5,[r3,#240]
615
// r8 = raw counter word, q0 = whole counter block.
616	ldr	r8, [r4, #12]
617	vld1.32	{q0},[r4]
618
// The flags from the compare of r2 with 2 are consumed by the movlo and the
// bls further down; none of the instructions in between modify them.
// Afterwards r5 = round count - 6, r7 points at round key 2, r6 = r5.
619	vld1.32	{q8,q9},[r3]		@ load key schedule...
620	sub	r5,r5,#4
621	mov	r12,#16
622	cmp	r2,#2
623	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
624	sub	r5,r5,#2
625	vld1.32	{q12,q13},[r7]!
626	vld1.32	{q14,q15},[r7]!
627	vld1.32	{q7},[r7]
628	add	r7,r3,#32
629	mov	r6,r5
// With a single block, r12 (the input stride used in the tail) becomes 0.
630	movlo	r12,#0
// On builds without __ARMEB__ the counter word is byte-reversed into host
// order before arithmetic, then reversed again before it is inserted into the
// vector.
631#ifndef __ARMEB__
632	rev	r8, r8
633#endif
// q1 = counter block with counter + 1; q10 and q6 start as copies of q0.
634	vorr	q1,q0,q0
635	add	r10, r8, #1
636	vorr	q10,q0,q0
637	add	r8, r8, #2
638	vorr	q6,q0,q0
639	rev	r10, r10
640	vmov.32	d3[1],r10
// One or two blocks go straight to the tail.
641	bls	Lctr32_tail
// Three or more: q10 gets counter + 2 and r2 is biased by 3 for the loop.
642	rev	r12, r8
643	sub	r2,r2,#3		@ bias
644	vmov.32	d21[1],r12
645	b	Loop3x_ctr32
646
// Round loop for three counter blocks, two rounds per pass.
647.align	4
648Loop3x_ctr32:
649.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
650.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
651.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
652.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
653.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
654.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
655	vld1.32	{q8},[r7]!
656	subs	r6,r6,#2
657.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
658.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
659.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
660.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
661.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
662.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
663	vld1.32	{q9},[r7]!
664	bgt	Loop3x_ctr32
665
// Final six rounds. The in-flight states move to q4, q5 and q9 so that q0,
// q1 and q10 can be rebuilt from the template q6 with the next three counter
// values. The input blocks (q2, q3, q11) are loaded and pre-XORed with the
// last round key q7.
666.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
667.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
668.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
669.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
670	vld1.8	{q2},[r0]!
671	vorr	q0,q6,q6
672.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
673.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
674	vld1.8	{q3},[r0]!
675	vorr	q1,q6,q6
676.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
677.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
678.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
679.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
680	vld1.8	{q11},[r0]!
681	mov	r7,r3
682.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
683.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
684	vorr	q10,q6,q6
// r9, r10 and the updated r8 are the next three counter values.
685	add	r9,r8,#1
686.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
687.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
688.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
689.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
690	veor	q2,q2,q7
691	add	r10,r8,#2
692.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
693.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
694	veor	q3,q3,q7
695	add	r8,r8,#3
696.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
697.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
698.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
699.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
700	veor	q11,q11,q7
701	rev	r9,r9
702.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
703.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
704	vmov.32	d1[1], r9
705	rev	r10,r10
706.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
707.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
708.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
709.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
710	vmov.32	d3[1], r10
711	rev	r12,r8
712.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
713.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
714	vmov.32	d21[1], r12
// The flags from this subs are consumed by the bhs at the bottom of the loop.
715	subs	r2,r2,#3
716.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
717.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
718.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
719
// Output = (input XOR last round key) XOR state; store three blocks and
// reload round keys 0 and 1 for the next batch.
720	veor	q2,q2,q4
721	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
722	vst1.8	{q2},[r1]!
723	veor	q3,q3,q5
724	mov	r6,r5
725	vst1.8	{q3},[r1]!
726	veor	q11,q11,q9
727	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
728	vst1.8	{q11},[r1]!
729	bhs	Loop3x_ctr32
730
// Undo the bias: r2 = blocks left (0, 1 or 2). With exactly one block left
// the input stride r12 is 0.
731	adds	r2,r2,#3
732	beq	Lctr32_done
733	cmp	r2,#1
734	mov	r12,#16
735	moveq	r12,#0
736
// Tail: encrypt the counter blocks in q0 and q1 (two-way interleave).
737Lctr32_tail:
738.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
739.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
740.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
741.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
742	vld1.32	{q8},[r7]!
743	subs	r6,r6,#2
744.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
745.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
746.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
747.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
748	vld1.32	{q9},[r7]!
749	bgt	Lctr32_tail
750
751.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
752.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
753.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
754.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
755.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
756.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
757.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
758.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
// With a stride of 0 the second load below re-reads the same block instead of
// reading past the end of a one-block input.
759	vld1.8	{q2},[r0],r12
760.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
761.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
762.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
763.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
764	vld1.8	{q3},[r0]
765.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
766.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
767.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
768.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
769	veor	q2,q2,q7
770.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
771.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
772.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
773.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
774	veor	q3,q3,q7
775.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
776.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
777
// Always store the first block; store the second only if two blocks remain.
778	cmp	r2,#1
779	veor	q2,q2,q0
780	veor	q3,q3,q1
781	vst1.8	{q2},[r1]!
782	beq	Lctr32_done
783	vst1.8	{q3},[r1]
784
785Lctr32_done:
786	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
787	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
788
789#endif
790#endif  // !OPENSSL_NO_ASM
791