1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#include <GFp/arm_arch.h>
13
14#if __ARM_MAX_ARCH__>=7
15.text
16
17
18.code	32
19#undef	__thumb2__
.align	5
@ Constant table used by the key-expansion routine in this file.
@ The routine loads rows 0 and 1 together into q1/q2 (post-incrementing its
@ table pointer past them) and later reloads q1 from row 2.
Lrcon:
.long	0x01,0x01,0x01,0x01	@ row 0: initial round constant in every lane; doubled with vshl.u8 after each use
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b	@ row 2: round constant reloaded once the eight doublings of row 0 are used up
25
26.text
27
@ Expand a 128-bit or 256-bit AES user key into an encryption key schedule
@ using the ARMv8 AES instructions (emitted as raw .byte opcodes).
@
@ Inputs, as used by the code below:
@   r0 = pointer to the user key bytes (16 or 32 bytes are read)
@   r1 = key length in bits
@   r2 = pointer to the key schedule to fill; 16-byte round keys are stored
@        sequentially and the round count (10 or 14) is stored as a 32-bit
@        word at byte offset 240
@ Returns in r0:
@    0  success
@   -1  r0 or r2 is NULL
@   -2  unsupported key length (anything other than 128 or 256 bits)
@ Scratch: r1, r3, r12, q0-q3, q8-q10. r2 is advanced while storing.
@
@ NOTE: 192-bit keys are rejected explicitly. The 192-bit expansion path was
@ removed from this file, and without the explicit check a 192-bit request
@ would pass validation, fall into the 256-bit path, read 32 bytes from a
@ 24-byte key and report success. This file is generated; the same check
@ should be mirrored in the generator script.
.globl	_GFp_aes_hw_set_encrypt_key
.private_extern	_GFp_aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_set_encrypt_key
#endif
.align	5
_GFp_aes_hw_set_encrypt_key:
Lenc_key:
	mov	r3,#-1			@ error code for a NULL pointer
	cmp	r0,#0
	beq	Lenc_key_abort
	cmp	r2,#0
	beq	Lenc_key_abort
	mov	r3,#-2			@ error code for a bad key length
	cmp	r1,#128
	blt	Lenc_key_abort
	cmp	r1,#256
	bgt	Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64 bits
	bne	Lenc_key_abort
	cmp	r1,#192
	beq	Lenc_key_abort		@ 192-bit support was removed: fail with -2

	@ None of the instructions up to the blt below modify the condition
	@ flags, so the flags of the cmp against 192 above select the path.
	adr	r3,Lrcon

	veor	q0,q0,q0		@ q0 = 0: zero round key for aese and zero fill for vext
	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = rotate-n-splat mask

	blt	Loop128			@ bits < 192, so bits == 128
	b	L256			@ bits > 192, so bits == 256

.align	4
@ 128-bit key: eight looped rounds using the doubling round constant in q1.
Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = last word of q3, rotated and copied to all lanes
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12		@ q9 = q3 moved up by one word, zero filled
	vst1.32	{q3},[r2]!		@ store the current round key
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9		@ three xor/shift steps: running xor of the words
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ double the round constant for the next round
	veor	q3,q3,q10		@ q3 = next round key
	bne	Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b splat (third row of the table)

	@ Round 9, unrolled.
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	@ Round 10, unrolled; the round constant is not needed afterwards.
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no writeback
	add	r2,r2,#0x50		@ r2 = schedule base + 240

	mov	r12,#10			@ round count for a 128-bit key
	b	Ldone

@ 192-bit key support was removed.

.align	4
@ 256-bit key: q3 holds the first half, q8 the second half of the key.
L256:
	vld1.8	{q8},[r0]		@ q8 = second 16 key bytes
	mov	r1,#7			@ seven passes of the loop below
	mov	r12,#14			@ round count for a 256-bit key
	vst1.32	{q3},[r2]!

Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	Ldone			@ after the 7th pass r2 = schedule base + 240

	@ Second half of the pass: no rotation and no round constant.
	vdup.32	q10,d7[1]		@ copy the last word of q3 to all lanes
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	Loop256

Ldone:
	str	r12,[r2]		@ store the round count at offset 240
	mov	r3,#0			@ success

Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
165
@ Encrypt one 16-byte block with the ARMv8 AES instructions (emitted as raw
@ .byte opcodes).
@
@ Inputs, as used by the code below:
@   r0 = pointer to the 16-byte input block
@   r1 = pointer to the 16-byte output block
@   r2 = key schedule: 16-byte round keys read sequentially, with the round
@        count read as a 32-bit word from byte offset 240
@ Scratch: r3, q0-q2. r2 is advanced while the round keys are loaded.
.globl	_GFp_aes_hw_encrypt
.private_extern	_GFp_aes_hw_encrypt
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_encrypt
#endif
.align	5
_GFp_aes_hw_encrypt:
	@ NOTE(review): the marker macro on the next line is not defined in this
	@ file; presumably the arch header provides it and it expands to nothing
	@ in a 32-bit build. TODO confirm.
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!		@ q0 = round key 0
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2		@ loop counter; the last two rounds are done after the loop
	vld1.32	{q1},[r2]!		@ q1 = round key 1

	@ Two rounds per pass, with the next two round keys loaded in between.
Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_enc

	@ Last two rounds: the final one has no aesmc and ends with an xor of
	@ the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]		@ write the result block
	bx	lr
198
@ Decrypt one 16-byte block with the ARMv8 AES instructions (emitted as raw
@ .byte opcodes). Same structure as the single-block encrypt routine, using
@ aesd/aesimc in place of aese/aesmc.
@
@ Inputs, as used by the code below:
@   r0 = pointer to the 16-byte input block
@   r1 = pointer to the 16-byte output block
@   r2 = key schedule: 16-byte round keys read sequentially, with the round
@        count read as a 32-bit word from byte offset 240
@        (presumably a schedule prepared for decryption; no routine that
@        builds one is visible in this file. TODO confirm against callers)
@ Scratch: r3, q0-q2. r2 is advanced while the round keys are loaded.
.globl	_GFp_aes_hw_decrypt
.private_extern	_GFp_aes_hw_decrypt
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_decrypt
#endif
.align	5
_GFp_aes_hw_decrypt:
	@ NOTE(review): the marker macro on the next line is not defined in this
	@ file; presumably the arch header provides it and it expands to nothing
	@ in a 32-bit build. TODO confirm.
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!		@ q0 = first round key in the schedule
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2		@ loop counter; the last two rounds are done after the loop
	vld1.32	{q1},[r2]!		@ q1 = second round key in the schedule

	@ Two rounds per pass, with the next two round keys loaded in between.
Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_dec

	@ Last two rounds: the final one has no aesimc and ends with an xor of
	@ the last round key.
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]		@ write the result block
	bx	lr
231
@ AES in counter mode with a 32-bit counter, using the ARMv8 AES instructions
@ (emitted as raw .byte opcodes). Three blocks are processed per pass of the
@ main loop; one or two leftover blocks are handled by the tail code.
@
@ Inputs, as used by the code below:
@   r0   = input pointer, read 16 bytes at a time
@   r1   = output pointer, written 16 bytes at a time
@   r2   = number of 16-byte blocks
@   r3   = key schedule: 16-byte round keys, with the round count read as a
@          32-bit word from byte offset 240
@   [sp] = fifth argument: pointer to the 16-byte initial counter block; only
@          its last 32-bit word is incremented
@ r4-r10, lr and d8-d15 are saved on entry and restored on exit.
@
@ Register roles inside the function:
@   q8,q9     = rolling pair of round keys, reloaded through r7
@   q12-q15   = the four round keys before the last one
@   q7        = last round key (xored into the input ahead of time)
@   q6        = staging copy of the counter block (see the errata note below)
@   q0,q1,q10 = counter blocks being encrypted
@   r8        = counter value used to derive the next counter blocks
@   r5, r6    = round loop count (rounds - 6) and its working copy
@   r12       = input stride for the tail (0 or 16), also a scratch register
@
@ NOTE(review): a block count of 0 is not special-cased. Tracing the code, it
@ would take the tail path and store two blocks; callers presumably always
@ pass at least 1. TODO confirm.
.globl	_GFp_aes_hw_ctr32_encrypt_blocks
.private_extern	_GFp_aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func	_GFp_aes_hw_ctr32_encrypt_blocks
#endif
.align	5
_GFp_aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp			@ ip = address of the first stack argument
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]		@ r5 = number of rounds

	ldr	r8, [r4, #12]		@ r8 = last 32-bit word of the counter block
	vld1.32	{q0},[r4]		@ q0 = initial counter block

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ tail input stride: 16 when a second block exists
	cmp	r2,#2			@ flags stay live until movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2		@ r5 = rounds - 6
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key
	add	r7,r3,#32		@ r7 = address of round key 2
	mov	r6,r5
	movlo	r12,#0			@ fewer than 2 blocks: second tail load rereads the same block

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
	@ could write to q1 and q10 directly, but that trips this bug.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0		@ q6 = staging copy of the counter block
	rev	r10, r10
	vmov.32	d13[1],r10		@ last lane of q6 = counter + 1
	add	r8, r8, #2
	vorr	q1,q6,q6		@ q1 = second counter block
	bls	Lctr32_tail		@ 2 blocks or fewer: skip the 3x loop
	rev	r12, r8
	vmov.32	d13[1],r12		@ last lane of q6 = counter + 2
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6		@ q10 = third counter block
	b	Loop3x_ctr32

.align	4
	@ Main loop: q0, q1 and q10 go through two rounds per pass while the
	@ next two round keys are loaded through r7.
Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_ctr32

	@ Remaining rounds. The three blocks move to q4, q5 and q9 so that
	@ q0, q1 and q10 can be refilled with the next three counter values
	@ while these rounds finish. Input loads and counter arithmetic are
	@ interleaved with the AES instructions.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!		@ q2 = input block 0
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!		@ q3 = input block 1
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!		@ q11 = input block 2
	mov	r7,r3			@ rewind the round key pointer to the schedule start
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ fold the last round key into the input block
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	 @ Note the logic to update q0, q1, and q10 is written to work
	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 @ 32-bit mode. See the errata comment earlier in this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6		@ q0 = next counter block
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6		@ q1 = counter block after that
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6		@ q10 = third upcoming counter block
	subs	r2,r2,#3		@ flags stay live until bhs below
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4		@ output = (input xor last key) xor cipher state
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5			@ reset the round loop counter
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: second tail load rereads the same block

	@ Tail: one or two blocks, using q0 and q1 only.
Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ first input block; advance by 16 only if a second one exists
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]		@ second input block, or the first one again
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	Lctr32_done		@ exactly one block: skip the second store
	vst1.8	{q3},[r1]

Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}	@ restore and return by loading pc
434
435#endif
436#endif  // !OPENSSL_NO_ASM
437