1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#include <GFp/arm_arch.h>
14
15#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
.section	.rodata
.p2align	5
// Constant table read by the AES key expansion code in this file.
// Three rows of four 32-bit words each (48 bytes in total).
.Lrcon:
// Row 0: first round constant, replicated into every lane.
.long	0x00000001,0x00000001,0x00000001,0x00000001
// Row 1: byte-permutation indices (rotate-n-splat).
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
// Row 2: round constant 0x1b, replicated into every lane.
.long	0x0000001b,0x0000001b,0x0000001b,0x0000001b

.text
26
.globl	GFp_aes_hw_set_encrypt_key
.hidden	GFp_aes_hw_set_encrypt_key
.type	GFp_aes_hw_set_encrypt_key,%function
.align	5
// GFp_aes_hw_set_encrypt_key expands a 128-bit or 256-bit AES key into an
// encryption key schedule.
//
// In:   x0 = key bytes (16 bytes are read for 128 bits, 32 for 256 bits)
//       w1 = key length in bits
//       x2 = output buffer: round keys of 16 bytes each, then the round
//            count (10 or 14) stored as a 32-bit word at byte offset 240
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if w1 is neither 128 nor 256 (192-bit keys are rejected)
// Uses: x1, x2, x3, w12, v0-v6 and the condition flags.
GFp_aes_hw_set_encrypt_key:
.Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for a NULL pointer
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// error code for an unsupported length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort
	// Only 128, 192 and 256 remain. There is no 192-bit schedule in this
	// file, and sending 192 down the 256-bit path would read 32 bytes from
	// a 24-byte key, so reject it while x3 still holds the error code.
	cmp	w1,#192
	b.eq	.Lenc_key_abort

	adrp	x3,.Lrcon
	add	x3,x3,:lo12:.Lrcon

	// None of the setup instructions below write the condition flags, so
	// the b.lt that follows them still sees the result of the compare
	// against 192 above.
	eor	v0.16b,v0.16b,v0.16b	// v0 = 0 (zero round key and shift-in value)
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	.Loop128		// bits == 128
	// 192-bit key support was removed.
	b	.L256			// bits == 256

.align	4
	// Each pass stores the current round key and derives the next one.
	// tbl rotates the last word and copies it to every lane, aese with a
	// zero key applies the S-box to it, and the ext/eor chain folds each
	// word of the previous round key into the words above it.
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// double the round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	// Eight doublings have used up the first constant. The last two
	// round keys use the 0x1b row of the table and its double.
	ld1	{v1.4s},[x3]

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// eleventh and last round key
	add	x2,x2,#0x50		// x2 is at byte 160; advance to offset 240

	mov	w12,#10			// round count for a 128-bit key
	b	.Ldone

// 192-bit key support was removed.

.align	4
.L256:
	ld1	{v4.16b},[x0]		// v4 = second 16 key bytes
	mov	w1,#7			// seven passes, two round keys each
	mov	w12,#14			// round count for a 256-bit key
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// 15 round keys stored; x2 is at offset 240

	// The odd-numbered half of the schedule uses the S-box without the
	// rotation and without a round constant.
	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at byte offset 240
	mov	x3,#0			// success

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	GFp_aes_hw_set_encrypt_key,.-GFp_aes_hw_set_encrypt_key
.globl	GFp_aes_hw_encrypt
.hidden	GFp_aes_hw_encrypt
.type	GFp_aes_hw_encrypt,%function
.align	5
// GFp_aes_hw_encrypt encrypts one 16-byte block.
//
// In:   x0 = input block
//       x1 = output block
//       x2 = key schedule: round keys of 16 bytes each, with the round
//            count held as a 32-bit word at byte offset 240
// Uses: x2, w3, v0-v2 and the condition flags.
GFp_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = number of rounds
	ld1	{v2.16b},[x0]		// v2 = block being encrypted
	ld1	{v0.4s,v1.4s},[x2],#32	// v0, v1 = first two round keys
	sub	w3,w3,#2		// the last two rounds are done after the loop

	// Two rounds per pass. Each aese is kept directly in front of its
	// aesmc, and the next pair of round keys is fetched as the current
	// pair is consumed.
.Lenc_two_rounds:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	subs	w3,w3,#2
	b.gt	.Lenc_two_rounds

	// The final round has no aesmc and ends by adding the last round key.
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_encrypt,.-GFp_aes_hw_encrypt
.globl	GFp_aes_hw_decrypt
.hidden	GFp_aes_hw_decrypt
.type	GFp_aes_hw_decrypt,%function
.align	5
// GFp_aes_hw_decrypt decrypts one 16-byte block.
//
// In:   x0 = input block
//       x1 = output block
//       x2 = key schedule: round keys of 16 bytes each, consumed in the
//            order stored, with the round count held as a 32-bit word at
//            byte offset 240
// Uses: x2, w3, v0-v2 and the condition flags.
GFp_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = number of rounds
	ld1	{v2.16b},[x0]		// v2 = block being decrypted
	ld1	{v0.4s,v1.4s},[x2],#32	// v0, v1 = first two round keys
	sub	w3,w3,#2		// the last two rounds are done after the loop

	// Two rounds per pass. Each aesd is kept directly in front of its
	// aesimc, and the next pair of round keys is fetched as the current
	// pair is consumed.
.Ldec_two_rounds:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	subs	w3,w3,#2
	b.gt	.Ldec_two_rounds

	// The final round has no aesimc and ends by adding the last round key.
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_decrypt,.-GFp_aes_hw_decrypt
.globl	GFp_aes_hw_ctr32_encrypt_blocks
.hidden	GFp_aes_hw_ctr32_encrypt_blocks
.type	GFp_aes_hw_ctr32_encrypt_blocks,%function
.align	5
// GFp_aes_hw_ctr32_encrypt_blocks runs AES in counter mode. The counter is
// the last 32-bit word of the counter block, treated as big-endian, and only
// that word is incremented.
//
// In:   x0 = input
//       x1 = output
//       x2 = number of 16-byte blocks
//       x3 = key schedule (round count as a 32-bit word at byte offset 240)
//       x4 = 16-byte initial counter block
// The counter block at x4 is only read; it is never written back.
// A block count of zero returns at once without reading x0 or writing x1.
// Uses: x0-x2, x5-x10, x12, v0-v7, v16-v23 and the condition flags.
GFp_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Without this guard a zero count would take the tail path, which
	// always loads from x0 and stores two blocks when the count is not 1.
	cbz	x2,.Lctr32_done
	ldr	w5,[x3,#240]		// w5 = number of rounds

	ldr	w8, [x4, #12]		// w8 = counter word
	ld1	{v0.4s},[x4]		// v0 = counter block for the first block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags are consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32		// x7 = third round key
	mov	w6,w5			// w6 = rounds left for the loops
	csel	x12,xzr,x12,lo		// one block only: do not advance the input pointer

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The lane-insert mov
	// lines could write to v1.16b and v18.16b directly, but that trips this
	// bug. We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b	// v1 = counter block for the second block
	b.ls	.Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b	// v18 = counter block for the third block
	b	.Loop3x_ctr32

.align	4
	// Three blocks at a time, two rounds per pass of this inner loop.
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Remaining rounds for the three blocks, interleaved with loading the
	// input, folding the last round key into it, and building the next
	// three counter blocks. The states move to v4, v5 and v17 so that v0,
	// v1 and v18 are free for the new counters.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 // 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3		// flags are consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32		// at least three more blocks remain

	adds	x2,x2,#3		// undo the bias: 0, 1 or 2 blocks remain
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block only: do not advance the input pointer

	// One or two blocks. Two are always computed; the second result is
	// stored only when the count is not 1.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 is 0 for a single block, else 16
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// rereads the first block when x12 is 0
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// single block: skip the second store
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	GFp_aes_hw_ctr32_encrypt_blocks,.-GFp_aes_hw_ctr32_encrypt_blocks
427#endif
428#endif
429#endif  // !OPENSSL_NO_ASM
430.section	.note.GNU-stack,"",%progbits
431