1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
11#if defined(__aarch64__)
12#include <GFp/arm_arch.h>
13
14#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
// Constant table used by GFp_aes_hw_set_encrypt_key, kept in read-only data.
// The key setup loads the first two rows together (round constant and tbl
// index) and loads the third row once the 128-bit loop has finished.
.section	.rodata
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01	// starting round constant; the key setup doubles it with shl once per round
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b	// round constant reloaded for the last two rounds of the 128-bit schedule

.text
25
// GFp_aes_hw_set_encrypt_key: expand an AES key into a round-key schedule
// using the ARMv8 Crypto Extension.
//
// In:   x0 = pointer to the user key bytes
//       w1 = key length in bits; only 128 and 256 are accepted
//       x2 = pointer to the output schedule; round keys are written from
//            offset 0 and the round count (10 or 14) is stored as a 32-bit
//            word at offset 240
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is null
//            -2 if the bit length is not supported (this includes 192)
// Uses: x3, w12, v0-v6 and the flags; x0, w1 and x2 are modified.
//
// Register roles during expansion:
//       v0 = all zeroes          v1 = current round constant
//       v2 = rotate-n-splat tbl index
//       v3 = most recent round key (first half of the key for 256 bits)
//       v4 = second half of the key (256 bits only)
//       v5, v6 = scratch
.globl	GFp_aes_hw_set_encrypt_key
.hidden	GFp_aes_hw_set_encrypt_key
.type	GFp_aes_hw_set_encrypt_key,%function
.align	5
GFp_aes_hw_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// result if a pointer is null
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// result if the bit length is rejected
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort
	// 192-bit key support was removed. Reject that length here, while x3
	// still holds -2, instead of expanding it as a 256-bit key (which
	// would read 32 bytes from a 24-byte key buffer).
	cmp	w1,#192
	b.eq	.Lenc_key_abort

	// Nothing between the compare above and the b.lt below sets the flags,
	// so b.lt still tests w1 against 192.
	adrp	x3,.Lrcon
	add	x3,x3,:lo12:.Lrcon

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant, v2 = tbl index

	b.lt	.Loop128
	b	.L256

.align	4
// 128-bit schedule: each pass stores the current round key and derives the
// next one. Eight passes run with the doubling round constant.
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// last word, rotated and splatted
	ext	v5.16b,v0.16b,v3.16b,#12	// key shifted up by one word
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b		// zero round key: byte substitution only
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// double the round constant
	eor	v3.16b,v3.16b,v6.16b	// v3 = next round key
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b round constant row

	// Ninth derivation (round constant 0x1b).
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	// Tenth derivation (round constant doubled once more).
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, no post-increment
	add	x2,x2,#0x50		// x2 = schedule base + 240

	mov	w12,#10			// round count
	b	.Ldone

.align	4
// 256-bit schedule: v3 and v4 hold the two most recent round keys. Each pass
// stores v4, derives and stores a new v3, then derives a new v4.
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14			// round count
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// after the seventh pass x2 = base + 240

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at offset 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never changed, so only x29 is reloaded
	ret
.size	GFp_aes_hw_set_encrypt_key,.-GFp_aes_hw_set_encrypt_key
// GFp_aes_hw_encrypt: run one 16-byte block through the AES encryption
// rounds.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = key schedule; the round count is read from offset 240
// Uses: w3, v0-v2 and the flags; x2 is advanced through the schedule.
.globl	GFp_aes_hw_encrypt
.hidden	GFp_aes_hw_encrypt
.type	GFp_aes_hw_encrypt,%function
.p2align	5
GFp_aes_hw_encrypt:
	ld1	{v2.16b},[x0]			// v2 = block state
	ldr	w3,[x2,#240]			// w3 = round count
	ld1	{v0.4s,v1.4s},[x2],#32		// v0, v1 = first two round keys
	sub	w3,w3,#2			// the last two rounds are done after the loop

	// Two rounds per pass; v0 and v1 are refilled alternately while the
	// other key is in use.
.Lenc_round_pair:
	subs	w3,w3,#2			// flags are consumed by b.gt below
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Lenc_round_pair

	// Final two rounds: the last one skips aesmc and ends with an XOR of
	// the last round key.
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	aese	v2.16b,v1.16b
	ld1	{v0.4s},[x2]			// last round key
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_encrypt,.-GFp_aes_hw_encrypt
// GFp_aes_hw_decrypt: run one 16-byte block through the AES decryption
// rounds.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = key schedule, consumed from offset 0 upwards; the round count
//            is read from offset 240
//            NOTE(review): presumably this must be a schedule prepared for
//            decryption - confirm against the caller.
// Uses: w3, v0-v2 and the flags; x2 is advanced through the schedule.
.globl	GFp_aes_hw_decrypt
.hidden	GFp_aes_hw_decrypt
.type	GFp_aes_hw_decrypt,%function
.p2align	5
GFp_aes_hw_decrypt:
	ld1	{v2.16b},[x0]			// v2 = block state
	ldr	w3,[x2,#240]			// w3 = round count
	ld1	{v0.4s,v1.4s},[x2],#32		// v0, v1 = first two round keys
	sub	w3,w3,#2			// the last two rounds are done after the loop

	// Two rounds per pass; v0 and v1 are refilled alternately while the
	// other key is in use.
.Ldec_round_pair:
	subs	w3,w3,#2			// flags are consumed by b.gt below
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Ldec_round_pair

	// Final two rounds: the last one skips aesimc and ends with an XOR of
	// the last round key.
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	aesd	v2.16b,v1.16b
	ld1	{v0.4s},[x2]			// last round key
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_decrypt,.-GFp_aes_hw_decrypt
// GFp_aes_hw_ctr32_encrypt_blocks: AES counter mode over whole 16-byte
// blocks. The main loop keeps three blocks in flight; a two-block tail
// handles what is left.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = key schedule; the round count is read from offset 240
//       x4 = 16-byte counter block; only its last 32-bit word is stepped,
//            and the block in memory is not written back
// Uses: w5, w6, x7, w8, w9, w10, x12, v0-v7, v16-v23 and the flags;
//       x0, x1 and x2 are modified.
//
// Register roles:
//       v0, v1, v18 = counter blocks being encrypted
//       v6          = copy of the original counter block (template)
//       v16, v17    = round keys for the inner rounds, reloaded as needed
//       v20-v23, v7 = last five round keys, loaded once
//       w8          = running counter value, w5 = inner-loop round budget
//
// NOTE(review): a block count of 0 is not special-cased; it takes the tail
// path and stores two blocks. Callers presumably always pass at least 1 -
// confirm.
.globl	GFp_aes_hw_ctr32_encrypt_blocks
.hidden	GFp_aes_hw_ctr32_encrypt_blocks
.type	GFp_aes_hw_ctr32_encrypt_blocks,%function
.align	5
GFp_aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]			// w5 = round count

	ldr	w8, [x4, #12]			// last word of the counter block
	ld1	{v0.4s},[x4]			// v0 = whole counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16				// input stride used by the tail
	cmp	x2,#2				// flags are used by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2			// w5 = round count - 6
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]			// v7 = last round key
	add	x7,x3,#32			// x7 = third round key
	mov	w6,w5
	csel	x12,xzr,x12,lo			// fewer than 2 blocks: stride 0
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b		// v1 = copy of the counter block
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b		// v18 = copy of the counter block
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b		// v6 = template for later counter blocks
	rev	w10, w10
	mov	v1.s[3],w10			// v1 = counter + 1
	b.ls	.Lctr32_tail			// 2 blocks or fewer: tail only
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12			// v18 = counter + 2
	b	.Loop3x_ctr32

.align	4
// Inner rounds for three blocks, two rounds per pass. w6 counts the rounds
// left before the last five round keys take over.
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Remaining rounds. The three states move to v4, v5 and v17 so that
	// v0, v1 and v18 can be rebuilt from the template v6 with the next
	// three counter values while the rounds finish. The input blocks are
	// loaded here and pre-XORed with the last round key v7.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3				// rewind to the start of the schedule
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b			// third state now lives in v17
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3			// flags are used by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	// Combine each state with its pre-XORed input block and store it.
	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5				// reset the inner-loop round budget
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32			// at least 3 more blocks remain

	adds	x2,x2,#3			// undo the bias: x2 = blocks left (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq			// exactly one block left: stride 0

// Tail: always encrypts two counter blocks (v0 and v1) and stores the second
// result only when x2 is not 1. With stride 0 the second input load reads
// the same address as the first instead of moving on.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12		// first input block; advance by 16 or 0
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]			// second input block
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done			// one block only: skip the second store
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16			// only x29 is reloaded; x30 is never changed
	ret
.size	GFp_aes_hw_ctr32_encrypt_blocks,.-GFp_aes_hw_ctr32_encrypt_blocks
403#endif
404#endif
405#endif  // !OPENSSL_NO_ASM
406