1/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
2#include "arm_arch.h"
3
4#if __ARM_MAX_ARCH__>=7
5.text
6.align	5
// Constant table for the key-expansion code, addressed with adr x3,.Lrcon.
// Three rows of four 32-bit words each:
//   row 0: 0x01 in every word, the starting round constant (loaded to v1)
//   row 1: byte-index mask used with tbl (loaded to v2)
//   row 2: 0x1b in every word, reloaded into v1 after the AES-128 loop has
//          shifted the row-0 constant left eight times
7.Lrcon:
8.long	0x01,0x01,0x01,0x01
9.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
10.long	0x1b,0x1b,0x1b,0x1b
11
// aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
//
// In:   x0 = pointer to the user key (rejected when NULL)
//       w1 = key length in bits; only 128, 192 and 256 pass the checks
//       x2 = pointer to the output key schedule (rejected when NULL)
// Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is rejected
//
// On success the round keys are written upward from x2 and the round count
// (10, 12 or 14) is stored as a 32-bit word at byte offset 240 of the
// schedule.
//
// Clobbers: x1, x2, x3, w12, v0-v6 and the condition flags.
// On a successful return x2 points at the round-count word and w12 holds
// the count; aes_v8_set_decrypt_key enters through .Lenc_key and depends
// on both.
12.globl	aes_v8_set_encrypt_key
13.type	aes_v8_set_encrypt_key,%function
14.align	5
15aes_v8_set_encrypt_key:
16.Lenc_key:
17	stp	x29,x30,[sp,#-16]!
18	add	x29,sp,#0
	// Argument validation. x3 carries the value that is finally returned.
19	mov	x3,#-1
20	cmp	x0,#0
21	b.eq	.Lenc_key_abort
22	cmp	x2,#0
23	b.eq	.Lenc_key_abort
24	mov	x3,#-2
25	cmp	w1,#128
26	b.lt	.Lenc_key_abort
27	cmp	w1,#256
28	b.gt	.Lenc_key_abort
	// Within 128..256 only multiples of 64 are accepted.
29	tst	w1,#0x3f
30	b.ne	.Lenc_key_abort
31
	// x3 is reused as the .Lrcon pointer. The flags set by the compare
	// against 192 are consumed by the three branches below; none of the
	// instructions in between writes the flags.
32	adr	x3,.Lrcon
33	cmp	w1,#192
34
	// v0 = zero, v3 = first 16 key bytes, v1 = round constant,
	// v2 = tbl mask, w1 = iteration count for the 128 and 192 bit loops.
35	eor	v0.16b,v0.16b,v0.16b
36	ld1	{v3.16b},[x0],#16
37	mov	w1,#8		// reuse w1
38	ld1	{v1.4s,v2.4s},[x3],#32
39
40	b.lt	.Loop128
41	b.eq	.L192
42	b	.L256
43
	// 128-bit key: eight iterations, each storing the current round key
	// from v3 and deriving the next one. The round constant in v1 is
	// doubled with shl after every use.
44.align	4
45.Loop128:
46	tbl	v6.16b,{v3.16b},v2.16b
47	ext	v5.16b,v0.16b,v3.16b,#12
48	st1	{v3.4s},[x2],#16
49	aese	v6.16b,v0.16b
50	subs	w1,w1,#1
51
52	eor	v3.16b,v3.16b,v5.16b
53	ext	v5.16b,v0.16b,v5.16b,#12
54	eor	v3.16b,v3.16b,v5.16b
55	ext	v5.16b,v0.16b,v5.16b,#12
56	eor	v6.16b,v6.16b,v1.16b
57	eor	v3.16b,v3.16b,v5.16b
58	shl	v1.16b,v1.16b,#1
59	eor	v3.16b,v3.16b,v6.16b
60	b.ne	.Loop128
61
	// x3 was advanced by 32 above, so this reads the third .Lrcon row
	// (0x1b). Two more unrolled steps follow, using 0x1b and then the
	// shifted value.
62	ld1	{v1.4s},[x3]
63
64	tbl	v6.16b,{v3.16b},v2.16b
65	ext	v5.16b,v0.16b,v3.16b,#12
66	st1	{v3.4s},[x2],#16
67	aese	v6.16b,v0.16b
68
69	eor	v3.16b,v3.16b,v5.16b
70	ext	v5.16b,v0.16b,v5.16b,#12
71	eor	v3.16b,v3.16b,v5.16b
72	ext	v5.16b,v0.16b,v5.16b,#12
73	eor	v6.16b,v6.16b,v1.16b
74	eor	v3.16b,v3.16b,v5.16b
75	shl	v1.16b,v1.16b,#1
76	eor	v3.16b,v3.16b,v6.16b
77
78	tbl	v6.16b,{v3.16b},v2.16b
79	ext	v5.16b,v0.16b,v3.16b,#12
80	st1	{v3.4s},[x2],#16
81	aese	v6.16b,v0.16b
82
83	eor	v3.16b,v3.16b,v5.16b
84	ext	v5.16b,v0.16b,v5.16b,#12
85	eor	v3.16b,v3.16b,v5.16b
86	ext	v5.16b,v0.16b,v5.16b,#12
87	eor	v6.16b,v6.16b,v1.16b
88	eor	v3.16b,v3.16b,v5.16b
89	eor	v3.16b,v3.16b,v6.16b
	// The last round key is stored without post-increment (x2 is at
	// schedule offset 160 here); adding 0x50 moves x2 to offset 240.
90	st1	{v3.4s},[x2]
91	add	x2,x2,#0x50
92
93	mov	w12,#10
94	b	.Ldone
95
	// 192-bit key: v4 receives the remaining 8 key bytes. Each of the
	// eight loop iterations stores 8 bytes from v4 and 16 bytes from v3.
96.align	4
97.L192:
98	ld1	{v4.8b},[x0],#8
99	movi	v6.16b,#8			// borrow v6.16b
100	st1	{v3.4s},[x2],#16
101	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
102
103.Loop192:
104	tbl	v6.16b,{v4.16b},v2.16b
105	ext	v5.16b,v0.16b,v3.16b,#12
	// Both variants advance x2 by a net 8 bytes; the big-endian build
	// stores all 16 bytes of v4 and then steps x2 back by 8.
106#ifdef __ARMEB__
107	st1	{v4.4s},[x2],#16
108	sub	x2,x2,#8
109#else
110	st1	{v4.8b},[x2],#8
111#endif
112	aese	v6.16b,v0.16b
113	subs	w1,w1,#1
114
115	eor	v3.16b,v3.16b,v5.16b
116	ext	v5.16b,v0.16b,v5.16b,#12
117	eor	v3.16b,v3.16b,v5.16b
118	ext	v5.16b,v0.16b,v5.16b,#12
119	eor	v3.16b,v3.16b,v5.16b
120
121	dup	v5.4s,v3.s[3]
122	eor	v5.16b,v5.16b,v4.16b
123	eor	v6.16b,v6.16b,v1.16b
124	ext	v4.16b,v0.16b,v4.16b,#12
125	shl	v1.16b,v1.16b,#1
126	eor	v4.16b,v4.16b,v5.16b
127	eor	v3.16b,v3.16b,v6.16b
128	eor	v4.16b,v4.16b,v6.16b
129	st1	{v3.4s},[x2],#16
130	b.ne	.Loop192
131
	// x2 is at schedule offset 208 after the loop; +0x20 gives 240.
132	mov	w12,#12
133	add	x2,x2,#0x20
134	b	.Ldone
135
	// 256-bit key: v4 receives the second 16 key bytes. The loop runs
	// seven times and leaves through b.eq .Ldone in the middle of the
	// seventh pass, with x2 already at schedule offset 240.
136.align	4
137.L256:
138	ld1	{v4.16b},[x0]
139	mov	w1,#7
140	mov	w12,#14
141	st1	{v3.4s},[x2],#16
142
143.Loop256:
144	tbl	v6.16b,{v4.16b},v2.16b
145	ext	v5.16b,v0.16b,v3.16b,#12
146	st1	{v4.4s},[x2],#16
147	aese	v6.16b,v0.16b
148	subs	w1,w1,#1
149
150	eor	v3.16b,v3.16b,v5.16b
151	ext	v5.16b,v0.16b,v5.16b,#12
152	eor	v3.16b,v3.16b,v5.16b
153	ext	v5.16b,v0.16b,v5.16b,#12
154	eor	v6.16b,v6.16b,v1.16b
155	eor	v3.16b,v3.16b,v5.16b
156	shl	v1.16b,v1.16b,#1
157	eor	v3.16b,v3.16b,v6.16b
158	st1	{v3.4s},[x2],#16
	// Flags still come from the subs above; the vector instructions and
	// the store in between do not write them.
159	b.eq	.Ldone
160
161	dup	v6.4s,v3.s[3]		// just splat
162	ext	v5.16b,v0.16b,v4.16b,#12
163	aese	v6.16b,v0.16b
164
165	eor	v4.16b,v4.16b,v5.16b
166	ext	v5.16b,v0.16b,v5.16b,#12
167	eor	v4.16b,v4.16b,v5.16b
168	ext	v5.16b,v0.16b,v5.16b,#12
169	eor	v4.16b,v4.16b,v5.16b
170
171	eor	v4.16b,v4.16b,v6.16b
172	b	.Loop256
173
	// Common success exit: store the round count at schedule offset 240
	// and select return value 0.
174.Ldone:
175	str	w12,[x2]
176	mov	x3,#0
177
	// Only x29 is reloaded from the frame. x30 is never written inside
	// this function, so ret uses the link register value from entry.
178.Lenc_key_abort:
179	mov	x0,x3			// return value
180	ldr	x29,[sp],#16
181	ret
182.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
183
// aes_v8_set_decrypt_key: build a decryption key schedule.
//
// In:   x0, w1, x2 are passed unchanged to .Lenc_key
//       (user key pointer, key length in bits, output schedule pointer)
// Out:  x0 = 0 on success, otherwise the nonzero value returned by
//       .Lenc_key
//
// The encryption schedule is generated first and then converted in place:
// the round keys are reversed in order and aesimc is applied to every key
// except the first and the last.
//
// Clobbers: x1-x4, w12, v0-v6 and the condition flags (including what
// .Lenc_key clobbers).
184.globl	aes_v8_set_decrypt_key
185.type	aes_v8_set_decrypt_key,%function
186.align	5
187aes_v8_set_decrypt_key:
188.inst	0xd503233f		// paciasp
189	stp	x29,x30,[sp,#-16]!
190	add	x29,sp,#0
191	bl	.Lenc_key
192
	// A nonzero x0 is an error code from .Lenc_key; return it as is.
193	cmp	x0,#0
194	b.ne	.Ldec_key_abort
195
	// .Lenc_key leaves x2 at schedule offset 240 and the round count in
	// w12. x2 walks up from the first round key, x0 walks down from the
	// last one (x4 = -16 is the post-index step for x0).
196	sub	x2,x2,#240		// restore original x2
197	mov	x4,#-16
198	add	x0,x2,x12,lsl#4	// end of key schedule
199
	// Swap the first and last round keys without transforming them.
200	ld1	{v0.4s},[x2]
201	ld1	{v1.4s},[x0]
202	st1	{v0.4s},[x0],x4
203	st1	{v1.4s},[x2],#16
204
	// Swap the remaining pairs, applying aesimc to both keys of a pair,
	// until the two pointers meet.
205.Loop_imc:
206	ld1	{v0.4s},[x2]
207	ld1	{v1.4s},[x0]
208	aesimc	v0.16b,v0.16b
209	aesimc	v1.16b,v1.16b
210	st1	{v0.4s},[x0],x4
211	st1	{v1.4s},[x2],#16
212	cmp	x0,x2
213	b.hi	.Loop_imc
214
	// Middle round key: transformed and written back. The round count
	// is 10, 12 or 14, so x0 and x2 address the same key here.
215	ld1	{v0.4s},[x2]
216	aesimc	v0.16b,v0.16b
217	st1	{v0.4s},[x0]
218
219	eor	x0,x0,x0		// return value
220.Ldec_key_abort:
221	ldp	x29,x30,[sp],#16
222.inst	0xd50323bf		// autiasp
223	ret
224.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt: encrypt one 16-byte block.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule; the round count is read from
//            byte offset 240
// Out:  16 bytes stored at x1; no return value is set up
//
// Leaf routine without a stack frame.
// Clobbers: x2, w3, v0-v2 and the condition flags.
225.globl	aes_v8_encrypt
226.type	aes_v8_encrypt,%function
227.align	5
228aes_v8_encrypt:
229	ldr	w3,[x2,#240]
230	ld1	{v0.4s},[x2],#16
231	ld1	{v2.16b},[x0]
232	sub	w3,w3,#2
233	ld1	{v1.4s},[x2],#16
234
	// Two rounds per iteration. v0 and v1 alternate as the current round
	// key and are refilled from the schedule while the rounds execute.
235.Loop_enc:
236	aese	v2.16b,v0.16b
237	aesmc	v2.16b,v2.16b
238	ld1	{v0.4s},[x2],#16
239	subs	w3,w3,#2
240	aese	v2.16b,v1.16b
241	aesmc	v2.16b,v2.16b
242	ld1	{v1.4s},[x2],#16
243	b.gt	.Loop_enc
244
	// Last two rounds: the final aese is not followed by aesmc, and the
	// last round key (loaded into v0) is applied with a plain eor.
245	aese	v2.16b,v0.16b
246	aesmc	v2.16b,v2.16b
247	ld1	{v0.4s},[x2]
248	aese	v2.16b,v1.16b
249	eor	v2.16b,v2.16b,v0.16b
250
251	st1	{v2.16b},[x1]
252	ret
253.size	aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt: decrypt one 16-byte block.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule; the round count is read from
//            byte offset 240
//            (presumably a schedule produced by aes_v8_set_decrypt_key;
//            confirm against callers)
// Out:  16 bytes stored at x1; no return value is set up
//
// Leaf routine without a stack frame.
// Clobbers: x2, w3, v0-v2 and the condition flags.
254.globl	aes_v8_decrypt
255.type	aes_v8_decrypt,%function
256.align	5
257aes_v8_decrypt:
258	ldr	w3,[x2,#240]
259	ld1	{v0.4s},[x2],#16
260	ld1	{v2.16b},[x0]
261	sub	w3,w3,#2
262	ld1	{v1.4s},[x2],#16
263
	// Two rounds per iteration. v0 and v1 alternate as the current round
	// key and are refilled from the schedule while the rounds execute.
264.Loop_dec:
265	aesd	v2.16b,v0.16b
266	aesimc	v2.16b,v2.16b
267	ld1	{v0.4s},[x2],#16
268	subs	w3,w3,#2
269	aesd	v2.16b,v1.16b
270	aesimc	v2.16b,v2.16b
271	ld1	{v1.4s},[x2],#16
272	b.gt	.Loop_dec
273
	// Last two rounds: the final aesd is not followed by aesimc, and the
	// last round key (loaded into v0) is applied with a plain eor.
274	aesd	v2.16b,v0.16b
275	aesimc	v2.16b,v2.16b
276	ld1	{v0.4s},[x2]
277	aesd	v2.16b,v1.16b
278	eor	v2.16b,v2.16b,v0.16b
279
280	st1	{v2.16b},[x1]
281	ret
282.size	aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = length in bytes; a length below 16 returns immediately and
//            any remainder modulo 16 is dropped
//       x3 = pointer to the key schedule; the round count is read from
//            byte offset 240
//       x4 = pointer to the 16-byte IV; read on entry and overwritten at
//            .Lcbc_done with the chaining value (last ciphertext block)
//       w5 = zero selects decryption, nonzero selects encryption
// Out:  no return value is set up
//
// Register roles after the setup code:
//       v16,v17 = first two round keys (reloaded inside the loops)
//       v18-v23 = the six round keys preceding the last one
//       v7      = last round key
//       v6      = current chaining value
//       x8      = post-index step for the input pointer, 16 or 0
//
// Clobbers: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23 and the condition
// flags.
283.globl	aes_v8_cbc_encrypt
284.type	aes_v8_cbc_encrypt,%function
285.align	5
286aes_v8_cbc_encrypt:
287	stp	x29,x30,[sp,#-16]!
288	add	x29,sp,#0
	// x2 = length - 16. Below zero means less than one block: leave.
	// Exactly zero means a single block, so the input step x8 becomes 0.
289	subs	x2,x2,#16
290	mov	x8,#16
291	b.lo	.Lcbc_abort
292	csel	x8,xzr,x8,eq
293
	// The flags set by this compare are consumed by b.eq .Lcbc_dec
	// further down; none of the instructions in between writes the flags.
294	cmp	w5,#0			// en- or decrypting?
295	ldr	w5,[x3,#240]
296	and	x2,x2,#-16
297	ld1	{v6.16b},[x4]
298	ld1	{v0.16b},[x0],x8
299
300	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
301	sub	w5,w5,#6
302	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
303	sub	w5,w5,#2
304	ld1	{v18.4s,v19.4s},[x7],#32
305	ld1	{v20.4s,v21.4s},[x7],#32
306	ld1	{v22.4s,v23.4s},[x7],#32
307	ld1	{v7.4s},[x7]
308
	// w5 = rounds - 8 from here on; x7 points at the third round key.
309	add	x7,x3,#32
310	mov	w6,w5
311	b.eq	.Lcbc_dec
312
	// Encryption. w5 == 2 means 10 rounds and selects the dedicated
	// .Lcbc_enc128 path. v0 = first plaintext block xor IV.
	// v5 = first round key xor last round key: the next plaintext block
	// is xored with v5 so that using it as the aese key operand on the
	// not-yet-finalised state performs the CBC xor and the first round
	// key addition of the next block together.
313	cmp	w5,#2
314	eor	v0.16b,v0.16b,v6.16b
315	eor	v5.16b,v16.16b,v7.16b
316	b.eq	.Lcbc_enc128
317
	// 12 or 14 rounds: v2,v3 hold the third and fourth round keys; x6,
	// x12, x14 and x3 are set to the addresses of the round keys at
	// schedule offsets 16*4 to 16*7, and x7 to the second round key.
318	ld1	{v2.4s,v3.4s},[x7]
319	add	x7,x3,#16
320	add	x6,x3,#16*4
321	add	x12,x3,#16*5
322	aese	v0.16b,v16.16b
323	aesmc	v0.16b,v0.16b
324	add	x14,x3,#16*6
325	add	x3,x3,#16*7
326	b	.Lenter_cbc_enc
327
328.align	4
329.Loop_cbc_enc:
	// v16 holds the next plaintext block xor v5 at this point; v6 is the
	// ciphertext block produced by the previous pass.
330	aese	v0.16b,v16.16b
331	aesmc	v0.16b,v0.16b
332	st1	{v6.16b},[x1],#16
333.Lenter_cbc_enc:
334	aese	v0.16b,v17.16b
335	aesmc	v0.16b,v0.16b
336	aese	v0.16b,v2.16b
337	aesmc	v0.16b,v0.16b
338	ld1	{v16.4s},[x6]
	// w5 == 4 means 12 rounds: skip the two extra rounds below.
339	cmp	w5,#4
340	aese	v0.16b,v3.16b
341	aesmc	v0.16b,v0.16b
342	ld1	{v17.4s},[x12]
343	b.eq	.Lcbc_enc192
344
345	aese	v0.16b,v16.16b
346	aesmc	v0.16b,v0.16b
347	ld1	{v16.4s},[x14]
348	aese	v0.16b,v17.16b
349	aesmc	v0.16b,v0.16b
350	ld1	{v17.4s},[x3]
351	nop
352
353.Lcbc_enc192:
354	aese	v0.16b,v16.16b
355	aesmc	v0.16b,v0.16b
	// Count down the remaining length; when it reaches zero the input
	// step becomes 0 so the load below does not advance x0 any further.
356	subs	x2,x2,#16
357	aese	v0.16b,v17.16b
358	aesmc	v0.16b,v0.16b
359	csel	x8,xzr,x8,eq
360	aese	v0.16b,v18.16b
361	aesmc	v0.16b,v0.16b
362	aese	v0.16b,v19.16b
363	aesmc	v0.16b,v0.16b
364	ld1	{v16.16b},[x0],x8
365	aese	v0.16b,v20.16b
366	aesmc	v0.16b,v0.16b
367	eor	v16.16b,v16.16b,v5.16b
368	aese	v0.16b,v21.16b
369	aesmc	v0.16b,v0.16b
370	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
371	aese	v0.16b,v22.16b
372	aesmc	v0.16b,v0.16b
373	aese	v0.16b,v23.16b
	// v6 = finished ciphertext block; v0 stays without the last round
	// key so that the next pass can continue from it.
374	eor	v6.16b,v0.16b,v7.16b
375	b.hs	.Loop_cbc_enc
376
377	st1	{v6.16b},[x1],#16
378	b	.Lcbc_done
379
	// Encryption with a 10-round schedule: all round keys stay in
	// registers (v16, v17, v2, v3, v18-v23, v7).
380.align	5
381.Lcbc_enc128:
382	ld1	{v2.4s,v3.4s},[x7]
383	aese	v0.16b,v16.16b
384	aesmc	v0.16b,v0.16b
385	b	.Lenter_cbc_enc128
386.Loop_cbc_enc128:
387	aese	v0.16b,v16.16b
388	aesmc	v0.16b,v0.16b
389	st1	{v6.16b},[x1],#16
390.Lenter_cbc_enc128:
391	aese	v0.16b,v17.16b
392	aesmc	v0.16b,v0.16b
393	subs	x2,x2,#16
394	aese	v0.16b,v2.16b
395	aesmc	v0.16b,v0.16b
396	csel	x8,xzr,x8,eq
397	aese	v0.16b,v3.16b
398	aesmc	v0.16b,v0.16b
399	aese	v0.16b,v18.16b
400	aesmc	v0.16b,v0.16b
401	aese	v0.16b,v19.16b
402	aesmc	v0.16b,v0.16b
403	ld1	{v16.16b},[x0],x8
404	aese	v0.16b,v20.16b
405	aesmc	v0.16b,v0.16b
406	aese	v0.16b,v21.16b
407	aesmc	v0.16b,v0.16b
408	aese	v0.16b,v22.16b
409	aesmc	v0.16b,v0.16b
410	eor	v16.16b,v16.16b,v5.16b
411	aese	v0.16b,v23.16b
412	eor	v6.16b,v0.16b,v7.16b
413	b.hs	.Loop_cbc_enc128
414
415	st1	{v6.16b},[x1],#16
416	b	.Lcbc_done
	// Decryption. Up to three blocks are processed per pass in v0, v1
	// and v18; v2, v3 and v19 keep copies of the ciphertext blocks for
	// the CBC xor and for the next chaining value. After the bias x2 is
	// negative when fewer than three blocks are available.
417.align	5
418.Lcbc_dec:
419	ld1	{v18.16b},[x0],#16
420	subs	x2,x2,#32		// bias
421	add	w6,w5,#2
422	orr	v3.16b,v0.16b,v0.16b
423	orr	v1.16b,v0.16b,v0.16b
424	orr	v19.16b,v18.16b,v18.16b
425	b.lo	.Lcbc_dec_tail
426
427	orr	v1.16b,v18.16b,v18.16b
428	ld1	{v18.16b},[x0],#16
429	orr	v2.16b,v0.16b,v0.16b
430	orr	v3.16b,v1.16b,v1.16b
431	orr	v19.16b,v18.16b,v18.16b
432
	// Inner loop: two rounds per iteration on all three blocks, with the
	// round keys streamed through v16 and v17 from x7. w6 counts rounds.
433.Loop3x_cbc_dec:
434	aesd	v0.16b,v16.16b
435	aesimc	v0.16b,v0.16b
436	aesd	v1.16b,v16.16b
437	aesimc	v1.16b,v1.16b
438	aesd	v18.16b,v16.16b
439	aesimc	v18.16b,v18.16b
440	ld1	{v16.4s},[x7],#16
441	subs	w6,w6,#2
442	aesd	v0.16b,v17.16b
443	aesimc	v0.16b,v0.16b
444	aesd	v1.16b,v17.16b
445	aesimc	v1.16b,v1.16b
446	aesd	v18.16b,v17.16b
447	aesimc	v18.16b,v18.16b
448	ld1	{v17.4s},[x7],#16
449	b.gt	.Loop3x_cbc_dec
450
	// Remaining rounds use the preloaded keys. v4, v5 and v17 collect
	// chaining value xor last round key for the three blocks, so one eor
	// with the aesd result finishes each plaintext block.
451	aesd	v0.16b,v16.16b
452	aesimc	v0.16b,v0.16b
453	aesd	v1.16b,v16.16b
454	aesimc	v1.16b,v1.16b
455	aesd	v18.16b,v16.16b
456	aesimc	v18.16b,v18.16b
457	eor	v4.16b,v6.16b,v7.16b
458	subs	x2,x2,#0x30
459	eor	v5.16b,v2.16b,v7.16b
460	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
461	aesd	v0.16b,v17.16b
462	aesimc	v0.16b,v0.16b
463	aesd	v1.16b,v17.16b
464	aesimc	v1.16b,v1.16b
465	aesd	v18.16b,v17.16b
466	aesimc	v18.16b,v18.16b
467	eor	v17.16b,v3.16b,v7.16b
468	add	x0,x0,x6		// x0 is adjusted in such way that
469					// at exit from the loop v1.16b-v18.16b
470					// are loaded with last "words"
471	orr	v6.16b,v19.16b,v19.16b
472	mov	x7,x3
473	aesd	v0.16b,v20.16b
474	aesimc	v0.16b,v0.16b
475	aesd	v1.16b,v20.16b
476	aesimc	v1.16b,v1.16b
477	aesd	v18.16b,v20.16b
478	aesimc	v18.16b,v18.16b
479	ld1	{v2.16b},[x0],#16
480	aesd	v0.16b,v21.16b
481	aesimc	v0.16b,v0.16b
482	aesd	v1.16b,v21.16b
483	aesimc	v1.16b,v1.16b
484	aesd	v18.16b,v21.16b
485	aesimc	v18.16b,v18.16b
486	ld1	{v3.16b},[x0],#16
487	aesd	v0.16b,v22.16b
488	aesimc	v0.16b,v0.16b
489	aesd	v1.16b,v22.16b
490	aesimc	v1.16b,v1.16b
491	aesd	v18.16b,v22.16b
492	aesimc	v18.16b,v18.16b
493	ld1	{v19.16b},[x0],#16
494	aesd	v0.16b,v23.16b
495	aesd	v1.16b,v23.16b
496	aesd	v18.16b,v23.16b
497	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
498	add	w6,w5,#2
499	eor	v4.16b,v4.16b,v0.16b
500	eor	v5.16b,v5.16b,v1.16b
501	eor	v18.16b,v18.16b,v17.16b
502	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
503	st1	{v4.16b},[x1],#16
504	orr	v0.16b,v2.16b,v2.16b
505	st1	{v5.16b},[x1],#16
506	orr	v1.16b,v3.16b,v3.16b
507	st1	{v18.16b},[x1],#16
508	orr	v18.16b,v19.16b,v19.16b
	// Flags still come from subs x2,x2,#0x30 above.
509	b.hs	.Loop3x_cbc_dec
510
	// x2 == -0x30 means nothing is left; otherwise one or two blocks
	// remain in v1 and v18 for the tail.
511	cmn	x2,#0x30
512	b.eq	.Lcbc_done
513	nop
514
	// Tail: same round structure on two lanes (v1 and v18).
515.Lcbc_dec_tail:
516	aesd	v1.16b,v16.16b
517	aesimc	v1.16b,v1.16b
518	aesd	v18.16b,v16.16b
519	aesimc	v18.16b,v18.16b
520	ld1	{v16.4s},[x7],#16
521	subs	w6,w6,#2
522	aesd	v1.16b,v17.16b
523	aesimc	v1.16b,v1.16b
524	aesd	v18.16b,v17.16b
525	aesimc	v18.16b,v18.16b
526	ld1	{v17.4s},[x7],#16
527	b.gt	.Lcbc_dec_tail
528
529	aesd	v1.16b,v16.16b
530	aesimc	v1.16b,v1.16b
531	aesd	v18.16b,v16.16b
532	aesimc	v18.16b,v18.16b
533	aesd	v1.16b,v17.16b
534	aesimc	v1.16b,v1.16b
535	aesd	v18.16b,v17.16b
536	aesimc	v18.16b,v18.16b
537	aesd	v1.16b,v20.16b
538	aesimc	v1.16b,v1.16b
539	aesd	v18.16b,v20.16b
540	aesimc	v18.16b,v18.16b
	// x2 == -0x20 means a single block remains. The flags from this
	// compare are consumed by b.eq .Lcbc_dec_one below; the vector
	// instructions in between do not write them.
541	cmn	x2,#0x20
542	aesd	v1.16b,v21.16b
543	aesimc	v1.16b,v1.16b
544	aesd	v18.16b,v21.16b
545	aesimc	v18.16b,v18.16b
546	eor	v5.16b,v6.16b,v7.16b
547	aesd	v1.16b,v22.16b
548	aesimc	v1.16b,v1.16b
549	aesd	v18.16b,v22.16b
550	aesimc	v18.16b,v18.16b
551	eor	v17.16b,v3.16b,v7.16b
552	aesd	v1.16b,v23.16b
553	aesd	v18.16b,v23.16b
554	b.eq	.Lcbc_dec_one
	// Two blocks: v1 pairs with the old chaining value, v18 with the
	// first of the two ciphertext blocks. v19 becomes the new IV.
555	eor	v5.16b,v5.16b,v1.16b
556	eor	v17.16b,v17.16b,v18.16b
557	orr	v6.16b,v19.16b,v19.16b
558	st1	{v5.16b},[x1],#16
559	st1	{v17.16b},[x1],#16
560	b	.Lcbc_done
561
	// One block: only the v18 lane carries a result.
562.Lcbc_dec_one:
563	eor	v5.16b,v5.16b,v18.16b
564	orr	v6.16b,v19.16b,v19.16b
565	st1	{v5.16b},[x1],#16
566
	// Write the chaining value back to the IV buffer. Only x29 is
	// reloaded from the frame; x30 is never written in this function.
567.Lcbc_done:
568	st1	{v6.16b},[x4]
569.Lcbc_abort:
570	ldr	x29,[sp],#16
571	ret
572.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks: AES in counter mode over whole blocks, with
// a 32-bit counter in the last word of the counter block.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = pointer to the key schedule; the round count is read from
//            byte offset 240
//       x4 = pointer to the 16-byte counter block; it is only read, the
//            advanced counter is not written back
// Out:  no return value is set up
//
// The word at byte offset 12 of the counter block is kept in w8 as a
// native integer (byte-reversed on little-endian builds), advanced with
// 32-bit adds, and byte-reversed again before it is inserted into lane 3
// of a counter vector.
//
// NOTE(review): a block count of 0 is not special-cased. It takes the
// b.ls branch into .Lctr32_tail, which loads 16 bytes from x0 and stores
// 32 bytes to x1. Callers presumably guarantee x2 >= 1; confirm.
//
// Clobbers: x0-x2, w5, w6, x7, w8-w10, x12, v0-v7, v16-v23 and the
// condition flags.
573.globl	aes_v8_ctr32_encrypt_blocks
574.type	aes_v8_ctr32_encrypt_blocks,%function
575.align	5
576aes_v8_ctr32_encrypt_blocks:
577	stp	x29,x30,[sp,#-16]!
578	add	x29,sp,#0
579	ldr	w5,[x3,#240]
580
581	ldr	w8, [x4, #12]
582#ifdef __ARMEB__
583	ld1	{v0.16b},[x4]
584#else
585	ld1	{v0.4s},[x4]
586#endif
587	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
588	sub	w5,w5,#4
589	mov	x12,#16
	// The flags from this compare are consumed by the csel and by
	// b.ls .Lctr32_tail below; nothing in between writes the flags.
590	cmp	x2,#2
591	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
592	sub	w5,w5,#2
593	ld1	{v20.4s,v21.4s},[x7],#32
594	ld1	{v22.4s,v23.4s},[x7],#32
595	ld1	{v7.4s},[x7]
596	add	x7,x3,#32
597	mov	w6,w5
	// x12 is the input step used by the tail: 0 when fewer than two
	// blocks were requested, 16 otherwise.
598	csel	x12,xzr,x12,lo
599#ifndef __ARMEB__
600	rev	w8, w8
601#endif
	// v0 = counter block as loaded, v1 = counter + 1, v18 = counter + 2
	// (filled in below), v6 = template copy used to rebuild the counter
	// vectors. w8 is advanced to counter + 2.
602	orr	v1.16b,v0.16b,v0.16b
603	add	w10, w8, #1
604	orr	v18.16b,v0.16b,v0.16b
605	add	w8, w8, #2
606	orr	v6.16b,v0.16b,v0.16b
607	rev	w10, w10
608	mov	v1.s[3],w10
609	b.ls	.Lctr32_tail
610	rev	w12, w8
611	sub	x2,x2,#3		// bias
612	mov	v18.s[3],w12
613	b	.Loop3x_ctr32
614
	// Three counter blocks per pass. The inner loop runs two rounds per
	// iteration with the round keys streamed through v16 and v17.
615.align	4
616.Loop3x_ctr32:
617	aese	v0.16b,v16.16b
618	aesmc	v0.16b,v0.16b
619	aese	v1.16b,v16.16b
620	aesmc	v1.16b,v1.16b
621	aese	v18.16b,v16.16b
622	aesmc	v18.16b,v18.16b
623	ld1	{v16.4s},[x7],#16
624	subs	w6,w6,#2
625	aese	v0.16b,v17.16b
626	aesmc	v0.16b,v0.16b
627	aese	v1.16b,v17.16b
628	aesmc	v1.16b,v1.16b
629	aese	v18.16b,v17.16b
630	aesmc	v18.16b,v18.16b
631	ld1	{v17.4s},[x7],#16
632	b.gt	.Loop3x_ctr32
633
	// The three states move to v4, v5 and v17 for the remaining rounds
	// while v0, v1 and v18 are rebuilt from the template in v6 with the
	// next three counter values. The input blocks (v2, v3, v19) are
	// xored with the last round key ahead of time.
634	aese	v0.16b,v16.16b
635	aesmc	v4.16b,v0.16b
636	aese	v1.16b,v16.16b
637	aesmc	v5.16b,v1.16b
638	ld1	{v2.16b},[x0],#16
639	orr	v0.16b,v6.16b,v6.16b
640	aese	v18.16b,v16.16b
641	aesmc	v18.16b,v18.16b
642	ld1	{v3.16b},[x0],#16
643	orr	v1.16b,v6.16b,v6.16b
644	aese	v4.16b,v17.16b
645	aesmc	v4.16b,v4.16b
646	aese	v5.16b,v17.16b
647	aesmc	v5.16b,v5.16b
648	ld1	{v19.16b},[x0],#16
649	mov	x7,x3
650	aese	v18.16b,v17.16b
651	aesmc	v17.16b,v18.16b
652	orr	v18.16b,v6.16b,v6.16b
653	add	w9,w8,#1
654	aese	v4.16b,v20.16b
655	aesmc	v4.16b,v4.16b
656	aese	v5.16b,v20.16b
657	aesmc	v5.16b,v5.16b
658	eor	v2.16b,v2.16b,v7.16b
659	add	w10,w8,#2
660	aese	v17.16b,v20.16b
661	aesmc	v17.16b,v17.16b
662	eor	v3.16b,v3.16b,v7.16b
663	add	w8,w8,#3
664	aese	v4.16b,v21.16b
665	aesmc	v4.16b,v4.16b
666	aese	v5.16b,v21.16b
667	aesmc	v5.16b,v5.16b
668	eor	v19.16b,v19.16b,v7.16b
669	rev	w9,w9
670	aese	v17.16b,v21.16b
671	aesmc	v17.16b,v17.16b
672	mov	v0.s[3], w9
673	rev	w10,w10
674	aese	v4.16b,v22.16b
675	aesmc	v4.16b,v4.16b
676	aese	v5.16b,v22.16b
677	aesmc	v5.16b,v5.16b
678	mov	v1.s[3], w10
679	rev	w12,w8
680	aese	v17.16b,v22.16b
681	aesmc	v17.16b,v17.16b
682	mov	v18.s[3], w12
	// The flags from this subs decide the b.hs at the end of the pass;
	// the vector instructions, loads, stores and the mov in between do
	// not write them.
683	subs	x2,x2,#3
684	aese	v4.16b,v23.16b
685	aese	v5.16b,v23.16b
686	aese	v17.16b,v23.16b
687
688	eor	v2.16b,v2.16b,v4.16b
689	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
690	st1	{v2.16b},[x1],#16
691	eor	v3.16b,v3.16b,v5.16b
692	mov	w6,w5
693	st1	{v3.16b},[x1],#16
694	eor	v19.16b,v19.16b,v17.16b
695	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
696	st1	{v19.16b},[x1],#16
697	b.hs	.Loop3x_ctr32
698
	// Undo the bias: x2 = blocks still to do (0, 1 or 2). With one block
	// left the input step x12 is 0, otherwise 16.
699	adds	x2,x2,#3
700	b.eq	.Lctr32_done
701	cmp	x2,#1
702	mov	x12,#16
703	csel	x12,xzr,x12,eq
704
	// Tail: two lanes (v0 and v1) are always computed; the second
	// result is stored only when x2 is not 1.
705.Lctr32_tail:
706	aese	v0.16b,v16.16b
707	aesmc	v0.16b,v0.16b
708	aese	v1.16b,v16.16b
709	aesmc	v1.16b,v1.16b
710	ld1	{v16.4s},[x7],#16
711	subs	w6,w6,#2
712	aese	v0.16b,v17.16b
713	aesmc	v0.16b,v0.16b
714	aese	v1.16b,v17.16b
715	aesmc	v1.16b,v1.16b
716	ld1	{v17.4s},[x7],#16
717	b.gt	.Lctr32_tail
718
719	aese	v0.16b,v16.16b
720	aesmc	v0.16b,v0.16b
721	aese	v1.16b,v16.16b
722	aesmc	v1.16b,v1.16b
723	aese	v0.16b,v17.16b
724	aesmc	v0.16b,v0.16b
725	aese	v1.16b,v17.16b
726	aesmc	v1.16b,v1.16b
	// With x12 == 0 the second load below re-reads the same block
	// instead of reading past it.
727	ld1	{v2.16b},[x0],x12
728	aese	v0.16b,v20.16b
729	aesmc	v0.16b,v0.16b
730	aese	v1.16b,v20.16b
731	aesmc	v1.16b,v1.16b
732	ld1	{v3.16b},[x0]
733	aese	v0.16b,v21.16b
734	aesmc	v0.16b,v0.16b
735	aese	v1.16b,v21.16b
736	aesmc	v1.16b,v1.16b
737	eor	v2.16b,v2.16b,v7.16b
738	aese	v0.16b,v22.16b
739	aesmc	v0.16b,v0.16b
740	aese	v1.16b,v22.16b
741	aesmc	v1.16b,v1.16b
742	eor	v3.16b,v3.16b,v7.16b
743	aese	v0.16b,v23.16b
744	aese	v1.16b,v23.16b
745
746	cmp	x2,#1
747	eor	v2.16b,v2.16b,v0.16b
748	eor	v3.16b,v3.16b,v1.16b
749	st1	{v2.16b},[x1],#16
750	b.eq	.Lctr32_done
751	st1	{v3.16b},[x1]
752
	// Only x29 is reloaded from the frame; x30 is never written in this
	// function.
753.Lctr32_done:
754	ldr	x29,[sp],#16
755	ret
756.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
757#endif
758