# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain copyright notices,
#      this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
#     * Neither the name of the Andy Polyakov nor the names of its
#      copyright holder and contributors may be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
#
# 1 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S"
# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
# 2 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S" 2


.text
// aese/aesd/aesmc/aesimc, used throughout this file, need the crypto extension.
.arch armv8-a+crypto
.align 5
// Key-expansion constants; aes_v8_set_encrypt_key reads them through x3.
.Lrcon:
// Round constant 0x01 in every 32-bit lane (doubled with shl after each use).
.long 0x01,0x01,0x01,0x01
// tbl byte indices 13,14,15,12 in every lane: rotate the last key word by one
// byte and replicate it across the vector.
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
// Round constant 0x1b, loaded for the last two AES-128 rounds.
.long 0x1b,0x1b,0x1b,0x1b

//-----------------------------------------------------------------------
// aes_v8_set_encrypt_key: expand a user key into an AES key schedule.
//
// In:   x0 = pointer to the user key bytes (16, 24 or 32 bytes are read)
//       w1 = key length in bits; accepted values are 128, 192 and 256
//       x2 = pointer to the key-schedule buffer. Round keys are written
//            from offset 0 and the round count (10, 12 or 14) is written
//            as a 32-bit word at offset 240.
// Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is rejected
//       On success x2 = buffer + 240 and w12 = round count; the sibling
//       aes_v8_set_decrypt_key depends on both.
// Uses: w1, x3, w12, v0-v6, flags
//
// .Lenc_key is a second name for the entry point so that
// aes_v8_set_decrypt_key can reach it with a local "bl".
//-----------------------------------------------------------------------
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        mov     x3,#-1                  // provisional result: NULL argument
        cmp     x0,#0
        b.eq    .Lenc_key_abort
        cmp     x2,#0
        b.eq    .Lenc_key_abort
        mov     x3,#-2                  // provisional result: bad key length
        cmp     w1,#128
        b.lt    .Lenc_key_abort
        cmp     w1,#256
        b.gt    .Lenc_key_abort
        tst     w1,#0x3f                // must be a multiple of 64 bits
        b.ne    .Lenc_key_abort

        adr     x3,.Lrcon
        cmp     w1,#192                 // flags stay live until the three-way branch

        eor     v0.16b,v0.16b,v0.16b    // v0 = 0: zero round key and zero fill for ext
        ld1     {v3.16b},[x0],#16       // v3 = key bytes 0..15
        mov     w1,#8                   // w1 is reused as the loop counter
        ld1     {v1.4s,v2.4s},[x3],#32  // v1 = rcon, v2 = tbl mask; x3 -> 0x1b row

        b.lt    .Loop128
        b.eq    .L192
        b       .L256

// ---- AES-128: 8 looped rounds, then 2 unrolled rounds with rcon 0x1b/0x36.
// Each pass stores the current round key (v3) and derives the next one.
.align 4
.Loop128:
        tbl     v6.16b,{v3.16b},v2.16b  // v6 = rotated last word in every lane
        ext     v5.16b,v0.16b,v3.16b,#12        // v5 = v3 moved up one word, zero filled
        st1     {v3.4s},[x2],#16        // emit the current round key
        aese    v6.16b,v0.16b           // zero key: S-box only (lanes are identical,
                                        // so the row shift changes nothing)
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b    // three ext/eor steps build the running
        ext     v5.16b,v0.16b,v5.16b,#12        // xor of the four key words
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b    // add the round constant
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1        // next round constant
        eor     v3.16b,v3.16b,v6.16b    // v3 = next round key
        b.ne    .Loop128

        ld1     {v1.4s},[x3]            // v1 = 0x1b

        // Unrolled round using rcon 0x1b.
        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1        // 0x1b -> 0x36
        eor     v3.16b,v3.16b,v6.16b

        // Unrolled round using rcon 0x36; produces the final round key.
        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2]            // last round key, no post-increment
        add     x2,x2,#0x50             // x2 = buffer + 160 + 80 = buffer + 240

        mov     w12,#10
        b       .Ldone

// ---- AES-192: v3 holds key words 0-3, the low half of v4 holds words 4-5.
// Eight passes, each storing 8 + 16 bytes.
.align 4
.L192:
        ld1     {v4.8b},[x0],#8         // v4 = key bytes 16..23 (upper half zeroed)
        movi    v6.16b,#8               // v6 is borrowed as a constant
        st1     {v3.4s},[x2],#16
        sub     v2.16b,v2.16b,v6.16b    // mask now picks bytes 5,6,7,4 (second word of v4)

.Loop192:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.8b},[x2],#8
        aese    v6.16b,v0.16b
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b

        dup     v5.4s,v3.s[3]           // carry the last new word into the v4 half
        eor     v5.16b,v5.16b,v4.16b
        eor     v6.16b,v6.16b,v1.16b    // add the round constant
        ext     v4.16b,v0.16b,v4.16b,#12
        shl     v1.16b,v1.16b,#1
        eor     v4.16b,v4.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        eor     v4.16b,v4.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.ne    .Loop192

        mov     w12,#12
        add     x2,x2,#0x20             // x2 = buffer + 208 + 32 = buffer + 240
        b       .Ldone

// ---- AES-256: v3 and v4 hold the two key halves. Seven passes; the seventh
// leaves through the b.eq below after storing the last round key.
.align 4
.L256:
        ld1     {v4.16b},[x0]           // v4 = key bytes 16..31
        mov     w1,#7
        mov     w12,#14
        st1     {v3.4s},[x2],#16

.Loop256:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.4s},[x2],#16
        aese    v6.16b,v0.16b
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.eq    .Ldone                  // flags still come from the subs above

        dup     v6.4s,v3.s[3]           // second half: S-box without rotate or rcon
        ext     v5.16b,v0.16b,v4.16b,#12
        aese    v6.16b,v0.16b

        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b

        eor     v4.16b,v4.16b,v6.16b
        b       .Loop256

.Ldone:
        str     w12,[x2]                // round count at buffer + 240
        mov     x3,#0

.Lenc_key_abort:
        mov     x0,x3                   // return value
        ldr     x29,[sp],#16            // x30 was never modified, so only x29 is reloaded
        ret
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//-----------------------------------------------------------------------
// aes_v8_set_decrypt_key: build a key schedule for aes_v8_decrypt and the
// CBC decrypt path.
//
// In:   x0, w1, x2 are passed through unchanged to aes_v8_set_encrypt_key
//       (user key pointer, key length in bits, schedule buffer).
// Out:  x0 = 0 on success, otherwise the non-zero code from the expansion.
// Uses: everything the expansion uses, plus x4, v0, v1.
//
// After the encryption schedule is built, the round keys are reversed in
// place and aesimc is applied to every key except the first and the last.
// The two .inst words are PACIASP / AUTIASP, which sign and authenticate
// x30 around the nested call; presumably they are spelled as raw words so
// that assemblers without pointer-authentication support accept the file.
//-----------------------------------------------------------------------
.globl aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
        .inst   0xd503233f              // paciasp
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        bl      .Lenc_key               // build the encryption schedule first

        cmp     x0,#0
        b.ne    .Ldec_key_abort         // hand the error code back unchanged

        sub     x2,x2,#240              // expansion left x2 at buffer + 240
        mov     x4,#-16                 // post-index step for walking backwards
        add     x0,x2,x12,lsl#4         // x0 = last round key (buffer + rounds * 16)

        // Swap the outermost pair as-is (no aesimc on these two).
        ld1     {v0.4s},[x2]
        ld1     {v1.4s},[x0]
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16

        // Swap the inner pairs, transforming both; x2 climbs while x0 descends.
.Loop_imc:
        ld1     {v0.4s},[x2]
        ld1     {v1.4s},[x0]
        aesimc  v0.16b,v0.16b
        aesimc  v1.16b,v1.16b
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16
        cmp     x0,x2
        b.hi    .Loop_imc

        // The pointers have met on the middle round key: transform it in place.
        ld1     {v0.4s},[x2]
        aesimc  v0.16b,v0.16b
        st1     {v0.4s},[x0]

        eor     x0,x0,x0                // return 0
.Ldec_key_abort:
        ldp     x29,x30,[sp],#16
        .inst   0xd50323bf              // autiasp
        ret
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
//-----------------------------------------------------------------------
// aes_v8_encrypt: encrypt a single 16-byte block.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule; the round count is read as a
//            32-bit word at offset 240
// Out:  16 bytes stored at x1; nothing is returned in x0
// Uses: x2 (advanced through the schedule), w3, v0-v2, flags
// Leaf routine: no stack frame is created.
//-----------------------------------------------------------------------
.globl aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
        ldr     w3,[x2,#240]            // w3 = number of rounds
        ld1     {v0.4s},[x2],#16        // v0 = round key 0
        ld1     {v2.16b},[x0]           // v2 = state
        sub     w3,w3,#2                // the last two rounds are done after the loop
        ld1     {v1.4s},[x2],#16        // v1 = round key 1

        // Two rounds per pass; the next two round keys are fetched while the
        // current ones are in use.
.Loop_enc:
        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aese    v2.16b,v1.16b
        aesmc   v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    .Loop_enc

        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2]            // v0 = last round key
        aese    v2.16b,v1.16b           // final round: no aesmc
        eor     v2.16b,v2.16b,v0.16b    // add the last round key

        st1     {v2.16b},[x1]
        ret
.size aes_v8_encrypt,.-aes_v8_encrypt
//-----------------------------------------------------------------------
// aes_v8_decrypt: decrypt a single 16-byte block.
//
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to a key schedule in the layout produced by
//            aes_v8_set_decrypt_key; the round count is read as a 32-bit
//            word at offset 240
// Out:  16 bytes stored at x1; nothing is returned in x0
// Uses: x2 (advanced through the schedule), w3, v0-v2, flags
// Leaf routine: no stack frame is created.
//-----------------------------------------------------------------------
.globl aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
        ldr     w3,[x2,#240]            // w3 = number of rounds
        ld1     {v0.4s},[x2],#16        // v0 = round key 0
        ld1     {v2.16b},[x0]           // v2 = state
        sub     w3,w3,#2                // the last two rounds are done after the loop
        ld1     {v1.4s},[x2],#16        // v1 = round key 1

        // Two rounds per pass; the next two round keys are fetched while the
        // current ones are in use.
.Loop_dec:
        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aesd    v2.16b,v1.16b
        aesimc  v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    .Loop_dec

        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2]            // v0 = last round key
        aesd    v2.16b,v1.16b           // final round: no aesimc
        eor     v2.16b,v2.16b,v0.16b    // add the last round key

        st1     {v2.16b},[x1]
        ret
.size aes_v8_decrypt,.-aes_v8_decrypt
//-----------------------------------------------------------------------
// aes_v8_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = length in bytes
//       x3 = key schedule (round count read as a 32-bit word at offset 240)
//       x4 = 16-byte IV; read on entry and overwritten on exit with the
//            chaining value (the last ciphertext block)
//       w5 = non-zero to encrypt, zero to decrypt
// Out:  output blocks at x1, updated IV at x4; nothing is returned in x0
// Uses: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags
//
// A length below 16 returns immediately without touching memory. The block
// count is derived from the length rounded down to a multiple of 16.
// NOTE(review): the input stride in x8 is only zeroed when the remaining
// length reaches exactly zero, so for a length that is not a multiple of 16
// the look-ahead load can read up to 16 bytes past the last whole block.
// Callers presumably always pass whole blocks; confirm.
//
// The instruction interleaving below is deliberate scheduling; keep the
// order when editing.
//-----------------------------------------------------------------------
.globl aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        subs    x2,x2,#16
        mov     x8,#16                  // x8 = input stride
        b.lo    .Lcbc_abort             // fewer than 16 bytes: nothing to do
        csel    x8,xzr,x8,eq            // exactly one block: do not advance x0

        cmp     w5,#0                   // encrypt or decrypt? consumed by b.eq .Lcbc_dec
        ldr     w5,[x3,#240]            // w5 now holds the round count
        and     x2,x2,#-16
        ld1     {v6.16b},[x4]           // v6 = IV
        ld1     {v0.16b},[x0],x8        // v0 = first input block

        ld1     {v16.4s,v17.4s},[x3]    // round keys 0 and 1
        sub     w5,w5,#6
        add     x7,x3,x5,lsl#4          // x7 -> last 7 round keys
        sub     w5,w5,#2                // w5 = rounds - 8: 2, 4 or 6
        ld1     {v18.4s,v19.4s},[x7],#32
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]            // v7 = last round key

        add     x7,x3,#32
        mov     w6,w5
        b.eq    .Lcbc_dec

        // ---- Encryption. v5 = key[0] ^ key[last] is folded into the next
        // plaintext block so that the aese at the top of the loop applies both
        // the CBC chaining xor and the first round key in one step.
        cmp     w5,#2
        eor     v0.16b,v0.16b,v6.16b    // first block ^= IV
        eor     v5.16b,v16.16b,v7.16b
        b.eq    .Lcbc_enc128

        // 192/256-bit keys: round keys 2-3 stay in v2/v3; x6, x12, x14 and x3
        // point at round keys 4, 5, 6 and 7, which are reloaded every block.
        ld1     {v2.4s,v3.4s},[x7]
        add     x7,x3,#16               // x7 -> round key 1 for the per-block reload
        add     x6,x3,#16*4
        add     x12,x3,#16*5
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        add     x14,x3,#16*6
        add     x3,x3,#16*7
        b       .Lenter_cbc_enc

.align 4
.Loop_cbc_enc:
        aese    v0.16b,v16.16b          // v16 = next plaintext ^ key[0] ^ key[last]
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16       // store the previous ciphertext block
.Lenter_cbc_enc:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x6]
        cmp     w5,#4                   // 192-bit key?
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x12]
        b.eq    .Lcbc_enc192

        // Two extra rounds for 256-bit keys.
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x14]
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x3]
        nop

.Lcbc_enc192:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16               // flags consumed by the b.hs at the loop end
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq            // on the last block stop advancing x0
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8       // look-ahead load of the next plaintext
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x7]           // reload round key 1
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b    // v6 = ciphertext block
        b.hs    .Loop_cbc_enc

        st1     {v6.16b},[x1],#16
        b       .Lcbc_done

        // 128-bit keys: all eleven round keys stay resident in registers.
.align 5
.Lcbc_enc128:
        ld1     {v2.4s,v3.4s},[x7]      // round keys 2 and 3
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        b       .Lenter_cbc_enc128
.Loop_cbc_enc128:
        aese    v0.16b,v16.16b          // v16 = next plaintext ^ key[0] ^ key[last]
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16       // store the previous ciphertext block
.Lenter_cbc_enc128:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16               // flags consumed by the b.hs at the loop end
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq            // on the last block stop advancing x0
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8       // look-ahead load of the next plaintext
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b    // v6 = ciphertext block
        b.hs    .Loop_cbc_enc128

        st1     {v6.16b},[x1],#16
        b       .Lcbc_done

        // ---- Decryption, three blocks per pass. v0/v1/v18 carry the blocks
        // being decrypted, v2/v3/v19 keep copies of the ciphertext for chaining
        // and v6 holds the previous ciphertext (initially the IV). v18 and v19
        // are reused as data here, so only v20-v23 and v7 stay resident as
        // round keys; the earlier ones are streamed through v16/v17 via x7.
.align 5
.Lcbc_dec:
        ld1     {v18.16b},[x0],#16
        subs    x2,x2,#32               // bias: lo means fewer than three blocks
        add     w6,w5,#2
        orr     v3.16b,v0.16b,v0.16b
        orr     v1.16b,v0.16b,v0.16b
        orr     v19.16b,v18.16b,v18.16b
        b.lo    .Lcbc_dec_tail

        orr     v1.16b,v18.16b,v18.16b
        ld1     {v18.16b},[x0],#16
        orr     v2.16b,v0.16b,v0.16b
        orr     v3.16b,v1.16b,v1.16b
        orr     v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Loop3x_cbc_dec

        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        eor     v4.16b,v6.16b,v7.16b    // chaining value ^ last round key, block 0
        subs    x2,x2,#0x30             // flags consumed by csel and the b.hs below
        eor     v5.16b,v2.16b,v7.16b    // same for block 1
        csel    x6,x2,x6,lo             // w6 is zero here after the round loop
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b   // same for block 2
        add     x0,x0,x6                // when fewer than three blocks remain, step x0
                                        // back so the loads below end at the last block

        orr     v6.16b,v19.16b,v19.16b  // new chaining value = last ciphertext of this pass
        mov     x7,x3
        aesd    v0.16b,v20.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        ld1     {v2.16b},[x0],#16
        aesd    v0.16b,v21.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        aesd    v0.16b,v22.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        ld1     {v19.16b},[x0],#16
        aesd    v0.16b,v23.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        ld1     {v16.4s},[x7],#16       // reload round key 0
        add     w6,w5,#2
        eor     v4.16b,v4.16b,v0.16b
        eor     v5.16b,v5.16b,v1.16b
        eor     v18.16b,v18.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // reload round key 1
        st1     {v4.16b},[x1],#16
        orr     v0.16b,v2.16b,v2.16b
        st1     {v5.16b},[x1],#16
        orr     v1.16b,v3.16b,v3.16b
        st1     {v18.16b},[x1],#16
        orr     v18.16b,v19.16b,v19.16b
        b.hs    .Loop3x_cbc_dec

        cmn     x2,#0x30                // x2 == -48: nothing left over
        b.eq    .Lcbc_done
        nop

        // One or two blocks remain, held in v1 and v18.
.Lcbc_dec_tail:
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Lcbc_dec_tail

        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        cmn     x2,#0x20                // x2 == -32: exactly one block remains
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        eor     v5.16b,v6.16b,v7.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        b.eq    .Lcbc_dec_one
        eor     v5.16b,v5.16b,v1.16b
        eor     v17.16b,v17.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16
        st1     {v17.16b},[x1],#16
        b       .Lcbc_done

.Lcbc_dec_one:
        eor     v5.16b,v5.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16

.Lcbc_done:
        st1     {v6.16b},[x4]           // write the chaining value back to the IV buffer
.Lcbc_abort:
        ldr     x29,[sp],#16            // x30 was never modified, so only x29 is reloaded
        ret
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
//-----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks: AES-CTR over whole blocks with a 32-bit
// big-endian counter held in the last word of the counter block.
//
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = key schedule (round count read as a 32-bit word at offset 240)
//       x4 = 16-byte initial counter block; it is only read, never
//            written back
// Out:  x2 * 16 bytes stored at x1; nothing is returned in x0
// Uses: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags
//
// A block count of zero returns immediately. Without that guard the tail
// code, which always handles "one or two" blocks, would read 16 input
// bytes and store 32 output bytes.
//
// The main loop handles three blocks per pass, and the tail handles the
// remaining one or two. The instruction interleaving is deliberate
// scheduling; keep the order when editing.
//-----------------------------------------------------------------------
.globl aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        cbz     x2,.Lctr32_done         // no blocks: touch neither input nor output
        ldr     w5,[x3,#240]            // w5 = number of rounds

        ldr     w8, [x4, #12]           // w8 = counter word as stored (big-endian)
        ld1     {v0.4s},[x4]            // v0 = counter block for the first block

        ld1     {v16.4s,v17.4s},[x3]    // round keys 0 and 1
        sub     w5,w5,#4
        mov     x12,#16                 // x12 = stride between the two tail loads
        cmp     x2,#2                   // flags consumed by csel and b.ls below
        add     x7,x3,x5,lsl#4          // x7 -> last 5 round keys
        sub     w5,w5,#2                // w5 = rounds - 6
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]            // v7 = last round key
        add     x7,x3,#32
        mov     w6,w5
        csel    x12,xzr,x12,lo          // single block: second tail load re-reads it

        rev     w8, w8                  // counter in host byte order

        orr     v1.16b,v0.16b,v0.16b
        add     w10, w8, #1
        orr     v18.16b,v0.16b,v0.16b
        add     w8, w8, #2
        orr     v6.16b,v0.16b,v0.16b    // v6 = pristine counter block template
        rev     w10, w10
        mov     v1.s[3],w10             // v1 = counter + 1
        b.ls    .Lctr32_tail            // one or two blocks in total
        rev     w12, w8
        sub     x2,x2,#3                // bias
        mov     v18.s[3],w12            // v18 = counter + 2
        b       .Loop3x_ctr32

.align 4
.Loop3x_ctr32:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v17.16b
        aesmc   v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Loop3x_ctr32

        // Final rounds. The three states move to v4/v5/v17 so that v0/v1/v18
        // can already be refilled with the next three counter blocks.
        aese    v0.16b,v16.16b
        aesmc   v4.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v5.16b,v1.16b
        ld1     {v2.16b},[x0],#16
        orr     v0.16b,v6.16b,v6.16b
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        orr     v1.16b,v6.16b,v6.16b
        aese    v4.16b,v17.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v17.16b
        aesmc   v5.16b,v5.16b
        ld1     {v19.16b},[x0],#16
        mov     x7,x3
        aese    v18.16b,v17.16b
        aesmc   v17.16b,v18.16b
        orr     v18.16b,v6.16b,v6.16b
        add     w9,w8,#1
        aese    v4.16b,v20.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v20.16b
        aesmc   v5.16b,v5.16b
        eor     v2.16b,v2.16b,v7.16b    // fold the last round key into the input
        add     w10,w8,#2
        aese    v17.16b,v20.16b
        aesmc   v17.16b,v17.16b
        eor     v3.16b,v3.16b,v7.16b
        add     w8,w8,#3
        aese    v4.16b,v21.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v21.16b
        aesmc   v5.16b,v5.16b
        eor     v19.16b,v19.16b,v7.16b
        rev     w9,w9
        aese    v17.16b,v21.16b
        aesmc   v17.16b,v17.16b
        mov     v0.s[3], w9
        rev     w10,w10
        aese    v4.16b,v22.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v22.16b
        aesmc   v5.16b,v5.16b
        mov     v1.s[3], w10
        rev     w12,w8
        aese    v17.16b,v22.16b
        aesmc   v17.16b,v17.16b
        mov     v18.s[3], w12
        subs    x2,x2,#3                // flags consumed by the b.hs at the loop end
        aese    v4.16b,v23.16b
        aese    v5.16b,v23.16b
        aese    v17.16b,v23.16b

        eor     v2.16b,v2.16b,v4.16b
        ld1     {v16.4s},[x7],#16       // reload round key 0
        st1     {v2.16b},[x1],#16
        eor     v3.16b,v3.16b,v5.16b
        mov     w6,w5
        st1     {v3.16b},[x1],#16
        eor     v19.16b,v19.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // reload round key 1
        st1     {v19.16b},[x1],#16
        b.hs    .Loop3x_ctr32

        adds    x2,x2,#3                // undo the bias: 0, 1 or 2 blocks remain
        b.eq    .Lctr32_done
        cmp     x2,#1
        mov     x12,#16
        csel    x12,xzr,x12,eq          // single block: second tail load re-reads it

        // One or two blocks remain; v0 and v1 hold their counter blocks.
.Lctr32_tail:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Lctr32_tail

        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v2.16b},[x0],x12
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v20.16b
        aesmc   v1.16b,v1.16b
        ld1     {v3.16b},[x0]
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v21.16b
        aesmc   v1.16b,v1.16b
        eor     v2.16b,v2.16b,v7.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v22.16b
        aesmc   v1.16b,v1.16b
        eor     v3.16b,v3.16b,v7.16b
        aese    v0.16b,v23.16b
        aese    v1.16b,v23.16b

        cmp     x2,#1
        eor     v2.16b,v2.16b,v0.16b
        eor     v3.16b,v3.16b,v1.16b
        st1     {v2.16b},[x1],#16
        b.eq    .Lctr32_done            // only one block was pending
        st1     {v3.16b},[x1]

.Lctr32_done:
        ldr     x29,[sp],#16            // x30 was never modified, so only x29 is reloaded
        ret
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
// Empty flag string (no "x"): GNU toolchains read this note section as
// "this object does not need an executable stack".
.section .note.GNU-stack,"",%progbits