1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "secerr.h"
6 #include "rijndael.h"
7
8 #if ((defined(__clang__) || \
9 (defined(__GNUC__) && defined(__GNUC_MINOR__) && \
10 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \
11 defined(IS_LITTLE_ENDIAN))
12
13 #ifndef __ARM_FEATURE_CRYPTO
14 #error "Compiler option is invalid"
15 #endif
16
17 #include <arm_neon.h>
18
19 SECStatus
arm_aes_encrypt_ecb_128(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)20 arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
21 unsigned int *outputLen,
22 unsigned int maxOutputLen,
23 const unsigned char *input,
24 unsigned int inputLen,
25 unsigned int blocksize)
26 {
27 #if !defined(HAVE_UNALIGNED_ACCESS)
28 pre_align unsigned char buf[16] post_align;
29 #endif
30 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
31 uint8x16_t key11;
32 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
33
34 if (!inputLen) {
35 return SECSuccess;
36 }
37
38 key1 = vld1q_u8(key);
39 key2 = vld1q_u8(key + 16);
40 key3 = vld1q_u8(key + 32);
41 key4 = vld1q_u8(key + 48);
42 key5 = vld1q_u8(key + 64);
43 key6 = vld1q_u8(key + 80);
44 key7 = vld1q_u8(key + 96);
45 key8 = vld1q_u8(key + 112);
46 key9 = vld1q_u8(key + 128);
47 key10 = vld1q_u8(key + 144);
48 key11 = vld1q_u8(key + 160);
49
50 while (inputLen > 0) {
51 uint8x16_t state;
52 #if defined(HAVE_UNALIGNED_ACCESS)
53 state = vld1q_u8(input);
54 #else
55 if ((uintptr_t)input & 0x7) {
56 memcpy(buf, input, 16);
57 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
58 } else {
59 state = vld1q_u8(__builtin_assume_aligned(input, 8));
60 }
61 #endif
62 input += 16;
63 inputLen -= 16;
64
65 /* Rounds */
66 state = vaeseq_u8(state, key1);
67 state = vaesmcq_u8(state);
68 state = vaeseq_u8(state, key2);
69 state = vaesmcq_u8(state);
70 state = vaeseq_u8(state, key3);
71 state = vaesmcq_u8(state);
72 state = vaeseq_u8(state, key4);
73 state = vaesmcq_u8(state);
74 state = vaeseq_u8(state, key5);
75 state = vaesmcq_u8(state);
76 state = vaeseq_u8(state, key6);
77 state = vaesmcq_u8(state);
78 state = vaeseq_u8(state, key7);
79 state = vaesmcq_u8(state);
80 state = vaeseq_u8(state, key8);
81 state = vaesmcq_u8(state);
82 state = vaeseq_u8(state, key9);
83 state = vaesmcq_u8(state);
84 state = vaeseq_u8(state, key10);
85 /* AddRoundKey */
86 state = veorq_u8(state, key11);
87
88 #if defined(HAVE_UNALIGNED_ACCESS)
89 vst1q_u8(output, state);
90 #else
91 if ((uintptr_t)output & 0x7) {
92 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
93 memcpy(output, buf, 16);
94 } else {
95 vst1q_u8(__builtin_assume_aligned(output, 8), state);
96 }
97 #endif
98 output += 16;
99 }
100
101 return SECSuccess;
102 }
103
104 SECStatus
arm_aes_decrypt_ecb_128(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)105 arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
106 unsigned int *outputLen,
107 unsigned int maxOutputLen,
108 const unsigned char *input,
109 unsigned int inputLen,
110 unsigned int blocksize)
111 {
112 #if !defined(HAVE_UNALIGNED_ACCESS)
113 pre_align unsigned char buf[16] post_align;
114 #endif
115 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
116 uint8x16_t key11;
117 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
118
119 if (inputLen == 0) {
120 return SECSuccess;
121 }
122
123 key1 = vld1q_u8(key);
124 key2 = vld1q_u8(key + 16);
125 key3 = vld1q_u8(key + 32);
126 key4 = vld1q_u8(key + 48);
127 key5 = vld1q_u8(key + 64);
128 key6 = vld1q_u8(key + 80);
129 key7 = vld1q_u8(key + 96);
130 key8 = vld1q_u8(key + 112);
131 key9 = vld1q_u8(key + 128);
132 key10 = vld1q_u8(key + 144);
133 key11 = vld1q_u8(key + 160);
134
135 while (inputLen > 0) {
136 uint8x16_t state;
137 #if defined(HAVE_UNALIGNED_ACCESS)
138 state = vld1q_u8(input);
139 #else
140 if ((uintptr_t)input & 0x7) {
141 memcpy(buf, input, 16);
142 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
143 } else {
144 state = vld1q_u8(__builtin_assume_aligned(input, 8));
145 }
146 #endif
147 input += 16;
148 inputLen -= 16;
149
150 /* Rounds */
151 state = vaesdq_u8(state, key11);
152 state = vaesimcq_u8(state);
153 state = vaesdq_u8(state, key10);
154 state = vaesimcq_u8(state);
155 state = vaesdq_u8(state, key9);
156 state = vaesimcq_u8(state);
157 state = vaesdq_u8(state, key8);
158 state = vaesimcq_u8(state);
159 state = vaesdq_u8(state, key7);
160 state = vaesimcq_u8(state);
161 state = vaesdq_u8(state, key6);
162 state = vaesimcq_u8(state);
163 state = vaesdq_u8(state, key5);
164 state = vaesimcq_u8(state);
165 state = vaesdq_u8(state, key4);
166 state = vaesimcq_u8(state);
167 state = vaesdq_u8(state, key3);
168 state = vaesimcq_u8(state);
169 state = vaesdq_u8(state, key2);
170 /* AddRoundKey */
171 state = veorq_u8(state, key1);
172
173 #if defined(HAVE_UNALIGNED_ACCESS)
174 vst1q_u8(output, state);
175 #else
176 if ((uintptr_t)output & 0x7) {
177 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
178 memcpy(output, buf, 16);
179 } else {
180 vst1q_u8(__builtin_assume_aligned(output, 8), state);
181 }
182 #endif
183 output += 16;
184 }
185
186 return SECSuccess;
187 }
188
189 SECStatus
arm_aes_encrypt_cbc_128(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)190 arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
191 unsigned int *outputLen,
192 unsigned int maxOutputLen,
193 const unsigned char *input,
194 unsigned int inputLen,
195 unsigned int blocksize)
196 {
197 #if !defined(HAVE_UNALIGNED_ACCESS)
198 pre_align unsigned char buf[16] post_align;
199 #endif
200 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
201 uint8x16_t key11;
202 uint8x16_t iv;
203 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
204
205 if (!inputLen) {
206 return SECSuccess;
207 }
208
209 /* iv */
210 iv = vld1q_u8(cx->iv);
211
212 /* expanedKey */
213 key1 = vld1q_u8(key);
214 key2 = vld1q_u8(key + 16);
215 key3 = vld1q_u8(key + 32);
216 key4 = vld1q_u8(key + 48);
217 key5 = vld1q_u8(key + 64);
218 key6 = vld1q_u8(key + 80);
219 key7 = vld1q_u8(key + 96);
220 key8 = vld1q_u8(key + 112);
221 key9 = vld1q_u8(key + 128);
222 key10 = vld1q_u8(key + 144);
223 key11 = vld1q_u8(key + 160);
224
225 while (inputLen > 0) {
226 uint8x16_t state;
227 #if defined(HAVE_UNALIGNED_ACCESS)
228 state = vld1q_u8(input);
229 #else
230 if ((uintptr_t)input & 0x7) {
231 memcpy(buf, input, 16);
232 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
233 } else {
234 state = vld1q_u8(__builtin_assume_aligned(input, 8));
235 }
236 #endif
237 input += 16;
238 inputLen -= 16;
239
240 state = veorq_u8(state, iv);
241
242 /* Rounds */
243 state = vaeseq_u8(state, key1);
244 state = vaesmcq_u8(state);
245 state = vaeseq_u8(state, key2);
246 state = vaesmcq_u8(state);
247 state = vaeseq_u8(state, key3);
248 state = vaesmcq_u8(state);
249 state = vaeseq_u8(state, key4);
250 state = vaesmcq_u8(state);
251 state = vaeseq_u8(state, key5);
252 state = vaesmcq_u8(state);
253 state = vaeseq_u8(state, key6);
254 state = vaesmcq_u8(state);
255 state = vaeseq_u8(state, key7);
256 state = vaesmcq_u8(state);
257 state = vaeseq_u8(state, key8);
258 state = vaesmcq_u8(state);
259 state = vaeseq_u8(state, key9);
260 state = vaesmcq_u8(state);
261 state = vaeseq_u8(state, key10);
262 /* AddRoundKey */
263 state = veorq_u8(state, key11);
264
265 #if defined(HAVE_UNALIGNED_ACCESS)
266 vst1q_u8(output, state);
267 #else
268 if ((uintptr_t)output & 0x7) {
269 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
270 memcpy(output, buf, 16);
271 } else {
272 vst1q_u8(__builtin_assume_aligned(output, 8), state);
273 }
274 #endif
275 output += 16;
276 iv = state;
277 }
278 vst1q_u8(cx->iv, iv);
279
280 return SECSuccess;
281 }
282
283 SECStatus
arm_aes_decrypt_cbc_128(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)284 arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
285 unsigned int *outputLen,
286 unsigned int maxOutputLen,
287 const unsigned char *input,
288 unsigned int inputLen,
289 unsigned int blocksize)
290 {
291 #if !defined(HAVE_UNALIGNED_ACCESS)
292 pre_align unsigned char buf[16] post_align;
293 #endif
294 uint8x16_t iv;
295 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
296 uint8x16_t key11;
297 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
298
299 if (!inputLen) {
300 return SECSuccess;
301 }
302
303 /* iv */
304 iv = vld1q_u8(cx->iv);
305
306 /* expanedKey */
307 key1 = vld1q_u8(key);
308 key2 = vld1q_u8(key + 16);
309 key3 = vld1q_u8(key + 32);
310 key4 = vld1q_u8(key + 48);
311 key5 = vld1q_u8(key + 64);
312 key6 = vld1q_u8(key + 80);
313 key7 = vld1q_u8(key + 96);
314 key8 = vld1q_u8(key + 112);
315 key9 = vld1q_u8(key + 128);
316 key10 = vld1q_u8(key + 144);
317 key11 = vld1q_u8(key + 160);
318
319 while (inputLen > 0) {
320 uint8x16_t state, old_state;
321 #if defined(HAVE_UNALIGNED_ACCESS)
322 state = vld1q_u8(input);
323 #else
324 if ((uintptr_t)input & 0x7) {
325 memcpy(buf, input, 16);
326 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
327 } else {
328 state = vld1q_u8(__builtin_assume_aligned(input, 8));
329 }
330 #endif
331 old_state = state;
332 input += 16;
333 inputLen -= 16;
334
335 /* Rounds */
336 state = vaesdq_u8(state, key11);
337 state = vaesimcq_u8(state);
338 state = vaesdq_u8(state, key10);
339 state = vaesimcq_u8(state);
340 state = vaesdq_u8(state, key9);
341 state = vaesimcq_u8(state);
342 state = vaesdq_u8(state, key8);
343 state = vaesimcq_u8(state);
344 state = vaesdq_u8(state, key7);
345 state = vaesimcq_u8(state);
346 state = vaesdq_u8(state, key6);
347 state = vaesimcq_u8(state);
348 state = vaesdq_u8(state, key5);
349 state = vaesimcq_u8(state);
350 state = vaesdq_u8(state, key4);
351 state = vaesimcq_u8(state);
352 state = vaesdq_u8(state, key3);
353 state = vaesimcq_u8(state);
354 state = vaesdq_u8(state, key2);
355 /* AddRoundKey */
356 state = veorq_u8(state, key1);
357
358 state = veorq_u8(state, iv);
359
360 #if defined(HAVE_UNALIGNED_ACCESS)
361 vst1q_u8(output, state);
362 #else
363 if ((uintptr_t)output & 0x7) {
364 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
365 memcpy(output, buf, 16);
366 } else {
367 vst1q_u8(__builtin_assume_aligned(output, 8), state);
368 }
369 #endif
370 output += 16;
371
372 iv = old_state;
373 }
374 vst1q_u8(cx->iv, iv);
375
376 return SECSuccess;
377 }
378
379 SECStatus
arm_aes_encrypt_ecb_192(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)380 arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
381 unsigned int *outputLen,
382 unsigned int maxOutputLen,
383 const unsigned char *input,
384 unsigned int inputLen,
385 unsigned int blocksize)
386 {
387 #if !defined(HAVE_UNALIGNED_ACCESS)
388 pre_align unsigned char buf[16] post_align;
389 #endif
390 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
391 uint8x16_t key11, key12, key13;
392 PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
393
394 if (!inputLen) {
395 return SECSuccess;
396 }
397
398 key1 = vld1q_u8(key);
399 key2 = vld1q_u8(key + 16);
400 key3 = vld1q_u8(key + 32);
401 key4 = vld1q_u8(key + 48);
402 key5 = vld1q_u8(key + 64);
403 key6 = vld1q_u8(key + 80);
404 key7 = vld1q_u8(key + 96);
405 key8 = vld1q_u8(key + 112);
406 key9 = vld1q_u8(key + 128);
407 key10 = vld1q_u8(key + 144);
408 key11 = vld1q_u8(key + 160);
409 key12 = vld1q_u8(key + 176);
410 key13 = vld1q_u8(key + 192);
411
412 while (inputLen > 0) {
413 uint8x16_t state;
414 #if defined(HAVE_UNALIGNED_ACCESS)
415 state = vld1q_u8(input);
416 #else
417 if ((uintptr_t)input & 0x7) {
418 memcpy(buf, input, 16);
419 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
420 } else {
421 state = vld1q_u8(__builtin_assume_aligned(input, 8));
422 }
423 #endif
424 input += 16;
425 inputLen -= 16;
426
427 /* Rounds */
428 state = vaeseq_u8(state, key1);
429 state = vaesmcq_u8(state);
430 state = vaeseq_u8(state, key2);
431 state = vaesmcq_u8(state);
432 state = vaeseq_u8(state, key3);
433 state = vaesmcq_u8(state);
434 state = vaeseq_u8(state, key4);
435 state = vaesmcq_u8(state);
436 state = vaeseq_u8(state, key5);
437 state = vaesmcq_u8(state);
438 state = vaeseq_u8(state, key6);
439 state = vaesmcq_u8(state);
440 state = vaeseq_u8(state, key7);
441 state = vaesmcq_u8(state);
442 state = vaeseq_u8(state, key8);
443 state = vaesmcq_u8(state);
444 state = vaeseq_u8(state, key9);
445 state = vaesmcq_u8(state);
446 state = vaeseq_u8(state, key10);
447 state = vaesmcq_u8(state);
448 state = vaeseq_u8(state, key11);
449 state = vaesmcq_u8(state);
450 state = vaeseq_u8(state, key12);
451 /* AddRoundKey */
452 state = veorq_u8(state, key13);
453
454 #if defined(HAVE_UNALIGNED_ACCESS)
455 vst1q_u8(output, state);
456 #else
457 if ((uintptr_t)output & 0x7) {
458 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
459 memcpy(output, buf, 16);
460 } else {
461 vst1q_u8(__builtin_assume_aligned(output, 8), state);
462 }
463 #endif
464 output += 16;
465 }
466
467 return SECSuccess;
468 }
469
470 SECStatus
arm_aes_decrypt_ecb_192(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)471 arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
472 unsigned int *outputLen,
473 unsigned int maxOutputLen,
474 const unsigned char *input,
475 unsigned int inputLen,
476 unsigned int blocksize)
477 {
478 #if !defined(HAVE_UNALIGNED_ACCESS)
479 pre_align unsigned char buf[16] post_align;
480 #endif
481 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
482 uint8x16_t key11, key12, key13;
483 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
484
485 if (!inputLen) {
486 return SECSuccess;
487 }
488
489 key1 = vld1q_u8(key);
490 key2 = vld1q_u8(key + 16);
491 key3 = vld1q_u8(key + 32);
492 key4 = vld1q_u8(key + 48);
493 key5 = vld1q_u8(key + 64);
494 key6 = vld1q_u8(key + 80);
495 key7 = vld1q_u8(key + 96);
496 key8 = vld1q_u8(key + 112);
497 key9 = vld1q_u8(key + 128);
498 key10 = vld1q_u8(key + 144);
499 key11 = vld1q_u8(key + 160);
500 key12 = vld1q_u8(key + 176);
501 key13 = vld1q_u8(key + 192);
502
503 while (inputLen > 0) {
504 uint8x16_t state;
505 #if defined(HAVE_UNALIGNED_ACCESS)
506 state = vld1q_u8(input);
507 #else
508 if ((uintptr_t)input & 0x7) {
509 memcpy(buf, input, 16);
510 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
511 } else {
512 state = vld1q_u8(__builtin_assume_aligned(input, 8));
513 }
514 #endif
515 input += 16;
516 inputLen -= 16;
517
518 /* Rounds */
519 state = vaesdq_u8(state, key13);
520 state = vaesimcq_u8(state);
521 state = vaesdq_u8(state, key12);
522 state = vaesimcq_u8(state);
523 state = vaesdq_u8(state, key11);
524 state = vaesimcq_u8(state);
525 state = vaesdq_u8(state, key10);
526 state = vaesimcq_u8(state);
527 state = vaesdq_u8(state, key9);
528 state = vaesimcq_u8(state);
529 state = vaesdq_u8(state, key8);
530 state = vaesimcq_u8(state);
531 state = vaesdq_u8(state, key7);
532 state = vaesimcq_u8(state);
533 state = vaesdq_u8(state, key6);
534 state = vaesimcq_u8(state);
535 state = vaesdq_u8(state, key5);
536 state = vaesimcq_u8(state);
537 state = vaesdq_u8(state, key4);
538 state = vaesimcq_u8(state);
539 state = vaesdq_u8(state, key3);
540 state = vaesimcq_u8(state);
541 state = vaesdq_u8(state, key2);
542 /* AddRoundKey */
543 state = veorq_u8(state, key1);
544
545 #if defined(HAVE_UNALIGNED_ACCESS)
546 vst1q_u8(output, state);
547 #else
548 if ((uintptr_t)output & 0x7) {
549 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
550 memcpy(output, buf, 16);
551 } else {
552 vst1q_u8(__builtin_assume_aligned(output, 8), state);
553 }
554 #endif
555 output += 16;
556 }
557
558 return SECSuccess;
559 }
560
561 SECStatus
arm_aes_encrypt_cbc_192(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)562 arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
563 unsigned int *outputLen,
564 unsigned int maxOutputLen,
565 const unsigned char *input,
566 unsigned int inputLen,
567 unsigned int blocksize)
568 {
569 #if !defined(HAVE_UNALIGNED_ACCESS)
570 pre_align unsigned char buf[16] post_align;
571 #endif
572 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
573 uint8x16_t key11, key12, key13;
574 uint8x16_t iv;
575 PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
576
577 if (!inputLen) {
578 return SECSuccess;
579 }
580
581 /* iv */
582 iv = vld1q_u8(cx->iv);
583
584 /* expanedKey */
585 key1 = vld1q_u8(key);
586 key2 = vld1q_u8(key + 16);
587 key3 = vld1q_u8(key + 32);
588 key4 = vld1q_u8(key + 48);
589 key5 = vld1q_u8(key + 64);
590 key6 = vld1q_u8(key + 80);
591 key7 = vld1q_u8(key + 96);
592 key8 = vld1q_u8(key + 112);
593 key9 = vld1q_u8(key + 128);
594 key10 = vld1q_u8(key + 144);
595 key11 = vld1q_u8(key + 160);
596 key12 = vld1q_u8(key + 176);
597 key13 = vld1q_u8(key + 192);
598
599 while (inputLen > 0) {
600 uint8x16_t state;
601 #if defined(HAVE_UNALIGNED_ACCESS)
602 state = vld1q_u8(input);
603 #else
604 if ((uintptr_t)input & 0x7) {
605 memcpy(buf, input, 16);
606 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
607 } else {
608 state = vld1q_u8(__builtin_assume_aligned(input, 8));
609 }
610 #endif
611 input += 16;
612 inputLen -= 16;
613
614 state = veorq_u8(state, iv);
615
616 /* Rounds */
617 state = vaeseq_u8(state, key1);
618 state = vaesmcq_u8(state);
619 state = vaeseq_u8(state, key2);
620 state = vaesmcq_u8(state);
621 state = vaeseq_u8(state, key3);
622 state = vaesmcq_u8(state);
623 state = vaeseq_u8(state, key4);
624 state = vaesmcq_u8(state);
625 state = vaeseq_u8(state, key5);
626 state = vaesmcq_u8(state);
627 state = vaeseq_u8(state, key6);
628 state = vaesmcq_u8(state);
629 state = vaeseq_u8(state, key7);
630 state = vaesmcq_u8(state);
631 state = vaeseq_u8(state, key8);
632 state = vaesmcq_u8(state);
633 state = vaeseq_u8(state, key9);
634 state = vaesmcq_u8(state);
635 state = vaeseq_u8(state, key10);
636 state = vaesmcq_u8(state);
637 state = vaeseq_u8(state, key11);
638 state = vaesmcq_u8(state);
639 state = vaeseq_u8(state, key12);
640 state = veorq_u8(state, key13);
641
642 #if defined(HAVE_UNALIGNED_ACCESS)
643 vst1q_u8(output, state);
644 #else
645 if ((uintptr_t)output & 0x7) {
646 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
647 memcpy(output, buf, 16);
648 } else {
649 vst1q_u8(__builtin_assume_aligned(output, 8), state);
650 }
651 #endif
652 output += 16;
653 iv = state;
654 }
655 vst1q_u8(cx->iv, iv);
656
657 return SECSuccess;
658 }
659
660 SECStatus
arm_aes_decrypt_cbc_192(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)661 arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
662 unsigned int *outputLen,
663 unsigned int maxOutputLen,
664 const unsigned char *input,
665 unsigned int inputLen,
666 unsigned int blocksize)
667 {
668 #if !defined(HAVE_UNALIGNED_ACCESS)
669 pre_align unsigned char buf[16] post_align;
670 #endif
671 uint8x16_t iv;
672 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
673 uint8x16_t key11, key12, key13;
674 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
675
676 if (!inputLen) {
677 return SECSuccess;
678 }
679
680 /* iv */
681 iv = vld1q_u8(cx->iv);
682
683 /* expanedKey */
684 key1 = vld1q_u8(key);
685 key2 = vld1q_u8(key + 16);
686 key3 = vld1q_u8(key + 32);
687 key4 = vld1q_u8(key + 48);
688 key5 = vld1q_u8(key + 64);
689 key6 = vld1q_u8(key + 80);
690 key7 = vld1q_u8(key + 96);
691 key8 = vld1q_u8(key + 112);
692 key9 = vld1q_u8(key + 128);
693 key10 = vld1q_u8(key + 144);
694 key11 = vld1q_u8(key + 160);
695 key12 = vld1q_u8(key + 176);
696 key13 = vld1q_u8(key + 192);
697
698 while (inputLen > 0) {
699 uint8x16_t state, old_state;
700 #if defined(HAVE_UNALIGNED_ACCESS)
701 state = vld1q_u8(input);
702 #else
703 if ((uintptr_t)input & 0x7) {
704 memcpy(buf, input, 16);
705 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
706 } else {
707 state = vld1q_u8(__builtin_assume_aligned(input, 8));
708 }
709 #endif
710 old_state = state;
711 input += 16;
712 inputLen -= 16;
713
714 /* Rounds */
715 state = vaesdq_u8(state, key13);
716 state = vaesimcq_u8(state);
717 state = vaesdq_u8(state, key12);
718 state = vaesimcq_u8(state);
719 state = vaesdq_u8(state, key11);
720 state = vaesimcq_u8(state);
721 state = vaesdq_u8(state, key10);
722 state = vaesimcq_u8(state);
723 state = vaesdq_u8(state, key9);
724 state = vaesimcq_u8(state);
725 state = vaesdq_u8(state, key8);
726 state = vaesimcq_u8(state);
727 state = vaesdq_u8(state, key7);
728 state = vaesimcq_u8(state);
729 state = vaesdq_u8(state, key6);
730 state = vaesimcq_u8(state);
731 state = vaesdq_u8(state, key5);
732 state = vaesimcq_u8(state);
733 state = vaesdq_u8(state, key4);
734 state = vaesimcq_u8(state);
735 state = vaesdq_u8(state, key3);
736 state = vaesimcq_u8(state);
737 state = vaesdq_u8(state, key2);
738 /* AddRoundKey */
739 state = veorq_u8(state, key1);
740
741 state = veorq_u8(state, iv);
742
743 #if defined(HAVE_UNALIGNED_ACCESS)
744 vst1q_u8(output, state);
745 #else
746 if ((uintptr_t)output & 0x7) {
747 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
748 memcpy(output, buf, 16);
749 } else {
750 vst1q_u8(__builtin_assume_aligned(output, 8), state);
751 }
752 #endif
753 output += 16;
754
755 iv = old_state;
756 }
757 vst1q_u8(cx->iv, iv);
758
759 return SECSuccess;
760 }
761
762 SECStatus
arm_aes_encrypt_ecb_256(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)763 arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
764 unsigned int *outputLen,
765 unsigned int maxOutputLen,
766 const unsigned char *input,
767 unsigned int inputLen,
768 unsigned int blocksize)
769 {
770 #if !defined(HAVE_UNALIGNED_ACCESS)
771 pre_align unsigned char buf[16] post_align;
772 #endif
773 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
774 uint8x16_t key11, key12, key13, key14, key15;
775 PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
776
777 if (inputLen == 0) {
778 return SECSuccess;
779 }
780
781 key1 = vld1q_u8(key);
782 key2 = vld1q_u8(key + 16);
783 key3 = vld1q_u8(key + 32);
784 key4 = vld1q_u8(key + 48);
785 key5 = vld1q_u8(key + 64);
786 key6 = vld1q_u8(key + 80);
787 key7 = vld1q_u8(key + 96);
788 key8 = vld1q_u8(key + 112);
789 key9 = vld1q_u8(key + 128);
790 key10 = vld1q_u8(key + 144);
791 key11 = vld1q_u8(key + 160);
792 key12 = vld1q_u8(key + 176);
793 key13 = vld1q_u8(key + 192);
794 key14 = vld1q_u8(key + 208);
795 key15 = vld1q_u8(key + 224);
796
797 while (inputLen > 0) {
798 uint8x16_t state;
799 #if defined(HAVE_UNALIGNED_ACCESS)
800 state = vld1q_u8(input);
801 #else
802 if ((uintptr_t)input & 0x7) {
803 memcpy(buf, input, 16);
804 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
805 } else {
806 state = vld1q_u8(__builtin_assume_aligned(input, 8));
807 }
808 #endif
809 input += 16;
810 inputLen -= 16;
811
812 /* Rounds */
813 state = vaeseq_u8(state, key1);
814 state = vaesmcq_u8(state);
815 state = vaeseq_u8(state, key2);
816 state = vaesmcq_u8(state);
817 state = vaeseq_u8(state, key3);
818 state = vaesmcq_u8(state);
819 state = vaeseq_u8(state, key4);
820 state = vaesmcq_u8(state);
821 state = vaeseq_u8(state, key5);
822 state = vaesmcq_u8(state);
823 state = vaeseq_u8(state, key6);
824 state = vaesmcq_u8(state);
825 state = vaeseq_u8(state, key7);
826 state = vaesmcq_u8(state);
827 state = vaeseq_u8(state, key8);
828 state = vaesmcq_u8(state);
829 state = vaeseq_u8(state, key9);
830 state = vaesmcq_u8(state);
831 state = vaeseq_u8(state, key10);
832 state = vaesmcq_u8(state);
833 state = vaeseq_u8(state, key11);
834 state = vaesmcq_u8(state);
835 state = vaeseq_u8(state, key12);
836 state = vaesmcq_u8(state);
837 state = vaeseq_u8(state, key13);
838 state = vaesmcq_u8(state);
839 state = vaeseq_u8(state, key14);
840 /* AddRoundKey */
841 state = veorq_u8(state, key15);
842
843 #if defined(HAVE_UNALIGNED_ACCESS)
844 vst1q_u8(output, state);
845 #else
846 if ((uintptr_t)output & 0x7) {
847 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
848 memcpy(output, buf, 16);
849 } else {
850 vst1q_u8(__builtin_assume_aligned(output, 8), state);
851 }
852 #endif
853 output += 16;
854 }
855 return SECSuccess;
856 }
857
858 SECStatus
arm_aes_decrypt_ecb_256(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)859 arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
860 unsigned int *outputLen,
861 unsigned int maxOutputLen,
862 const unsigned char *input,
863 unsigned int inputLen,
864 unsigned int blocksize)
865 {
866 #if !defined(HAVE_UNALIGNED_ACCESS)
867 pre_align unsigned char buf[16] post_align;
868 #endif
869 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
870 uint8x16_t key11, key12, key13, key14, key15;
871 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
872
873 if (!inputLen) {
874 return SECSuccess;
875 }
876
877 key1 = vld1q_u8(key);
878 key2 = vld1q_u8(key + 16);
879 key3 = vld1q_u8(key + 32);
880 key4 = vld1q_u8(key + 48);
881 key5 = vld1q_u8(key + 64);
882 key6 = vld1q_u8(key + 80);
883 key7 = vld1q_u8(key + 96);
884 key8 = vld1q_u8(key + 112);
885 key9 = vld1q_u8(key + 128);
886 key10 = vld1q_u8(key + 144);
887 key11 = vld1q_u8(key + 160);
888 key12 = vld1q_u8(key + 176);
889 key13 = vld1q_u8(key + 192);
890 key14 = vld1q_u8(key + 208);
891 key15 = vld1q_u8(key + 224);
892
893 while (inputLen > 0) {
894 uint8x16_t state;
895 #if defined(HAVE_UNALIGNED_ACCESS)
896 state = vld1q_u8(input);
897 #else
898 if ((uintptr_t)input & 0x7) {
899 memcpy(buf, input, 16);
900 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
901 } else {
902 state = vld1q_u8(__builtin_assume_aligned(input, 8));
903 }
904 #endif
905 input += 16;
906 inputLen -= 16;
907
908 /* Rounds */
909 state = vaesdq_u8(state, key15);
910 state = vaesimcq_u8(state);
911 state = vaesdq_u8(state, key14);
912 state = vaesimcq_u8(state);
913 state = vaesdq_u8(state, key13);
914 state = vaesimcq_u8(state);
915 state = vaesdq_u8(state, key12);
916 state = vaesimcq_u8(state);
917 state = vaesdq_u8(state, key11);
918 state = vaesimcq_u8(state);
919 state = vaesdq_u8(state, key10);
920 state = vaesimcq_u8(state);
921 state = vaesdq_u8(state, key9);
922 state = vaesimcq_u8(state);
923 state = vaesdq_u8(state, key8);
924 state = vaesimcq_u8(state);
925 state = vaesdq_u8(state, key7);
926 state = vaesimcq_u8(state);
927 state = vaesdq_u8(state, key6);
928 state = vaesimcq_u8(state);
929 state = vaesdq_u8(state, key5);
930 state = vaesimcq_u8(state);
931 state = vaesdq_u8(state, key4);
932 state = vaesimcq_u8(state);
933 state = vaesdq_u8(state, key3);
934 state = vaesimcq_u8(state);
935 state = vaesdq_u8(state, key2);
936 /* AddRoundKey */
937 state = veorq_u8(state, key1);
938
939 #if defined(HAVE_UNALIGNED_ACCESS)
940 vst1q_u8(output, state);
941 #else
942 if ((uintptr_t)output & 0x7) {
943 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
944 memcpy(output, buf, 16);
945 } else {
946 vst1q_u8(__builtin_assume_aligned(output, 8), state);
947 }
948 #endif
949 output += 16;
950 }
951
952 return SECSuccess;
953 }
954
955 SECStatus
arm_aes_encrypt_cbc_256(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)956 arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
957 unsigned int *outputLen,
958 unsigned int maxOutputLen,
959 const unsigned char *input,
960 unsigned int inputLen,
961 unsigned int blocksize)
962 {
963 #if !defined(HAVE_UNALIGNED_ACCESS)
964 pre_align unsigned char buf[16] post_align;
965 #endif
966 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
967 uint8x16_t key11, key12, key13, key14, key15;
968 uint8x16_t iv;
969 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
970
971 if (!inputLen) {
972 return SECSuccess;
973 }
974
975 /* iv */
976 iv = vld1q_u8(cx->iv);
977
978 /* expanedKey */
979 key1 = vld1q_u8(key);
980 key2 = vld1q_u8(key + 16);
981 key3 = vld1q_u8(key + 32);
982 key4 = vld1q_u8(key + 48);
983 key5 = vld1q_u8(key + 64);
984 key6 = vld1q_u8(key + 80);
985 key7 = vld1q_u8(key + 96);
986 key8 = vld1q_u8(key + 112);
987 key9 = vld1q_u8(key + 128);
988 key10 = vld1q_u8(key + 144);
989 key11 = vld1q_u8(key + 160);
990 key12 = vld1q_u8(key + 176);
991 key13 = vld1q_u8(key + 192);
992 key14 = vld1q_u8(key + 208);
993 key15 = vld1q_u8(key + 224);
994
995 while (inputLen > 0) {
996 uint8x16_t state;
997 #if defined(HAVE_UNALIGNED_ACCESS)
998 state = vld1q_u8(input);
999 #else
1000 if ((uintptr_t)input & 0x7) {
1001 memcpy(buf, input, 16);
1002 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
1003 } else {
1004 state = vld1q_u8(__builtin_assume_aligned(input, 8));
1005 }
1006 #endif
1007 input += 16;
1008 inputLen -= 16;
1009
1010 state = veorq_u8(state, iv);
1011
1012 /* Rounds */
1013 state = vaeseq_u8(state, key1);
1014 state = vaesmcq_u8(state);
1015 state = vaeseq_u8(state, key2);
1016 state = vaesmcq_u8(state);
1017 state = vaeseq_u8(state, key3);
1018 state = vaesmcq_u8(state);
1019 state = vaeseq_u8(state, key4);
1020 state = vaesmcq_u8(state);
1021 state = vaeseq_u8(state, key5);
1022 state = vaesmcq_u8(state);
1023 state = vaeseq_u8(state, key6);
1024 state = vaesmcq_u8(state);
1025 state = vaeseq_u8(state, key7);
1026 state = vaesmcq_u8(state);
1027 state = vaeseq_u8(state, key8);
1028 state = vaesmcq_u8(state);
1029 state = vaeseq_u8(state, key9);
1030 state = vaesmcq_u8(state);
1031 state = vaeseq_u8(state, key10);
1032 state = vaesmcq_u8(state);
1033 state = vaeseq_u8(state, key11);
1034 state = vaesmcq_u8(state);
1035 state = vaeseq_u8(state, key12);
1036 state = vaesmcq_u8(state);
1037 state = vaeseq_u8(state, key13);
1038 state = vaesmcq_u8(state);
1039 state = vaeseq_u8(state, key14);
1040 /* AddRoundKey */
1041 state = veorq_u8(state, key15);
1042
1043 #if defined(HAVE_UNALIGNED_ACCESS)
1044 vst1q_u8(output, state);
1045 #else
1046 if ((uintptr_t)output & 0x7) {
1047 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
1048 memcpy(output, buf, 16);
1049 } else {
1050 vst1q_u8(__builtin_assume_aligned(output, 8), state);
1051 }
1052 #endif
1053 output += 16;
1054 iv = state;
1055 }
1056 vst1q_u8(cx->iv, iv);
1057
1058 return SECSuccess;
1059 }
1060
1061 SECStatus
arm_aes_decrypt_cbc_256(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen,unsigned int blocksize)1062 arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
1063 unsigned int *outputLen,
1064 unsigned int maxOutputLen,
1065 const unsigned char *input,
1066 unsigned int inputLen,
1067 unsigned int blocksize)
1068 {
1069 #if !defined(HAVE_UNALIGNED_ACCESS)
1070 pre_align unsigned char buf[16] post_align;
1071 #endif
1072 uint8x16_t iv;
1073 uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
1074 uint8x16_t key11, key12, key13, key14, key15;
1075 const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
1076
1077 if (!inputLen) {
1078 return SECSuccess;
1079 }
1080
1081 /* iv */
1082 iv = vld1q_u8(cx->iv);
1083
1084 /* expanedKey */
1085 key1 = vld1q_u8(key);
1086 key2 = vld1q_u8(key + 16);
1087 key3 = vld1q_u8(key + 32);
1088 key4 = vld1q_u8(key + 48);
1089 key5 = vld1q_u8(key + 64);
1090 key6 = vld1q_u8(key + 80);
1091 key7 = vld1q_u8(key + 96);
1092 key8 = vld1q_u8(key + 112);
1093 key9 = vld1q_u8(key + 128);
1094 key10 = vld1q_u8(key + 144);
1095 key11 = vld1q_u8(key + 160);
1096 key12 = vld1q_u8(key + 176);
1097 key13 = vld1q_u8(key + 192);
1098 key14 = vld1q_u8(key + 208);
1099 key15 = vld1q_u8(key + 224);
1100
1101 while (inputLen > 0) {
1102 uint8x16_t state, old_state;
1103 #if defined(HAVE_UNALIGNED_ACCESS)
1104 state = vld1q_u8(input);
1105 #else
1106 if ((uintptr_t)input & 0x7) {
1107 memcpy(buf, input, 16);
1108 state = vld1q_u8(__builtin_assume_aligned(buf, 16));
1109 } else {
1110 state = vld1q_u8(__builtin_assume_aligned(input, 8));
1111 }
1112 #endif
1113 old_state = state;
1114 input += 16;
1115 inputLen -= 16;
1116
1117 /* Rounds */
1118 state = vaesdq_u8(state, key15);
1119 state = vaesimcq_u8(state);
1120 state = vaesdq_u8(state, key14);
1121 state = vaesimcq_u8(state);
1122 state = vaesdq_u8(state, key13);
1123 state = vaesimcq_u8(state);
1124 state = vaesdq_u8(state, key12);
1125 state = vaesimcq_u8(state);
1126 state = vaesdq_u8(state, key11);
1127 state = vaesimcq_u8(state);
1128 state = vaesdq_u8(state, key10);
1129 state = vaesimcq_u8(state);
1130 state = vaesdq_u8(state, key9);
1131 state = vaesimcq_u8(state);
1132 state = vaesdq_u8(state, key8);
1133 state = vaesimcq_u8(state);
1134 state = vaesdq_u8(state, key7);
1135 state = vaesimcq_u8(state);
1136 state = vaesdq_u8(state, key6);
1137 state = vaesimcq_u8(state);
1138 state = vaesdq_u8(state, key5);
1139 state = vaesimcq_u8(state);
1140 state = vaesdq_u8(state, key4);
1141 state = vaesimcq_u8(state);
1142 state = vaesdq_u8(state, key3);
1143 state = vaesimcq_u8(state);
1144 state = vaesdq_u8(state, key2);
1145 /* AddRoundKey */
1146 state = veorq_u8(state, key1);
1147
1148 state = veorq_u8(state, iv);
1149
1150 #if defined(HAVE_UNALIGNED_ACCESS)
1151 vst1q_u8(output, state);
1152 #else
1153 if ((uintptr_t)output & 0x7) {
1154 vst1q_u8(__builtin_assume_aligned(buf, 16), state);
1155 memcpy(output, buf, 16);
1156 } else {
1157 vst1q_u8(__builtin_assume_aligned(output, 8), state);
1158 }
1159 #endif
1160 output += 16;
1161
1162 iv = old_state;
1163 }
1164 vst1q_u8(cx->iv, iv);
1165
1166 return SECSuccess;
1167 }
1168
1169 #endif
1170