1 /* armv8-aes.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23 /*
24 * There are two versions one for 64 (Aarch64) and one for 32 bit (Aarch32).
25 * If changing one check the other.
26 */
27
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <wolfssl/wolfcrypt/settings.h>
34
35 #if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
36
37 #ifdef HAVE_FIPS
38 #undef HAVE_FIPS
39 #endif
40
41 #include <wolfssl/wolfcrypt/aes.h>
42 #include <wolfssl/wolfcrypt/error-crypt.h>
43 #include <wolfssl/wolfcrypt/logging.h>
44 #ifdef NO_INLINE
45 #include <wolfssl/wolfcrypt/misc.h>
46 #else
47 #define WOLFSSL_MISC_INCLUDED
48 #include <wolfcrypt/src/misc.c>
49 #endif
50
51 #ifdef _MSC_VER
52 /* 4127 warning constant while(1) */
53 #pragma warning(disable: 4127)
54 #endif
55
56
/* AES key-schedule round constants (Rijndael rcon values), consumed in
 * order by wc_AesSetKey when deriving each round key. */
static const byte rcon[] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36
    /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
61
/* get table value from hardware */
#ifdef __aarch64__
/* SBOX(x): hardware S-box substitution of the word x, in place.
 * AESE on a zero state XORs in x (duplicated across the vector) and then
 * applies ShiftRows+SubBytes, so lane 0 holds the substituted word that
 * the key schedule needs (the caller compensates for the byte permutation
 * with a rotate — see wc_AesSetKey). */
#define SBOX(x)                      \
    do {                             \
        __asm__ volatile (           \
            "DUP v1.4s, %w[in]  \n"  \
            "MOVI v0.16b, #0     \n" \
            "AESE v0.16b, v1.16b \n" \
            "UMOV %w[out], v0.s[0] \n" \
            : [out] "=r"((x))        \
            : [in] "r" ((x))         \
            : "cc", "memory", "v0", "v1"\
        ); \
    } while(0)

/* IMIX(x): apply the AES InverseMixColumns transform in place to the
 * 16-byte block pointed to by x. Used to convert encryption round keys
 * into decryption round keys. */
#define IMIX(x) \
    do {        \
        __asm__ volatile (             \
            "LD1 {v0.16b}, [%[in]] \n" \
            "AESIMC v0.16b, v0.16b \n" \
            "ST1 {v0.16b}, [%[out]]\n" \
            : [out] "=r" ((x))         \
            : [in] "0" ((x))           \
            : "cc", "memory", "v0"     \
        ); \
    } while(0)
#else /* if not defined __aarch64__ then use 32 bit version */
/* 32-bit (Aarch32) NEON crypto-extension equivalent of the SBOX macro
 * above: S-box substitute the word x in place. */
#define SBOX(x)                      \
    do {                             \
        __asm__ volatile (           \
            "VDUP.32 q1, %[in]   \n" \
            "VMOV.i32 q0, #0     \n" \
            "AESE.8 q0, q1       \n" \
            "VMOV.32 %[out], d0[0] \n" \
            : [out] "=r"((x))        \
            : [in] "r" ((x))         \
            : "cc", "memory", "q0", "q1"\
        ); \
    } while(0)

/* 32-bit (Aarch32) equivalent of the IMIX macro above: InverseMixColumns
 * of the 16-byte block at x, in place. */
#define IMIX(x) \
    do {        \
        __asm__ volatile (           \
            "VLD1.32 {q0}, [%[in]] \n" \
            "AESIMC.8 q0, q0       \n" \
            "VST1.32 {q0}, [%[out]] \n" \
            : [out] "=r" ((x))       \
            : [in] "0" ((x))         \
            : "cc", "memory", "q0"   \
        ); \
    } while(0)
#endif /* aarch64 */
114
115
116 #ifdef HAVE_AESGCM
117
IncrementGcmCounter(byte * inOutCtr)118 static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
119 {
120 int i;
121
122 /* in network byte order so start at end and work back */
123 for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
124 if (++inOutCtr[i]) /* we're done unless we overflow */
125 return;
126 }
127 }
128
129
FlattenSzInBits(byte * buf,word32 sz)130 static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
131 {
132 /* Multiply the sz by 8 */
133 word32 szHi = (sz >> (8*sizeof(sz) - 3));
134 sz <<= 3;
135
136 /* copy over the words of the sz into the destination buffer */
137 buf[0] = (szHi >> 24) & 0xff;
138 buf[1] = (szHi >> 16) & 0xff;
139 buf[2] = (szHi >> 8) & 0xff;
140 buf[3] = szHi & 0xff;
141 buf[4] = (sz >> 24) & 0xff;
142 buf[5] = (sz >> 16) & 0xff;
143 buf[6] = (sz >> 8) & 0xff;
144 buf[7] = sz & 0xff;
145 }
146
147 #endif /* HAVE_AESGCM */
148
/* Similar to wolfSSL software implementation of expanding the AES key.
 * Changed out the locations of where table look ups where made to
 * use hardware instruction. Also altered decryption key to match. */
/* Expand userKey into the round-key schedule in aes->key and set the IV.
 *
 * aes     - initialized Aes structure to receive the key schedule
 * userKey - raw key bytes (16, 24 or 32 bytes)
 * keylen  - length of userKey in bytes
 * iv      - optional 16-byte IV (may be NULL; see wc_AesSetIV)
 * dir     - AES_ENCRYPTION or AES_DECRYPTION; for decryption the schedule
 *           is reversed and InverseMixColumns is applied via IMIX
 *
 * Returns 0 on success, BAD_FUNC_ARG on bad arguments. */
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
            const byte* iv, int dir)
{
    word32 temp;
    word32 *rk;
    unsigned int i = 0;

#if defined(AES_MAX_KEY_SIZE)
    const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
#endif

    if (!((keylen == 16) || (keylen == 24) || (keylen == 32)) ||
           aes == NULL || userKey == NULL)
        return BAD_FUNC_ARG;

    rk = aes->key;
#if defined(AES_MAX_KEY_SIZE)
    /* Check key length */
    if (keylen > max_key_len) {
        return BAD_FUNC_ARG;
    }
#endif

#ifdef WOLFSSL_AES_COUNTER
    aes->left = 0;
#endif /* WOLFSSL_AES_COUNTER */

    aes->keylen = keylen;
    aes->rounds = keylen/4 + 6;
    XMEMCPY(rk, userKey, keylen);

    switch(keylen)
    {
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \
        defined(WOLFSSL_AES_128)
    case 16:
        /* AES-128: 10 rounds; each iteration derives 4 new key words.
         * Note SBOX is the hardware AESE-based lookup; the rotrFixed by 8
         * compensates for the ShiftRows permutation AESE applies, giving
         * the standard SubWord(RotWord(temp)) result. */
        while (1)
        {
            temp  = rk[3];
            SBOX(temp);
            temp = rotrFixed(temp, 8);
            rk[4] = rk[0] ^ temp ^ rcon[i];
            rk[5] = rk[4] ^ rk[1];
            rk[6] = rk[5] ^ rk[2];
            rk[7] = rk[6] ^ rk[3];
            if (++i == 10)
                break;
            rk += 4;
        }
        break;
#endif /* 128 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \
        defined(WOLFSSL_AES_192)
    case 24:
        /* AES-192: 12 rounds; each iteration derives 6 new key words,
         * with the last (8th) iteration producing only 4. */
        /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
        while (1)
        {
            temp  = rk[5];
            SBOX(temp);
            temp = rotrFixed(temp, 8);
            rk[ 6] = rk[ 0] ^ temp ^ rcon[i];
            rk[ 7] = rk[ 1] ^ rk[ 6];
            rk[ 8] = rk[ 2] ^ rk[ 7];
            rk[ 9] = rk[ 3] ^ rk[ 8];
            if (++i == 8)
                break;
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
            rk += 6;
        }
        break;
#endif /* 192 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \
        defined(WOLFSSL_AES_256)
    case 32:
        /* AES-256: 14 rounds; each iteration derives 8 new key words,
         * with a second (non-rotated) SubWord applied mid-iteration. */
        while (1)
        {
            temp  = rk[7];
            SBOX(temp);
            temp = rotrFixed(temp, 8);
            rk[8] = rk[0] ^ temp ^ rcon[i];
            rk[ 9] = rk[ 1] ^ rk[ 8];
            rk[10] = rk[ 2] ^ rk[ 9];
            rk[11] = rk[ 3] ^ rk[10];
            if (++i == 7)
                break;
            temp  = rk[11];
            SBOX(temp);
            rk[12] = rk[ 4] ^ temp;
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];

            rk += 8;
        }
        break;
#endif /* 256 */

    default:
        return BAD_FUNC_ARG;
    }

    if (dir == AES_DECRYPTION)
    {
#ifdef HAVE_AES_DECRYPT
        unsigned int j;
        rk = aes->key;

        /* invert the order of the round keys: */
        for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) {
            temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
            temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
            temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
            temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
        }
        /* apply the inverse MixColumn transform to all round keys but the
           first and the last: */
        for (i = 1; i < aes->rounds; i++) {
            rk += 4;
            IMIX(rk);
        }
#else
        WOLFSSL_MSG("AES Decryption not compiled in");
        return BAD_FUNC_ARG;
#endif /* HAVE_AES_DECRYPT */
    }

    return wc_AesSetIV(aes, iv);
}
283
284 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
wc_AesSetKeyDirect(Aes * aes,const byte * userKey,word32 keylen,const byte * iv,int dir)285 int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
286 const byte* iv, int dir)
287 {
288 return wc_AesSetKey(aes, userKey, keylen, iv, dir);
289 }
290 #endif
291
292 /* wc_AesSetIV is shared between software and hardware */
wc_AesSetIV(Aes * aes,const byte * iv)293 int wc_AesSetIV(Aes* aes, const byte* iv)
294 {
295 if (aes == NULL)
296 return BAD_FUNC_ARG;
297
298 if (iv)
299 XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
300 else
301 XMEMSET(aes->reg, 0, AES_BLOCK_SIZE);
302
303 return 0;
304 }
305
306
307 #ifdef __aarch64__
308 /* AES CCM/GCM use encrypt direct but not decrypt */
309 #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \
310 defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
wc_AesEncrypt(Aes * aes,const byte * inBlock,byte * outBlock)311 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
312 {
313 word32* keyPt = aes->key;
314
315 /*
316 AESE exor's input with round key
317 shift rows of exor'ed result
318 sub bytes for shifted rows
319 */
320
321 __asm__ __volatile__ (
322 "LD1 {v0.16b}, [%[CtrIn]] \n"
323 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
324
325 "AESE v0.16b, v1.16b \n"
326 "AESMC v0.16b, v0.16b \n"
327 "AESE v0.16b, v2.16b \n"
328 "AESMC v0.16b, v0.16b \n"
329 "AESE v0.16b, v3.16b \n"
330 "AESMC v0.16b, v0.16b \n"
331 "AESE v0.16b, v4.16b \n"
332 "AESMC v0.16b, v0.16b \n"
333
334 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
335 "AESE v0.16b, v1.16b \n"
336 "AESMC v0.16b, v0.16b \n"
337 "AESE v0.16b, v2.16b \n"
338 "AESMC v0.16b, v0.16b \n"
339 "AESE v0.16b, v3.16b \n"
340 "AESMC v0.16b, v0.16b \n"
341 "AESE v0.16b, v4.16b \n"
342 "AESMC v0.16b, v0.16b \n"
343
344 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
345 "AESE v0.16b, v1.16b \n"
346 "AESMC v0.16b, v0.16b \n"
347 "AESE v0.16b, v2.16b \n"
348
349 "#subtract rounds done so far and see if should continue\n"
350 "MOV w12, %w[R] \n"
351 "SUB w12, w12, #10 \n"
352 "CBZ w12, 1f \n"
353 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
354 "AESMC v0.16b, v0.16b \n"
355 "AESE v0.16b, v1.16b \n"
356 "AESMC v0.16b, v0.16b \n"
357 "AESE v0.16b, v2.16b \n"
358
359 "SUB w12, w12, #2 \n"
360 "CBZ w12, 1f \n"
361 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
362 "AESMC v0.16b, v0.16b \n"
363 "AESE v0.16b, v1.16b \n"
364 "AESMC v0.16b, v0.16b \n"
365 "AESE v0.16b, v2.16b \n"
366
367 "#Final AddRoundKey then store result \n"
368 "1: \n"
369 "LD1 {v1.2d}, [%[Key]], #16 \n"
370 "EOR v0.16b, v0.16b, v1.16b \n"
371 "ST1 {v0.16b}, [%[CtrOut]] \n"
372
373 :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
374 "=r" (inBlock)
375 :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
376 [CtrIn] "3" (inBlock)
377 : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
378 );
379
380 return 0;
381 }
382 #endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */
383 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
384 #ifdef HAVE_AES_DECRYPT
wc_AesDecrypt(Aes * aes,const byte * inBlock,byte * outBlock)385 static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
386 {
387 word32* keyPt = aes->key;
388
389 /*
390 AESE exor's input with round key
391 shift rows of exor'ed result
392 sub bytes for shifted rows
393 */
394
395 __asm__ __volatile__ (
396 "LD1 {v0.16b}, [%[CtrIn]] \n"
397 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
398
399 "AESD v0.16b, v1.16b \n"
400 "AESIMC v0.16b, v0.16b \n"
401 "AESD v0.16b, v2.16b \n"
402 "AESIMC v0.16b, v0.16b \n"
403 "AESD v0.16b, v3.16b \n"
404 "AESIMC v0.16b, v0.16b \n"
405 "AESD v0.16b, v4.16b \n"
406 "AESIMC v0.16b, v0.16b \n"
407
408 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
409 "AESD v0.16b, v1.16b \n"
410 "AESIMC v0.16b, v0.16b \n"
411 "AESD v0.16b, v2.16b \n"
412 "AESIMC v0.16b, v0.16b \n"
413 "AESD v0.16b, v3.16b \n"
414 "AESIMC v0.16b, v0.16b \n"
415 "AESD v0.16b, v4.16b \n"
416 "AESIMC v0.16b, v0.16b \n"
417
418 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
419 "AESD v0.16b, v1.16b \n"
420 "AESIMC v0.16b, v0.16b \n"
421 "AESD v0.16b, v2.16b \n"
422
423 "#subtract rounds done so far and see if should continue\n"
424 "MOV w12, %w[R] \n"
425 "SUB w12, w12, #10 \n"
426 "CBZ w12, 1f \n"
427 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
428 "AESIMC v0.16b, v0.16b \n"
429 "AESD v0.16b, v1.16b \n"
430 "AESIMC v0.16b, v0.16b \n"
431 "AESD v0.16b, v2.16b \n"
432
433 "SUB w12, w12, #2 \n"
434 "CBZ w12, 1f \n"
435 "LD1 {v1.2d-v2.2d}, [%[Key]], #32 \n"
436 "AESIMC v0.16b, v0.16b \n"
437 "AESD v0.16b, v1.16b \n"
438 "AESIMC v0.16b, v0.16b \n"
439 "AESD v0.16b, v2.16b \n"
440
441 "#Final AddRoundKey then store result \n"
442 "1: \n"
443 "LD1 {v1.2d}, [%[Key]], #16 \n"
444 "EOR v0.16b, v0.16b, v1.16b \n"
445 "ST1 {v0.4s}, [%[CtrOut]] \n"
446
447 :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
448 "=r" (inBlock)
449 :[Key] "1" (aes->key), "0" (outBlock), [R] "2" (aes->rounds),
450 [CtrIn] "3" (inBlock)
451 : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
452 );
453
454 return 0;
455 }
456 #endif /* HAVE_AES_DECRYPT */
457 #endif /* DIRECT or COUNTER */
458
459 /* AES-CBC */
460 #ifdef HAVE_AES_CBC
wc_AesCbcEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)461 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
462 {
463 word32 numBlocks = sz / AES_BLOCK_SIZE;
464
465 if (aes == NULL || out == NULL || in == NULL) {
466 return BAD_FUNC_ARG;
467 }
468
469 if (sz == 0) {
470 return 0;
471 }
472
473 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
474 if (sz % AES_BLOCK_SIZE) {
475 return BAD_LENGTH_E;
476 }
477 #endif
478
479 /* do as many block size ops as possible */
480 if (numBlocks > 0) {
481 word32* key = aes->key;
482 word32* reg = aes->reg;
483 /*
484 AESE exor's input with round key
485 shift rows of exor'ed result
486 sub bytes for shifted rows
487
488 note: grouping AESE & AESMC together as pairs reduces latency
489 */
490 switch(aes->rounds) {
491 #ifdef WOLFSSL_AES_128
492 case 10: /* AES 128 BLOCK */
493 __asm__ __volatile__ (
494 "MOV w11, %w[blocks] \n"
495 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
496 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
497 "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n"
498 "LD1 {v0.2d}, [%[reg]] \n"
499
500 "LD1 {v12.2d}, [%[input]], #16 \n"
501 "1:\n"
502 "#CBC operations, xorbuf in with current aes->reg \n"
503 "EOR v0.16b, v0.16b, v12.16b \n"
504 "AESE v0.16b, v1.16b \n"
505 "AESMC v0.16b, v0.16b \n"
506 "AESE v0.16b, v2.16b \n"
507 "AESMC v0.16b, v0.16b \n"
508 "AESE v0.16b, v3.16b \n"
509 "AESMC v0.16b, v0.16b \n"
510 "AESE v0.16b, v4.16b \n"
511 "AESMC v0.16b, v0.16b \n"
512 "AESE v0.16b, v5.16b \n"
513 "AESMC v0.16b, v0.16b \n"
514 "AESE v0.16b, v6.16b \n"
515 "AESMC v0.16b, v0.16b \n"
516 "AESE v0.16b, v7.16b \n"
517 "AESMC v0.16b, v0.16b \n"
518 "AESE v0.16b, v8.16b \n"
519 "AESMC v0.16b, v0.16b \n"
520 "AESE v0.16b, v9.16b \n"
521 "AESMC v0.16b, v0.16b \n"
522 "AESE v0.16b, v10.16b \n"
523 "SUB w11, w11, #1 \n"
524 "EOR v0.16b, v0.16b, v11.16b \n"
525 "ST1 {v0.2d}, [%[out]], #16 \n"
526
527 "CBZ w11, 2f \n"
528 "LD1 {v12.2d}, [%[input]], #16 \n"
529 "B 1b \n"
530
531 "2:\n"
532 "#store current counter value at the end \n"
533 "ST1 {v0.2d}, [%[regOut]] \n"
534
535 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
536 :"0" (out), [Key] "r" (key), [input] "2" (in),
537 [blocks] "r" (numBlocks), [reg] "1" (reg)
538 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
539 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
540 );
541 break;
542 #endif /* WOLFSSL_AES_128 */
543 #ifdef WOLFSSL_AES_192
544 case 12: /* AES 192 BLOCK */
545 __asm__ __volatile__ (
546 "MOV w11, %w[blocks] \n"
547 "LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
548 "LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
549 "LD1 {v9.2d-v12.2d},%[Key], #64 \n"
550 "LD1 {v13.2d}, %[Key], #16 \n"
551 "LD1 {v0.2d}, %[reg] \n"
552
553 "LD1 {v14.2d}, [%[input]], #16 \n"
554 "1:\n"
555 "#CBC operations, xorbuf in with current aes->reg \n"
556 "EOR v0.16b, v0.16b, v14.16b \n"
557 "AESE v0.16b, v1.16b \n"
558 "AESMC v0.16b, v0.16b \n"
559 "AESE v0.16b, v2.16b \n"
560 "AESMC v0.16b, v0.16b \n"
561 "AESE v0.16b, v3.16b \n"
562 "AESMC v0.16b, v0.16b \n"
563 "AESE v0.16b, v4.16b \n"
564 "AESMC v0.16b, v0.16b \n"
565 "AESE v0.16b, v5.16b \n"
566 "AESMC v0.16b, v0.16b \n"
567 "AESE v0.16b, v6.16b \n"
568 "AESMC v0.16b, v0.16b \n"
569 "AESE v0.16b, v7.16b \n"
570 "AESMC v0.16b, v0.16b \n"
571 "AESE v0.16b, v8.16b \n"
572 "AESMC v0.16b, v0.16b \n"
573 "AESE v0.16b, v9.16b \n"
574 "AESMC v0.16b, v0.16b \n"
575 "AESE v0.16b, v10.16b \n"
576 "AESMC v0.16b, v0.16b \n"
577 "AESE v0.16b, v11.16b \n"
578 "AESMC v0.16b, v0.16b \n"
579 "AESE v0.16b, v12.16b \n"
580 "EOR v0.16b, v0.16b, v13.16b \n"
581 "SUB w11, w11, #1 \n"
582 "ST1 {v0.2d}, [%[out]], #16 \n"
583
584 "CBZ w11, 2f \n"
585 "LD1 {v14.2d}, [%[input]], #16\n"
586 "B 1b \n"
587
588 "2:\n"
589 "#store current counter value at the end \n"
590 "ST1 {v0.2d}, %[regOut] \n"
591
592
593 :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
594 :"0" (out), [Key] "m" (aes->key), [input] "2" (in),
595 [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
596 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
597 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
598 );
599 break;
600 #endif /* WOLFSSL_AES_192*/
601 #ifdef WOLFSSL_AES_256
602 case 14: /* AES 256 BLOCK */
603 __asm__ __volatile__ (
604 "MOV w11, %w[blocks] \n"
605 "LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
606
607 "LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
608 "LD1 {v9.2d-v12.2d}, %[Key], #64 \n"
609 "LD1 {v13.2d-v15.2d}, %[Key], #48 \n"
610 "LD1 {v0.2d}, %[reg] \n"
611
612 "LD1 {v16.2d}, [%[input]], #16 \n"
613 "1: \n"
614 "#CBC operations, xorbuf in with current aes->reg \n"
615 "EOR v0.16b, v0.16b, v16.16b \n"
616 "AESE v0.16b, v1.16b \n"
617 "AESMC v0.16b, v0.16b \n"
618 "AESE v0.16b, v2.16b \n"
619 "AESMC v0.16b, v0.16b \n"
620 "AESE v0.16b, v3.16b \n"
621 "AESMC v0.16b, v0.16b \n"
622 "AESE v0.16b, v4.16b \n"
623 "AESMC v0.16b, v0.16b \n"
624 "AESE v0.16b, v5.16b \n"
625 "AESMC v0.16b, v0.16b \n"
626 "AESE v0.16b, v6.16b \n"
627 "AESMC v0.16b, v0.16b \n"
628 "AESE v0.16b, v7.16b \n"
629 "AESMC v0.16b, v0.16b \n"
630 "AESE v0.16b, v8.16b \n"
631 "AESMC v0.16b, v0.16b \n"
632 "AESE v0.16b, v9.16b \n"
633 "AESMC v0.16b, v0.16b \n"
634 "AESE v0.16b, v10.16b \n"
635 "AESMC v0.16b, v0.16b \n"
636 "AESE v0.16b, v11.16b \n"
637 "AESMC v0.16b, v0.16b \n"
638 "AESE v0.16b, v12.16b \n"
639 "AESMC v0.16b, v0.16b \n"
640 "AESE v0.16b, v13.16b \n"
641 "AESMC v0.16b, v0.16b \n"
642 "AESE v0.16b, v14.16b \n"
643 "EOR v0.16b, v0.16b, v15.16b \n"
644 "SUB w11, w11, #1 \n"
645 "ST1 {v0.2d}, [%[out]], #16 \n"
646
647 "CBZ w11, 2f \n"
648 "LD1 {v16.2d}, [%[input]], #16 \n"
649 "B 1b \n"
650
651 "2: \n"
652 "#store current counter value at the end \n"
653 "ST1 {v0.2d}, %[regOut] \n"
654
655
656 :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
657 :"0" (out), [Key] "m" (aes->key), [input] "2" (in),
658 [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
659 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
660 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15",
661 "v16"
662 );
663 break;
664 #endif /* WOLFSSL_AES_256 */
665 default:
666 WOLFSSL_MSG("Bad AES-CBC round value");
667 return BAD_FUNC_ARG;
668 }
669 }
670
671 return 0;
672 }
673
674 #ifdef HAVE_AES_DECRYPT
wc_AesCbcDecrypt(Aes * aes,byte * out,const byte * in,word32 sz)675 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
676 {
677 word32 numBlocks = sz / AES_BLOCK_SIZE;
678
679 if (aes == NULL || out == NULL || in == NULL) {
680 return BAD_FUNC_ARG;
681 }
682
683 if (sz == 0) {
684 return 0;
685 }
686
687 if (sz % AES_BLOCK_SIZE) {
688 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
689 return BAD_LENGTH_E;
690 #else
691 return BAD_FUNC_ARG;
692 #endif
693 }
694
695 /* do as many block size ops as possible */
696 if (numBlocks > 0) {
697 word32* key = aes->key;
698 word32* reg = aes->reg;
699
700 switch(aes->rounds) {
701 #ifdef WOLFSSL_AES_128
702 case 10: /* AES 128 BLOCK */
703 __asm__ __volatile__ (
704 "MOV w11, %w[blocks] \n"
705 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
706 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
707 "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n"
708 "LD1 {v13.2d}, [%[reg]] \n"
709
710 "1:\n"
711 "LD1 {v0.2d}, [%[input]], #16 \n"
712 "MOV v12.16b, v0.16b \n"
713 "AESD v0.16b, v1.16b \n"
714 "AESIMC v0.16b, v0.16b \n"
715 "AESD v0.16b, v2.16b \n"
716 "AESIMC v0.16b, v0.16b \n"
717 "AESD v0.16b, v3.16b \n"
718 "AESIMC v0.16b, v0.16b \n"
719 "AESD v0.16b, v4.16b \n"
720 "AESIMC v0.16b, v0.16b \n"
721 "AESD v0.16b, v5.16b \n"
722 "AESIMC v0.16b, v0.16b \n"
723 "AESD v0.16b, v6.16b \n"
724 "AESIMC v0.16b, v0.16b \n"
725 "AESD v0.16b, v7.16b \n"
726 "AESIMC v0.16b, v0.16b \n"
727 "AESD v0.16b, v8.16b \n"
728 "AESIMC v0.16b, v0.16b \n"
729 "AESD v0.16b, v9.16b \n"
730 "AESIMC v0.16b, v0.16b \n"
731 "AESD v0.16b, v10.16b \n"
732 "EOR v0.16b, v0.16b, v11.16b \n"
733
734 "EOR v0.16b, v0.16b, v13.16b \n"
735 "SUB w11, w11, #1 \n"
736 "ST1 {v0.2d}, [%[out]], #16 \n"
737 "MOV v13.16b, v12.16b \n"
738
739 "CBZ w11, 2f \n"
740 "B 1b \n"
741
742 "2: \n"
743 "#store current counter value at the end \n"
744 "ST1 {v13.2d}, [%[regOut]] \n"
745
746 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
747 :"0" (out), [Key] "r" (key), [input] "2" (in),
748 [blocks] "r" (numBlocks), [reg] "1" (reg)
749 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
750 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
751 );
752 break;
753 #endif /* WOLFSSL_AES_128 */
754 #ifdef WOLFSSL_AES_192
755 case 12: /* AES 192 BLOCK */
756 __asm__ __volatile__ (
757 "MOV w11, %w[blocks] \n"
758 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
759 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
760 "LD1 {v9.2d-v12.2d},[%[Key]], #64 \n"
761 "LD1 {v13.16b}, [%[Key]], #16 \n"
762 "LD1 {v15.2d}, [%[reg]] \n"
763
764 "LD1 {v0.2d}, [%[input]], #16 \n"
765 "1: \n"
766 "MOV v14.16b, v0.16b \n"
767 "AESD v0.16b, v1.16b \n"
768 "AESIMC v0.16b, v0.16b \n"
769 "AESD v0.16b, v2.16b \n"
770 "AESIMC v0.16b, v0.16b \n"
771 "AESD v0.16b, v3.16b \n"
772 "AESIMC v0.16b, v0.16b \n"
773 "AESD v0.16b, v4.16b \n"
774 "AESIMC v0.16b, v0.16b \n"
775 "AESD v0.16b, v5.16b \n"
776 "AESIMC v0.16b, v0.16b \n"
777 "AESD v0.16b, v6.16b \n"
778 "AESIMC v0.16b, v0.16b \n"
779 "AESD v0.16b, v7.16b \n"
780 "AESIMC v0.16b, v0.16b \n"
781 "AESD v0.16b, v8.16b \n"
782 "AESIMC v0.16b, v0.16b \n"
783 "AESD v0.16b, v9.16b \n"
784 "AESIMC v0.16b, v0.16b \n"
785 "AESD v0.16b, v10.16b \n"
786 "AESIMC v0.16b, v0.16b \n"
787 "AESD v0.16b, v11.16b \n"
788 "AESIMC v0.16b, v0.16b \n"
789 "AESD v0.16b, v12.16b \n"
790 "EOR v0.16b, v0.16b, v13.16b \n"
791
792 "EOR v0.16b, v0.16b, v15.16b \n"
793 "SUB w11, w11, #1 \n"
794 "ST1 {v0.2d}, [%[out]], #16 \n"
795 "MOV v15.16b, v14.16b \n"
796
797 "CBZ w11, 2f \n"
798 "LD1 {v0.2d}, [%[input]], #16 \n"
799 "B 1b \n"
800
801 "2:\n"
802 "#store current counter value at the end \n"
803 "ST1 {v15.2d}, [%[regOut]] \n"
804
805 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
806 :"0" (out), [Key] "r" (key), [input] "2" (in),
807 [blocks] "r" (numBlocks), [reg] "1" (reg)
808 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
809 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
810 );
811 break;
812 #endif /* WOLFSSL_AES_192 */
813 #ifdef WOLFSSL_AES_256
814 case 14: /* AES 256 BLOCK */
815 __asm__ __volatile__ (
816 "MOV w11, %w[blocks] \n"
817 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
818 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
819 "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
820 "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
821 "LD1 {v17.2d}, [%[reg]] \n"
822
823 "LD1 {v0.2d}, [%[input]], #16 \n"
824 "1: \n"
825 "MOV v16.16b, v0.16b \n"
826 "AESD v0.16b, v1.16b \n"
827 "AESIMC v0.16b, v0.16b \n"
828 "AESD v0.16b, v2.16b \n"
829 "AESIMC v0.16b, v0.16b \n"
830 "AESD v0.16b, v3.16b \n"
831 "AESIMC v0.16b, v0.16b \n"
832 "AESD v0.16b, v4.16b \n"
833 "AESIMC v0.16b, v0.16b \n"
834 "AESD v0.16b, v5.16b \n"
835 "AESIMC v0.16b, v0.16b \n"
836 "AESD v0.16b, v6.16b \n"
837 "AESIMC v0.16b, v0.16b \n"
838 "AESD v0.16b, v7.16b \n"
839 "AESIMC v0.16b, v0.16b \n"
840 "AESD v0.16b, v8.16b \n"
841 "AESIMC v0.16b, v0.16b \n"
842 "AESD v0.16b, v9.16b \n"
843 "AESIMC v0.16b, v0.16b \n"
844 "AESD v0.16b, v10.16b \n"
845 "AESIMC v0.16b, v0.16b \n"
846 "AESD v0.16b, v11.16b \n"
847 "AESIMC v0.16b, v0.16b \n"
848 "AESD v0.16b, v12.16b \n"
849 "AESIMC v0.16b, v0.16b \n"
850 "AESD v0.16b, v13.16b \n"
851 "AESIMC v0.16b, v0.16b \n"
852 "AESD v0.16b, v14.16b \n"
853 "EOR v0.16b, v0.16b, v15.16b \n"
854
855 "EOR v0.16b, v0.16b, v17.16b \n"
856 "SUB w11, w11, #1 \n"
857 "ST1 {v0.2d}, [%[out]], #16 \n"
858 "MOV v17.16b, v16.16b \n"
859
860 "CBZ w11, 2f \n"
861 "LD1 {v0.2d}, [%[input]], #16 \n"
862 "B 1b \n"
863
864 "2:\n"
865 "#store current counter value at the end \n"
866 "ST1 {v17.2d}, [%[regOut]] \n"
867
868 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
869 :"0" (out), [Key] "r" (key), [input] "2" (in),
870 [blocks] "r" (numBlocks), [reg] "1" (reg)
871 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
872 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15",
873 "v16", "v17"
874 );
875 break;
876 #endif /* WOLFSSL_AES_256 */
877 default:
878 WOLFSSL_MSG("Bad AES-CBC round value");
879 return BAD_FUNC_ARG;
880 }
881 }
882
883 return 0;
884 }
885 #endif
886
887 #endif /* HAVE_AES_CBC */
888
889 /* AES-CTR */
890 #ifdef WOLFSSL_AES_COUNTER
891
892 /* Increment AES counter */
IncrementAesCounter(byte * inOutCtr)893 static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
894 {
895 int i;
896
897 /* in network byte order so start at end and work back */
898 for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
899 if (++inOutCtr[i]) /* we're done unless we overflow */
900 return;
901 }
902 }
903
wc_AesCtrEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)904 int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
905 {
906 byte* tmp;
907 word32 numBlocks;
908
909 if (aes == NULL || out == NULL || in == NULL) {
910 return BAD_FUNC_ARG;
911 }
912
913 tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
914
915 /* consume any unused bytes left in aes->tmp */
916 while (aes->left && sz) {
917 *(out++) = *(in++) ^ *(tmp++);
918 aes->left--;
919 sz--;
920 }
921
922 /* do as many block size ops as possible */
923 numBlocks = sz/AES_BLOCK_SIZE;
924 if (numBlocks > 0) {
925 /* pointer needed because it is incremented when read, causing
926 * an issue with call to encrypt/decrypt leftovers */
927 byte* keyPt = (byte*)aes->key;
928 sz -= numBlocks * AES_BLOCK_SIZE;
929 switch(aes->rounds) {
930 #ifdef WOLFSSL_AES_128
931 case 10: /* AES 128 BLOCK */
932 __asm__ __volatile__ (
933 "MOV w11, %w[blocks] \n"
934 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
935
936 "#Create vector with the value 1 \n"
937 "MOVI v15.16b, #1 \n"
938 "USHR v15.2d, v15.2d, #56 \n"
939 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
940 "EOR v14.16b, v14.16b, v14.16b \n"
941 "EXT v14.16b, v15.16b, v14.16b, #8\n"
942
943 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
944 "LD1 {v13.2d}, %[reg] \n"
945
946 /* double block */
947 "1: \n"
948 "CMP w11, #1 \n"
949 "BEQ 2f \n"
950 "CMP w11, #0 \n"
951 "BEQ 3f \n"
952
953 "MOV v0.16b, v13.16b \n"
954 "AESE v0.16b, v1.16b \n"
955 "AESMC v0.16b, v0.16b \n"
956 "REV64 v13.16b, v13.16b \n" /* network order */
957 "AESE v0.16b, v2.16b \n"
958 "AESMC v0.16b, v0.16b \n"
959 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
960 "SUB w11, w11, #2 \n"
961 "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */
962 "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */
963
964 "AESE v0.16b, v3.16b \n"
965 "AESMC v0.16b, v0.16b \n"
966 "EXT v15.16b, v15.16b, v15.16b, #8 \n"
967 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
968
969 "AESE v0.16b, v4.16b \n"
970 "AESMC v0.16b, v0.16b \n"
971 "REV64 v15.16b, v15.16b \n" /* revert from network order */
972 "REV64 v13.16b, v13.16b \n" /* revert from network order */
973
974 "AESE v0.16b, v5.16b \n"
975 "AESMC v0.16b, v0.16b \n"
976 "AESE v15.16b, v1.16b \n"
977 "AESMC v15.16b, v15.16b \n"
978
979 "AESE v0.16b, v6.16b \n"
980 "AESMC v0.16b, v0.16b \n"
981 "AESE v15.16b, v2.16b \n"
982 "AESMC v15.16b, v15.16b \n"
983
984 "AESE v0.16b, v7.16b \n"
985 "AESMC v0.16b, v0.16b \n"
986 "AESE v15.16b, v3.16b \n"
987 "AESMC v15.16b, v15.16b \n"
988
989 "AESE v0.16b, v8.16b \n"
990 "AESMC v0.16b, v0.16b \n"
991 "AESE v15.16b, v4.16b \n"
992 "AESMC v15.16b, v15.16b \n"
993
994 "AESE v0.16b, v9.16b \n"
995 "AESMC v0.16b, v0.16b \n"
996 "AESE v15.16b, v5.16b \n"
997 "AESMC v15.16b, v15.16b \n"
998
999 "AESE v0.16b, v10.16b \n"
1000 "AESE v15.16b, v6.16b \n"
1001 "AESMC v15.16b, v15.16b \n"
1002
1003 "EOR v0.16b, v0.16b, v11.16b \n"
1004 "AESE v15.16b, v7.16b \n"
1005 "AESMC v15.16b, v15.16b \n"
1006
1007 "LD1 {v12.2d}, [%[input]], #16 \n"
1008 "AESE v15.16b, v8.16b \n"
1009 "AESMC v15.16b, v15.16b \n"
1010
1011 "EOR v0.16b, v0.16b, v12.16b \n"
1012 "AESE v15.16b, v9.16b \n"
1013 "AESMC v15.16b, v15.16b \n"
1014
1015 "LD1 {v12.2d}, [%[input]], #16 \n"
1016 "AESE v15.16b, v10.16b \n"
1017 "ST1 {v0.2d}, [%[out]], #16 \n"
1018 "EOR v15.16b, v15.16b, v11.16b \n"
1019 "EOR v15.16b, v15.16b, v12.16b \n"
1020 "ST1 {v15.2d}, [%[out]], #16 \n"
1021
1022 "B 1b \n"
1023
1024 /* single block */
1025 "2: \n"
1026 "MOV v0.16b, v13.16b \n"
1027 "AESE v0.16b, v1.16b \n"
1028 "AESMC v0.16b, v0.16b \n"
1029 "REV64 v13.16b, v13.16b \n" /* network order */
1030 "AESE v0.16b, v2.16b \n"
1031 "AESMC v0.16b, v0.16b \n"
1032 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1033 "AESE v0.16b, v3.16b \n"
1034 "AESMC v0.16b, v0.16b \n"
1035 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1036 "AESE v0.16b, v4.16b \n"
1037 "AESMC v0.16b, v0.16b \n"
1038 "SUB w11, w11, #1 \n"
1039 "AESE v0.16b, v5.16b \n"
1040 "AESMC v0.16b, v0.16b \n"
1041 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1042 "AESE v0.16b, v6.16b \n"
1043 "AESMC v0.16b, v0.16b \n"
1044 "REV64 v13.16b, v13.16b \n" /* revert from network order */
1045 "AESE v0.16b, v7.16b \n"
1046 "AESMC v0.16b, v0.16b \n"
1047 "AESE v0.16b, v8.16b \n"
1048 "AESMC v0.16b, v0.16b \n"
1049 "AESE v0.16b, v9.16b \n"
1050 "AESMC v0.16b, v0.16b \n"
1051 "AESE v0.16b, v10.16b \n"
1052 "EOR v0.16b, v0.16b, v11.16b \n"
1053 "#CTR operations, increment counter and xorbuf \n"
1054 "LD1 {v12.2d}, [%[input]], #16 \n"
1055 "EOR v0.16b, v0.16b, v12.16b \n"
1056 "ST1 {v0.2d}, [%[out]], #16 \n"
1057
1058 "3: \n"
1059 "#store current counter value at the end \n"
1060 "ST1 {v13.2d}, %[regOut] \n"
1061
1062 :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1063 "=r" (in)
1064 :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1065 [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1066 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1067 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15"
1068 );
1069 break;
1070 #endif /* WOLFSSL_AES_128 */
1071 #ifdef WOLFSSL_AES_192
1072 case 12: /* AES 192 BLOCK */
1073 __asm__ __volatile__ (
1074 "MOV w11, %w[blocks] \n"
1075 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1076
1077 "#Create vector with the value 1 \n"
1078 "MOVI v16.16b, #1 \n"
1079 "USHR v16.2d, v16.2d, #56 \n"
1080 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1081 "EOR v14.16b, v14.16b, v14.16b \n"
1082 "EXT v16.16b, v16.16b, v14.16b, #8\n"
1083
1084 "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n"
1085 "LD1 {v15.2d}, %[reg] \n"
1086 "LD1 {v13.16b}, [%[Key]], #16 \n"
1087
1088 /* double block */
1089 "1: \n"
1090 "CMP w11, #1 \n"
1091 "BEQ 2f \n"
1092 "CMP w11, #0 \n"
1093 "BEQ 3f \n"
1094
1095 "MOV v0.16b, v15.16b \n"
1096 "AESE v0.16b, v1.16b \n"
1097 "AESMC v0.16b, v0.16b \n"
1098 "REV64 v15.16b, v15.16b \n" /* network order */
1099 "AESE v0.16b, v2.16b \n"
1100 "AESMC v0.16b, v0.16b \n"
1101 "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1102 "SUB w11, w11, #2 \n"
1103 "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */
1104 "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */
1105
1106 "AESE v0.16b, v3.16b \n"
1107 "AESMC v0.16b, v0.16b \n"
1108 "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1109 "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1110
1111 "AESE v0.16b, v4.16b \n"
1112 "AESMC v0.16b, v0.16b \n"
1113 "REV64 v17.16b, v17.16b \n" /* revert from network order */
1114 "REV64 v15.16b, v15.16b \n" /* revert from network order */
1115
1116 "AESE v0.16b, v5.16b \n"
1117 "AESMC v0.16b, v0.16b \n"
1118 "AESE v17.16b, v1.16b \n"
1119 "AESMC v17.16b, v17.16b \n"
1120
1121 "AESE v0.16b, v6.16b \n"
1122 "AESMC v0.16b, v0.16b \n"
1123 "AESE v17.16b, v2.16b \n"
1124 "AESMC v17.16b, v17.16b \n"
1125
1126 "AESE v0.16b, v7.16b \n"
1127 "AESMC v0.16b, v0.16b \n"
1128 "AESE v17.16b, v3.16b \n"
1129 "AESMC v17.16b, v17.16b \n"
1130
1131 "AESE v0.16b, v8.16b \n"
1132 "AESMC v0.16b, v0.16b \n"
1133 "AESE v17.16b, v4.16b \n"
1134 "AESMC v17.16b, v17.16b \n"
1135
1136 "AESE v0.16b, v9.16b \n"
1137 "AESMC v0.16b, v0.16b \n"
1138 "AESE v17.16b, v5.16b \n"
1139 "AESMC v17.16b, v17.16b \n"
1140
1141 "AESE v0.16b, v10.16b \n"
1142 "AESMC v0.16b, v0.16b \n"
1143 "AESE v17.16b, v6.16b \n"
1144 "AESMC v17.16b, v17.16b \n"
1145
1146 "AESE v0.16b, v11.16b \n"
1147 "AESMC v0.16b, v0.16b \n"
1148 "AESE v17.16b, v7.16b \n"
1149 "AESMC v17.16b, v17.16b \n"
1150
1151 "AESE v0.16b, v12.16b \n"
1152 "AESE v17.16b, v8.16b \n"
1153 "AESMC v17.16b, v17.16b \n"
1154
1155 "EOR v0.16b, v0.16b, v13.16b \n"
1156 "AESE v17.16b, v9.16b \n"
1157 "AESMC v17.16b, v17.16b \n"
1158
1159 "LD1 {v14.2d}, [%[input]], #16 \n"
1160 "AESE v17.16b, v10.16b \n"
1161 "AESMC v17.16b, v17.16b \n"
1162
1163 "EOR v0.16b, v0.16b, v14.16b \n"
1164 "AESE v17.16b, v11.16b \n"
1165 "AESMC v17.16b, v17.16b \n"
1166
1167 "LD1 {v14.2d}, [%[input]], #16 \n"
1168 "AESE v17.16b, v12.16b \n"
1169 "ST1 {v0.2d}, [%[out]], #16 \n"
1170 "EOR v17.16b, v17.16b, v13.16b \n"
1171 "EOR v17.16b, v17.16b, v14.16b \n"
1172 "ST1 {v17.2d}, [%[out]], #16 \n"
1173
1174 "B 1b \n"
1175
1176 "2: \n"
1177 "LD1 {v14.2d}, [%[input]], #16 \n"
1178 "MOV v0.16b, v15.16b \n"
1179
1180 "AESE v0.16b, v1.16b \n"
1181 "AESMC v0.16b, v0.16b \n"
1182 "REV64 v15.16b, v15.16b \n" /* network order */
1183 "AESE v0.16b, v2.16b \n"
1184 "AESMC v0.16b, v0.16b \n"
1185 "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1186 "AESE v0.16b, v3.16b \n"
1187 "AESMC v0.16b, v0.16b \n"
1188 "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */
1189 "AESE v0.16b, v4.16b \n"
1190 "AESMC v0.16b, v0.16b \n"
1191 "SUB w11, w11, #1 \n"
1192 "AESE v0.16b, v5.16b \n"
1193 "AESMC v0.16b, v0.16b \n"
1194 "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1195 "AESE v0.16b, v6.16b \n"
1196 "AESMC v0.16b, v0.16b \n"
1197 "REV64 v15.16b, v15.16b \n" /* revert from network order */
1198 "AESE v0.16b, v7.16b \n"
1199 "AESMC v0.16b, v0.16b \n"
1200 "AESE v0.16b, v8.16b \n"
1201 "AESMC v0.16b, v0.16b \n"
1202 "AESE v0.16b, v9.16b \n"
1203 "AESMC v0.16b, v0.16b \n"
1204 "AESE v0.16b, v10.16b \n"
1205 "AESMC v0.16b, v0.16b \n"
1206 "AESE v0.16b, v11.16b \n"
1207 "AESMC v0.16b, v0.16b \n"
1208 "AESE v0.16b, v12.16b \n"
1209 "EOR v0.16b, v0.16b, v13.16b \n"
1210 "#CTR operations, increment counter and xorbuf \n"
1211 "EOR v0.16b, v0.16b, v14.16b \n"
1212 "ST1 {v0.2d}, [%[out]], #16 \n"
1213
1214 "3: \n"
1215 "#store current counter value at the end \n"
1216 "ST1 {v15.2d}, %[regOut] \n"
1217
1218 :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1219 "=r" (in)
1220 :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1221 [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1222 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1223 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15",
1224 "v16", "v17"
1225 );
1226 break;
1227 #endif /* WOLFSSL_AES_192 */
1228 #ifdef WOLFSSL_AES_256
1229 case 14: /* AES 256 BLOCK */
1230 __asm__ __volatile__ (
1231 "MOV w11, %w[blocks] \n"
1232 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1233
1234 "#Create vector with the value 1 \n"
1235 "MOVI v18.16b, #1 \n"
1236 "USHR v18.2d, v18.2d, #56 \n"
1237 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1238 "EOR v19.16b, v19.16b, v19.16b \n"
1239 "EXT v18.16b, v18.16b, v19.16b, #8\n"
1240
1241 "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
1242 "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
1243 "LD1 {v17.2d}, %[reg] \n"
1244
1245 /* double block */
1246 "1: \n"
1247 "CMP w11, #1 \n"
1248 "BEQ 2f \n"
1249 "CMP w11, #0 \n"
1250 "BEQ 3f \n"
1251
1252 "MOV v0.16b, v17.16b \n"
1253 "AESE v0.16b, v1.16b \n"
1254 "AESMC v0.16b, v0.16b \n"
1255 "REV64 v17.16b, v17.16b \n" /* network order */
1256 "AESE v0.16b, v2.16b \n"
1257 "AESMC v0.16b, v0.16b \n"
1258 "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1259 "SUB w11, w11, #2 \n"
1260 "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */
1261 "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */
1262
1263 "AESE v0.16b, v3.16b \n"
1264 "AESMC v0.16b, v0.16b \n"
1265 "EXT v19.16b, v19.16b, v19.16b, #8 \n"
1266 "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1267
1268 "AESE v0.16b, v4.16b \n"
1269 "AESMC v0.16b, v0.16b \n"
1270 "REV64 v19.16b, v19.16b \n" /* revert from network order */
1271 "REV64 v17.16b, v17.16b \n" /* revert from network order */
1272
1273 "AESE v0.16b, v5.16b \n"
1274 "AESMC v0.16b, v0.16b \n"
1275 "AESE v19.16b, v1.16b \n"
1276 "AESMC v19.16b, v19.16b \n"
1277
1278 "AESE v0.16b, v6.16b \n"
1279 "AESMC v0.16b, v0.16b \n"
1280 "AESE v19.16b, v2.16b \n"
1281 "AESMC v19.16b, v19.16b \n"
1282
1283 "AESE v0.16b, v7.16b \n"
1284 "AESMC v0.16b, v0.16b \n"
1285 "AESE v19.16b, v3.16b \n"
1286 "AESMC v19.16b, v19.16b \n"
1287
1288 "AESE v0.16b, v8.16b \n"
1289 "AESMC v0.16b, v0.16b \n"
1290 "AESE v19.16b, v4.16b \n"
1291 "AESMC v19.16b, v19.16b \n"
1292
1293 "AESE v0.16b, v9.16b \n"
1294 "AESMC v0.16b, v0.16b \n"
1295 "AESE v19.16b, v5.16b \n"
1296 "AESMC v19.16b, v19.16b \n"
1297
1298 "AESE v0.16b, v10.16b \n"
1299 "AESMC v0.16b, v0.16b \n"
1300 "AESE v19.16b, v6.16b \n"
1301 "AESMC v19.16b, v19.16b \n"
1302
1303 "AESE v0.16b, v11.16b \n"
1304 "AESMC v0.16b, v0.16b \n"
1305 "AESE v19.16b, v7.16b \n"
1306 "AESMC v19.16b, v19.16b \n"
1307
1308 "AESE v0.16b, v12.16b \n"
1309 "AESMC v0.16b, v0.16b \n"
1310 "AESE v19.16b, v8.16b \n"
1311 "AESMC v19.16b, v19.16b \n"
1312
1313 "AESE v0.16b, v13.16b \n"
1314 "AESMC v0.16b, v0.16b \n"
1315 "AESE v19.16b, v9.16b \n"
1316 "AESMC v19.16b, v19.16b \n"
1317
1318 "AESE v0.16b, v14.16b \n"
1319 "AESE v19.16b, v10.16b \n"
1320 "AESMC v19.16b, v19.16b \n"
1321
1322 "EOR v0.16b, v0.16b, v15.16b \n"
1323 "AESE v19.16b, v11.16b \n"
1324 "AESMC v19.16b, v19.16b \n"
1325
1326 "LD1 {v16.2d}, [%[input]], #16 \n"
1327 "AESE v19.16b, v12.16b \n"
1328 "AESMC v19.16b, v19.16b \n"
1329
1330 "EOR v0.16b, v0.16b, v16.16b \n"
1331 "AESE v19.16b, v13.16b \n"
1332 "AESMC v19.16b, v19.16b \n"
1333
1334 "LD1 {v16.2d}, [%[input]], #16 \n"
1335 "AESE v19.16b, v14.16b \n"
1336 "ST1 {v0.2d}, [%[out]], #16 \n"
1337 "EOR v19.16b, v19.16b, v15.16b \n"
1338 "EOR v19.16b, v19.16b, v16.16b \n"
1339 "ST1 {v19.2d}, [%[out]], #16 \n"
1340
1341 "B 1b \n"
1342
1343 "2: \n"
1344 "LD1 {v16.2d}, [%[input]], #16 \n"
1345 "MOV v0.16b, v17.16b \n"
1346 "AESE v0.16b, v1.16b \n"
1347 "AESMC v0.16b, v0.16b \n"
1348 "REV64 v17.16b, v17.16b \n" /* network order */
1349 "AESE v0.16b, v2.16b \n"
1350 "AESMC v0.16b, v0.16b \n"
1351 "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1352 "AESE v0.16b, v3.16b \n"
1353 "AESMC v0.16b, v0.16b \n"
1354 "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */
1355 "AESE v0.16b, v4.16b \n"
1356 "AESMC v0.16b, v0.16b \n"
1357 "AESE v0.16b, v5.16b \n"
1358 "AESMC v0.16b, v0.16b \n"
1359 "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1360 "AESE v0.16b, v6.16b \n"
1361 "AESMC v0.16b, v0.16b \n"
1362 "REV64 v17.16b, v17.16b \n" /* revert from network order */
1363 "AESE v0.16b, v7.16b \n"
1364 "AESMC v0.16b, v0.16b \n"
1365 "AESE v0.16b, v8.16b \n"
1366 "AESMC v0.16b, v0.16b \n"
1367 "AESE v0.16b, v9.16b \n"
1368 "AESMC v0.16b, v0.16b \n"
1369 "AESE v0.16b, v10.16b \n"
1370 "AESMC v0.16b, v0.16b \n"
1371 "AESE v0.16b, v11.16b \n"
1372 "AESMC v0.16b, v0.16b \n"
1373 "AESE v0.16b, v12.16b \n"
1374 "AESMC v0.16b, v0.16b \n"
1375 "AESE v0.16b, v13.16b \n"
1376 "AESMC v0.16b, v0.16b \n"
1377 "AESE v0.16b, v14.16b \n"
1378 "EOR v0.16b, v0.16b, v15.16b \n"
1379 "#CTR operations, increment counter and xorbuf \n"
1380 "EOR v0.16b, v0.16b, v16.16b \n"
1381 "ST1 {v0.2d}, [%[out]], #16 \n"
1382
1383 "3: \n"
1384 "#store current counter value at the end \n"
1385 "ST1 {v17.2d}, %[regOut] \n"
1386
1387
1388 :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1389 "=r" (in)
1390 :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1391 [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1392 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1393 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15",
1394 "v16", "v17", "v18", "v19"
1395 );
1396 break;
1397 #endif /* WOLFSSL_AES_256 */
1398 default:
1399 WOLFSSL_MSG("Bad AES-CTR round value");
1400 return BAD_FUNC_ARG;
1401 }
1402
1403 aes->left = 0;
1404 }
1405
1406 /* handle non block size remaining */
1407 if (sz) {
1408 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
1409 IncrementAesCounter((byte*)aes->reg);
1410
1411 aes->left = AES_BLOCK_SIZE;
1412 tmp = (byte*)aes->tmp;
1413
1414 while (sz--) {
1415 *(out++) = *(in++) ^ *(tmp++);
1416 aes->left--;
1417 }
1418 }
1419 return 0;
1420 }
1421
1422 #endif /* WOLFSSL_AES_COUNTER */
1423
1424 #ifdef HAVE_AESGCM
1425
1426 /*
1427 * Based from GCM implementation in wolfcrypt/src/aes.c
1428 */
1429
1430 /* PMULL and RBIT only with AArch64 */
1431 /* Use ARM hardware for polynomial multiply */
GMULT(byte * X,byte * Y)1432 static void GMULT(byte* X, byte* Y)
1433 {
1434 __asm__ volatile (
1435 "LD1 {v0.16b}, [%[inX]] \n"
1436 "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */
1437 "RBIT v0.16b, v0.16b \n"
1438
1439
1440 /* Algorithm 1 from Intel GCM white paper.
1441 "Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
1442 */
1443 "PMULL v3.1q, v0.1d, v1.1d \n" /* a0 * b0 = C */
1444 "PMULL2 v4.1q, v0.2d, v1.2d \n" /* a1 * b1 = D */
1445 "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */
1446 "PMULL v6.1q, v0.1d, v5.1d \n" /* a0 * b1 = E */
1447 "PMULL2 v5.1q, v0.2d, v5.2d \n" /* a1 * b0 = F */
1448
1449 "#Set a register to all 0s using EOR \n"
1450 "EOR v7.16b, v7.16b, v7.16b \n"
1451 "EOR v5.16b, v5.16b, v6.16b \n" /* F ^ E */
1452 "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */
1453 "EOR v3.16b, v3.16b, v6.16b \n" /* low 128 bits in v3 */
1454 "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */
1455 "EOR v4.16b, v4.16b, v6.16b \n" /* high 128 bits in v4 */
1456
1457
1458 /* Based from White Paper "Implementing GCM on ARMv8"
1459 by Conrado P.L. Gouvea and Julio Lopez
1460 reduction on 256bit value using Algorithm 5 */
1461 "MOVI v8.16b, #0x87 \n"
1462 "USHR v8.2d, v8.2d, #56 \n"
1463 /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/
1464 "PMULL2 v5.1q, v4.2d, v8.2d \n"
1465 "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */
1466 "EOR v4.16b, v4.16b, v6.16b \n"
1467 "EXT v6.16b, v7.16b, v5.16b, #8 \n"
1468 "EOR v3.16b, v3.16b, v6.16b \n"
1469 "PMULL v5.1q, v4.1d, v8.1d \n"
1470 "EOR v4.16b, v3.16b, v5.16b \n"
1471
1472 "RBIT v4.16b, v4.16b \n"
1473 "STR q4, [%[out]] \n"
1474 : [out] "=r" (X), "=r" (Y)
1475 : [inX] "0" (X), [inY] "1" (Y)
1476 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"
1477 );
1478 }
1479
1480
GHASH(Aes * aes,const byte * a,word32 aSz,const byte * c,word32 cSz,byte * s,word32 sSz)1481 void GHASH(Aes* aes, const byte* a, word32 aSz,
1482 const byte* c, word32 cSz, byte* s, word32 sSz)
1483 {
1484 byte x[AES_BLOCK_SIZE];
1485 byte scratch[AES_BLOCK_SIZE];
1486 word32 blocks, partial;
1487 byte* h = aes->H;
1488
1489 XMEMSET(x, 0, AES_BLOCK_SIZE);
1490
1491 /* Hash in A, the Additional Authentication Data */
1492 if (aSz != 0 && a != NULL) {
1493 blocks = aSz / AES_BLOCK_SIZE;
1494 partial = aSz % AES_BLOCK_SIZE;
1495 /* do as many blocks as possible */
1496 while (blocks--) {
1497 xorbuf(x, a, AES_BLOCK_SIZE);
1498 GMULT(x, h);
1499 a += AES_BLOCK_SIZE;
1500 }
1501 if (partial != 0) {
1502 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1503 XMEMCPY(scratch, a, partial);
1504 xorbuf(x, scratch, AES_BLOCK_SIZE);
1505 GMULT(x, h);
1506 }
1507 }
1508
1509 /* Hash in C, the Ciphertext */
1510 if (cSz != 0 && c != NULL) {
1511 blocks = cSz / AES_BLOCK_SIZE;
1512 partial = cSz % AES_BLOCK_SIZE;
1513 while (blocks--) {
1514 xorbuf(x, c, AES_BLOCK_SIZE);
1515 GMULT(x, h);
1516 c += AES_BLOCK_SIZE;
1517 }
1518 if (partial != 0) {
1519 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1520 XMEMCPY(scratch, c, partial);
1521 xorbuf(x, scratch, AES_BLOCK_SIZE);
1522 GMULT(x, h);
1523 }
1524 }
1525
1526 /* Hash in the lengths of A and C in bits */
1527 FlattenSzInBits(&scratch[0], aSz);
1528 FlattenSzInBits(&scratch[8], cSz);
1529 xorbuf(x, scratch, AES_BLOCK_SIZE);
1530
1531 /* Copy the result (minus last GMULT) into s. */
1532 XMEMCPY(s, x, sSz);
1533 }
1534
1535
1536 #ifdef WOLFSSL_AES_128
1537 /* internal function : see wc_AesGcmEncrypt */
Aes128GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)1538 static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
1539 const byte* iv, word32 ivSz,
1540 byte* authTag, word32 authTagSz,
1541 const byte* authIn, word32 authInSz)
1542 {
1543 word32 blocks;
1544 word32 partial;
1545 byte counter[AES_BLOCK_SIZE];
1546 byte initialCounter[AES_BLOCK_SIZE];
1547 byte x[AES_BLOCK_SIZE];
1548 byte scratch[AES_BLOCK_SIZE];
1549
1550 /* Noticed different optimization levels treated head of array different.
1551 Some cases was stack pointer plus offset others was a regester containing
1552 address. To make uniform for passing in to inline assembly code am using
1553 pointers to the head of each local array.
1554 */
1555 byte* ctr = counter;
1556 byte* iCtr = initialCounter;
1557 byte* xPt = x;
1558 byte* sPt = scratch;
1559 byte* keyPt; /* pointer to handle pointer advencment */
1560
1561 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
1562 if (ivSz == GCM_NONCE_MID_SZ) {
1563 XMEMCPY(initialCounter, iv, ivSz);
1564 initialCounter[AES_BLOCK_SIZE - 1] = 1;
1565 }
1566 else {
1567 GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
1568 GMULT(initialCounter, aes->H);
1569 }
1570 XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
1571
1572
1573 /* Hash in the Additional Authentication Data */
1574 XMEMSET(x, 0, AES_BLOCK_SIZE);
1575 if (authInSz != 0 && authIn != NULL) {
1576 blocks = authInSz / AES_BLOCK_SIZE;
1577 partial = authInSz % AES_BLOCK_SIZE;
1578 /* do as many blocks as possible */
1579 while (blocks--) {
1580 xorbuf(x, authIn, AES_BLOCK_SIZE);
1581 GMULT(x, aes->H);
1582 authIn += AES_BLOCK_SIZE;
1583 }
1584 if (partial != 0) {
1585 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1586 XMEMCPY(scratch, authIn, partial);
1587 xorbuf(x, scratch, AES_BLOCK_SIZE);
1588 GMULT(x, aes->H);
1589 }
1590 }
1591
1592 /* do as many blocks as possible */
1593 blocks = sz / AES_BLOCK_SIZE;
1594 partial = sz % AES_BLOCK_SIZE;
1595 if (blocks > 0) {
1596 keyPt = (byte*)aes->key;
1597 __asm__ __volatile__ (
1598 "MOV w11, %w[blocks] \n"
1599 "LD1 {v13.2d}, [%[ctr]] \n"
1600
1601 "#Create vector with the value 1 \n"
1602 "MOVI v14.16b, #1 \n"
1603 "USHR v14.2d, v14.2d, #56 \n"
1604 "EOR v22.16b, v22.16b, v22.16b \n"
1605 "EXT v14.16b, v14.16b, v22.16b, #8\n"
1606
1607
1608 /***************************************************
1609 Get first out block for GHASH using AES encrypt
1610 ***************************************************/
1611 "REV64 v13.16b, v13.16b \n" /* network order */
1612 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1613 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1614 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1615 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1616 "REV64 v13.16b, v13.16b \n" /* revert from network order */
1617 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1618 "MOV v0.16b, v13.16b \n"
1619 "AESE v0.16b, v1.16b \n"
1620 "AESMC v0.16b, v0.16b \n"
1621 "AESE v0.16b, v2.16b \n"
1622 "AESMC v0.16b, v0.16b \n"
1623 "AESE v0.16b, v3.16b \n"
1624 "AESMC v0.16b, v0.16b \n"
1625 "LD1 {v16.2d}, %[inY] \n"
1626 "AESE v0.16b, v4.16b \n"
1627 "AESMC v0.16b, v0.16b \n"
1628 "SUB w11, w11, #1 \n"
1629 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1630 "AESE v0.16b, v5.16b \n"
1631 "AESMC v0.16b, v0.16b \n"
1632 "MOVI v23.16b, #0x87 \n"
1633 "AESE v0.16b, v6.16b \n"
1634 "AESMC v0.16b, v0.16b \n"
1635 "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
1636 "AESE v0.16b, v7.16b \n"
1637 "AESMC v0.16b, v0.16b \n"
1638 "USHR v23.2d, v23.2d, #56 \n"
1639 "AESE v0.16b, v8.16b \n"
1640 "AESMC v0.16b, v0.16b \n"
1641 "LD1 {v12.2d}, [%[input]], #16 \n"
1642 "AESE v0.16b, v9.16b \n"
1643 "AESMC v0.16b, v0.16b \n"
1644 "AESE v0.16b, v10.16b \n"
1645 "EOR v0.16b, v0.16b, v11.16b \n"
1646
1647 "EOR v0.16b, v0.16b, v12.16b \n"
1648 "ST1 {v0.2d}, [%[out]], #16 \n"
1649 "MOV v15.16b, v0.16b \n"
1650
1651 "CBZ w11, 1f \n" /* only one block jump to final GHASH */
1652
1653 "LD1 {v12.2d}, [%[input]], #16 \n"
1654
1655 /***************************************************
1656 Interweave GHASH and encrypt if more then 1 block
1657 ***************************************************/
1658 "2: \n"
1659 "REV64 v13.16b, v13.16b \n" /* network order */
1660 "EOR v15.16b, v17.16b, v15.16b \n"
1661 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1662 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1663 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
1664 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1665 "REV64 v13.16b, v13.16b \n" /* revert from network order */
1666 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
1667 "MOV v0.16b, v13.16b \n"
1668 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
1669 "AESE v0.16b, v1.16b \n"
1670 "AESMC v0.16b, v0.16b \n"
1671 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1672 "AESE v0.16b, v2.16b \n"
1673 "AESMC v0.16b, v0.16b \n"
1674 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
1675 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
1676 "AESE v0.16b, v3.16b \n"
1677 "AESMC v0.16b, v0.16b \n"
1678 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
1679 "AESE v0.16b, v4.16b \n"
1680 "AESMC v0.16b, v0.16b \n"
1681 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1682 "SUB w11, w11, #1 \n"
1683 "AESE v0.16b, v5.16b \n"
1684 "AESMC v0.16b, v0.16b \n"
1685 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
1686 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1687 "AESE v0.16b, v6.16b \n"
1688 "AESMC v0.16b, v0.16b \n"
1689 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
1690 "AESE v0.16b, v7.16b \n"
1691 "AESMC v0.16b, v0.16b \n"
1692 "PMULL2 v20.1q, v19.2d, v23.2d \n"
1693 "AESE v0.16b, v8.16b \n"
1694 "AESMC v0.16b, v0.16b \n"
1695 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
1696 "AESE v0.16b, v9.16b \n"
1697 "AESMC v0.16b, v0.16b \n"
1698 "EOR v19.16b, v19.16b, v21.16b \n"
1699 "AESE v0.16b, v10.16b \n"
1700 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1701 "EOR v0.16b, v0.16b, v11.16b \n"
1702 "EOR v18.16b, v18.16b, v21.16b \n"
1703
1704 "EOR v0.16b, v0.16b, v12.16b \n"
1705 "PMULL v20.1q, v19.1d, v23.1d \n"
1706 "ST1 {v0.2d}, [%[out]], #16 \n"
1707 "EOR v19.16b, v18.16b, v20.16b \n"
1708 "MOV v15.16b, v0.16b \n"
1709 "RBIT v17.16b, v19.16b \n"
1710
1711 "CBZ w11, 1f \n"
1712 "LD1 {v12.2d}, [%[input]], #16 \n"
1713 "B 2b \n"
1714
1715 /***************************************************
1716 GHASH on last block
1717 ***************************************************/
1718 "1: \n"
1719 "EOR v15.16b, v17.16b, v15.16b \n"
1720 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
1721
1722 "#store current AES counter value \n"
1723 "ST1 {v13.2d}, [%[ctrOut]] \n"
1724 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
1725 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
1726 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1727 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
1728 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
1729 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
1730 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1731 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
1732 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1733 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
1734
1735 "#Reduce product from multiplication \n"
1736 "PMULL2 v20.1q, v19.2d, v23.2d \n"
1737 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
1738 "EOR v19.16b, v19.16b, v21.16b \n"
1739 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1740 "EOR v18.16b, v18.16b, v21.16b \n"
1741 "PMULL v20.1q, v19.1d, v23.1d \n"
1742 "EOR v19.16b, v18.16b, v20.16b \n"
1743 "RBIT v17.16b, v19.16b \n"
1744 "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
1745
1746 :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
1747 ,[xOut] "=r" (xPt),"=m" (aes->H)
1748 :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
1749 [input] "3" (in)
1750 ,[inX] "4" (xPt), [inY] "m" (aes->H)
1751 : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1752 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
1753 ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24"
1754 );
1755 }
1756
1757 /* take care of partial block sizes leftover */
1758 if (partial != 0) {
1759 IncrementGcmCounter(counter);
1760 wc_AesEncrypt(aes, counter, scratch);
1761 xorbuf(scratch, in, partial);
1762 XMEMCPY(out, scratch, partial);
1763
1764 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1765 XMEMCPY(scratch, out, partial);
1766 xorbuf(x, scratch, AES_BLOCK_SIZE);
1767 GMULT(x, aes->H);
1768 }
1769
1770 /* Hash in the lengths of A and C in bits */
1771 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1772 FlattenSzInBits(&scratch[0], authInSz);
1773 FlattenSzInBits(&scratch[8], sz);
1774 xorbuf(x, scratch, AES_BLOCK_SIZE);
1775 XMEMCPY(scratch, x, AES_BLOCK_SIZE);
1776
1777 keyPt = (byte*)aes->key;
1778 __asm__ __volatile__ (
1779
1780 "LD1 {v16.16b}, [%[tag]] \n"
1781 "LD1 {v17.16b}, %[h] \n"
1782 "RBIT v16.16b, v16.16b \n"
1783
1784 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1785 "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */
1786 "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */
1787 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1788 "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
1789 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1790 "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */
1791 "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */
1792 "LD1 {v0.2d}, [%[ctr]] \n"
1793
1794 "#Set a register to all 0s using EOR \n"
1795 "EOR v22.16b, v22.16b, v22.16b \n"
1796 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
1797 "AESE v0.16b, v1.16b \n"
1798 "AESMC v0.16b, v0.16b \n"
1799 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1800 "AESE v0.16b, v2.16b \n"
1801 "AESMC v0.16b, v0.16b \n"
1802 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
1803 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1804 "AESE v0.16b, v3.16b \n"
1805 "AESMC v0.16b, v0.16b \n"
1806 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
1807 "MOVI v23.16b, #0x87 \n"
1808 "AESE v0.16b, v4.16b \n"
1809 "AESMC v0.16b, v0.16b \n"
1810 "USHR v23.2d, v23.2d, #56 \n"
1811 "PMULL2 v20.1q, v19.2d, v23.2d \n"
1812 "AESE v0.16b, v5.16b \n"
1813 "AESMC v0.16b, v0.16b \n"
1814 "EXT v21.16b, v20.16b, v22.16b, #8 \n"
1815 "AESE v0.16b, v6.16b \n"
1816 "AESMC v0.16b, v0.16b \n"
1817 "EOR v19.16b, v19.16b, v21.16b \n"
1818 "AESE v0.16b, v7.16b \n"
1819 "AESMC v0.16b, v0.16b \n"
1820 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1821 "AESE v0.16b, v8.16b \n"
1822 "AESMC v0.16b, v0.16b \n"
1823 "EOR v18.16b, v18.16b, v21.16b \n"
1824 "AESE v0.16b, v9.16b \n"
1825 "AESMC v0.16b, v0.16b \n"
1826 "PMULL v20.1q, v19.1d, v23.1d \n"
1827 "EOR v19.16b, v18.16b, v20.16b \n"
1828 "AESE v0.16b, v10.16b \n"
1829 "RBIT v19.16b, v19.16b \n"
1830 "EOR v0.16b, v0.16b, v11.16b \n"
1831 "EOR v19.16b, v19.16b, v0.16b \n"
1832 "STR q19, [%[out]] \n"
1833
1834 :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
1835 :[tag] "0" (sPt), [Key] "1" (keyPt),
1836 [ctr] "2" (iCtr) , [h] "m" (aes->H)
1837 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
1838 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
1839 "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24"
1840 );
1841
1842
1843 if (authTagSz > AES_BLOCK_SIZE) {
1844 XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
1845 }
1846 else {
1847 /* authTagSz can be smaller than AES_BLOCK_SIZE */
1848 XMEMCPY(authTag, scratch, authTagSz);
1849 }
1850 return 0;
1851 }
1852 #endif /* WOLFSSL_AES_128 */
1853
1854 #ifdef WOLFSSL_AES_192
1855 /* internal function : see wc_AesGcmEncrypt */
Aes192GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)1856 static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
1857 const byte* iv, word32 ivSz,
1858 byte* authTag, word32 authTagSz,
1859 const byte* authIn, word32 authInSz)
1860 {
1861 word32 blocks;
1862 word32 partial;
1863 byte counter[AES_BLOCK_SIZE];
1864 byte initialCounter[AES_BLOCK_SIZE];
1865 byte x[AES_BLOCK_SIZE];
1866 byte scratch[AES_BLOCK_SIZE];
1867
1868 /* Noticed different optimization levels treated head of array different.
1869 Some cases was stack pointer plus offset others was a regester containing
1870 address. To make uniform for passing in to inline assembly code am using
1871 pointers to the head of each local array.
1872 */
1873 byte* ctr = counter;
1874 byte* iCtr = initialCounter;
1875 byte* xPt = x;
1876 byte* sPt = scratch;
1877 byte* keyPt; /* pointer to handle pointer advencment */
1878
1879 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
1880 if (ivSz == GCM_NONCE_MID_SZ) {
1881 XMEMCPY(initialCounter, iv, ivSz);
1882 initialCounter[AES_BLOCK_SIZE - 1] = 1;
1883 }
1884 else {
1885 GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
1886 GMULT(initialCounter, aes->H);
1887 }
1888 XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
1889
1890
1891 /* Hash in the Additional Authentication Data */
1892 XMEMSET(x, 0, AES_BLOCK_SIZE);
1893 if (authInSz != 0 && authIn != NULL) {
1894 blocks = authInSz / AES_BLOCK_SIZE;
1895 partial = authInSz % AES_BLOCK_SIZE;
1896 /* do as many blocks as possible */
1897 while (blocks--) {
1898 xorbuf(x, authIn, AES_BLOCK_SIZE);
1899 GMULT(x, aes->H);
1900 authIn += AES_BLOCK_SIZE;
1901 }
1902 if (partial != 0) {
1903 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1904 XMEMCPY(scratch, authIn, partial);
1905 xorbuf(x, scratch, AES_BLOCK_SIZE);
1906 GMULT(x, aes->H);
1907 }
1908 }
1909
1910 /* do as many blocks as possible */
1911 blocks = sz / AES_BLOCK_SIZE;
1912 partial = sz % AES_BLOCK_SIZE;
1913 if (blocks > 0) {
1914 keyPt = (byte*)aes->key;
1915 __asm__ __volatile__ (
1916 "MOV w11, %w[blocks] \n"
1917 "LD1 {v13.2d}, [%[ctr]] \n"
1918
1919 "#Create vector with the value 1 \n"
1920 "MOVI v14.16b, #1 \n"
1921 "USHR v14.2d, v14.2d, #56 \n"
1922 "EOR v22.16b, v22.16b, v22.16b \n"
1923 "EXT v14.16b, v14.16b, v22.16b, #8\n"
1924
1925
1926 /***************************************************
1927 Get first out block for GHASH using AES encrypt
1928 ***************************************************/
1929 "REV64 v13.16b, v13.16b \n" /* network order */
1930 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1931 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1932 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1933 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1934 "REV64 v13.16b, v13.16b \n" /* revert from network order */
1935 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1936 "MOV v0.16b, v13.16b \n"
1937 "AESE v0.16b, v1.16b \n"
1938 "AESMC v0.16b, v0.16b \n"
1939 "AESE v0.16b, v2.16b \n"
1940 "AESMC v0.16b, v0.16b \n"
1941 "AESE v0.16b, v3.16b \n"
1942 "AESMC v0.16b, v0.16b \n"
1943 "LD1 {v16.2d}, %[inY] \n"
1944 "AESE v0.16b, v4.16b \n"
1945 "AESMC v0.16b, v0.16b \n"
1946 "SUB w11, w11, #1 \n"
1947 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1948 "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n"
1949 "AESE v0.16b, v5.16b \n"
1950 "AESMC v0.16b, v0.16b \n"
1951 "MOVI v23.16b, #0x87 \n"
1952 "AESE v0.16b, v6.16b \n"
1953 "AESMC v0.16b, v0.16b \n"
1954 "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
1955 "AESE v0.16b, v7.16b \n"
1956 "AESMC v0.16b, v0.16b \n"
1957 "USHR v23.2d, v23.2d, #56 \n"
1958 "AESE v0.16b, v8.16b \n"
1959 "AESMC v0.16b, v0.16b \n"
1960 "LD1 {v12.2d}, [%[input]], #16 \n"
1961 "AESE v0.16b, v9.16b \n"
1962 "AESMC v0.16b, v0.16b \n"
1963 "AESE v0.16b, v10.16b \n"
1964 "AESMC v0.16b, v0.16b \n"
1965 "AESE v0.16b, v11.16b \n"
1966 "AESMC v0.16b, v0.16b \n"
1967 "AESE v0.16b, v30.16b \n"
1968 "EOR v0.16b, v0.16b, v31.16b \n"
1969
1970 "EOR v0.16b, v0.16b, v12.16b \n"
1971 "ST1 {v0.2d}, [%[out]], #16 \n"
1972 "MOV v15.16b, v0.16b \n"
1973
1974 "CBZ w11, 1f \n" /* only one block jump to final GHASH */
1975 "LD1 {v12.2d}, [%[input]], #16 \n"
1976
1977 /***************************************************
1978 Interweave GHASH and encrypt if more then 1 block
1979 ***************************************************/
1980 "2: \n"
1981 "REV64 v13.16b, v13.16b \n" /* network order */
1982 "EOR v15.16b, v17.16b, v15.16b \n"
1983 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1984 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1985 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
1986 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1987 "REV64 v13.16b, v13.16b \n" /* revert from network order */
1988 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
1989 "MOV v0.16b, v13.16b \n"
1990 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
1991 "AESE v0.16b, v1.16b \n"
1992 "AESMC v0.16b, v0.16b \n"
1993 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1994 "AESE v0.16b, v2.16b \n"
1995 "AESMC v0.16b, v0.16b \n"
1996 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
1997 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
1998 "AESE v0.16b, v3.16b \n"
1999 "AESMC v0.16b, v0.16b \n"
2000 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2001 "AESE v0.16b, v4.16b \n"
2002 "AESMC v0.16b, v0.16b \n"
2003 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2004 "SUB w11, w11, #1 \n"
2005 "AESE v0.16b, v5.16b \n"
2006 "AESMC v0.16b, v0.16b \n"
2007 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2008 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2009 "AESE v0.16b, v6.16b \n"
2010 "AESMC v0.16b, v0.16b \n"
2011 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2012 "AESE v0.16b, v7.16b \n"
2013 "AESMC v0.16b, v0.16b \n"
2014 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2015 "AESE v0.16b, v8.16b \n"
2016 "AESMC v0.16b, v0.16b \n"
2017 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2018 "AESE v0.16b, v9.16b \n"
2019 "AESMC v0.16b, v0.16b \n"
2020 "AESE v0.16b, v10.16b \n"
2021 "AESMC v0.16b, v0.16b \n"
2022 "AESE v0.16b, v11.16b \n"
2023 "AESMC v0.16b, v0.16b \n"
2024 "EOR v19.16b, v19.16b, v21.16b \n"
2025 "AESE v0.16b, v30.16b \n"
2026 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2027 "EOR v0.16b, v0.16b, v31.16b \n"
2028 "EOR v18.16b, v18.16b, v21.16b \n"
2029
2030 "EOR v0.16b, v0.16b, v12.16b \n"
2031 "PMULL v20.1q, v19.1d, v23.1d \n"
2032 "ST1 {v0.2d}, [%[out]], #16 \n"
2033 "EOR v19.16b, v18.16b, v20.16b \n"
2034 "MOV v15.16b, v0.16b \n"
2035 "RBIT v17.16b, v19.16b \n"
2036
2037 "CBZ w11, 1f \n"
2038 "LD1 {v12.2d}, [%[input]], #16 \n"
2039 "B 2b \n"
2040
2041 /***************************************************
2042 GHASH on last block
2043 ***************************************************/
2044 "1: \n"
2045 "EOR v15.16b, v17.16b, v15.16b \n"
2046 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
2047
2048 "#store current AES counter value \n"
2049 "ST1 {v13.2d}, [%[ctrOut]] \n"
2050 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
2051 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
2052 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2053 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
2054 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
2055 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2056 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2057 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2058 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2059 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2060
2061 "#Reduce product from multiplication \n"
2062 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2063 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2064 "EOR v19.16b, v19.16b, v21.16b \n"
2065 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2066 "EOR v18.16b, v18.16b, v21.16b \n"
2067 "PMULL v20.1q, v19.1d, v23.1d \n"
2068 "EOR v19.16b, v18.16b, v20.16b \n"
2069 "RBIT v17.16b, v19.16b \n"
2070 "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
2071
2072 :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
2073 ,[xOut] "=r" (xPt),"=m" (aes->H)
2074 :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
2075 [input] "3" (in)
2076 ,[inX] "4" (xPt), [inY] "m" (aes->H)
2077 : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
2078 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
2079 ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
2080 "v24","v25","v26","v27","v28","v29","v30","v31"
2081 );
2082 }
2083
2084 /* take care of partial block sizes leftover */
2085 if (partial != 0) {
2086 IncrementGcmCounter(counter);
2087 wc_AesEncrypt(aes, counter, scratch);
2088 xorbuf(scratch, in, partial);
2089 XMEMCPY(out, scratch, partial);
2090
2091 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2092 XMEMCPY(scratch, out, partial);
2093 xorbuf(x, scratch, AES_BLOCK_SIZE);
2094 GMULT(x, aes->H);
2095 }
2096
2097 /* Hash in the lengths of A and C in bits */
2098 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2099 FlattenSzInBits(&scratch[0], authInSz);
2100 FlattenSzInBits(&scratch[8], sz);
2101 xorbuf(x, scratch, AES_BLOCK_SIZE);
2102 XMEMCPY(scratch, x, AES_BLOCK_SIZE);
2103
2104 keyPt = (byte*)aes->key;
2105 __asm__ __volatile__ (
2106
2107 "LD1 {v16.16b}, [%[tag]] \n"
2108 "LD1 {v17.16b}, %[h] \n"
2109 "RBIT v16.16b, v16.16b \n"
2110
2111 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2112 "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */
2113 "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */
2114 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2115 "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
2116 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2117 "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n"
2118 "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */
2119 "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */
2120 "LD1 {v0.2d}, [%[ctr]] \n"
2121
2122 "#Set a register to all 0s using EOR \n"
2123 "EOR v22.16b, v22.16b, v22.16b \n"
2124 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2125 "AESE v0.16b, v1.16b \n"
2126 "AESMC v0.16b, v0.16b \n"
2127 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2128 "AESE v0.16b, v2.16b \n"
2129 "AESMC v0.16b, v0.16b \n"
2130 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2131 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2132 "AESE v0.16b, v3.16b \n"
2133 "AESMC v0.16b, v0.16b \n"
2134 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2135 "MOVI v23.16b, #0x87 \n"
2136 "AESE v0.16b, v4.16b \n"
2137 "AESMC v0.16b, v0.16b \n"
2138 "USHR v23.2d, v23.2d, #56 \n"
2139 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2140 "AESE v0.16b, v5.16b \n"
2141 "AESMC v0.16b, v0.16b \n"
2142 "EXT v21.16b, v20.16b, v22.16b, #8 \n"
2143 "AESE v0.16b, v6.16b \n"
2144 "AESMC v0.16b, v0.16b \n"
2145 "EOR v19.16b, v19.16b, v21.16b \n"
2146 "AESE v0.16b, v7.16b \n"
2147 "AESMC v0.16b, v0.16b \n"
2148 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2149 "AESE v0.16b, v8.16b \n"
2150 "AESMC v0.16b, v0.16b \n"
2151 "EOR v18.16b, v18.16b, v21.16b \n"
2152 "AESE v0.16b, v9.16b \n"
2153 "AESMC v0.16b, v0.16b \n"
2154 "AESE v0.16b, v10.16b \n"
2155 "AESMC v0.16b, v0.16b \n"
2156 "AESE v0.16b, v11.16b \n"
2157 "AESMC v0.16b, v0.16b \n"
2158 "PMULL v20.1q, v19.1d, v23.1d \n"
2159 "EOR v19.16b, v18.16b, v20.16b \n"
2160 "AESE v0.16b, v30.16b \n"
2161 "RBIT v19.16b, v19.16b \n"
2162 "EOR v0.16b, v0.16b, v31.16b \n"
2163 "EOR v19.16b, v19.16b, v0.16b \n"
2164 "STR q19, [%[out]] \n"
2165
2166 :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
2167 :[tag] "0" (sPt), [Key] "1" (keyPt),
2168 [ctr] "2" (iCtr) , [h] "m" (aes->H)
2169 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
2170 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
2171 "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24"
2172 );
2173
2174
2175 if (authTagSz > AES_BLOCK_SIZE) {
2176 XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
2177 }
2178 else {
2179 /* authTagSz can be smaller than AES_BLOCK_SIZE */
2180 XMEMCPY(authTag, scratch, authTagSz);
2181 }
2182
2183 return 0;
2184 }
2185 #endif /* WOLFSSL_AES_192 */
2186
2187 #ifdef WOLFSSL_AES_256
2188 /* internal function : see wc_AesGcmEncrypt */
Aes256GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)2189 static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
2190 const byte* iv, word32 ivSz,
2191 byte* authTag, word32 authTagSz,
2192 const byte* authIn, word32 authInSz)
2193 {
2194 word32 blocks;
2195 word32 partial;
2196 byte counter[AES_BLOCK_SIZE];
2197 byte initialCounter[AES_BLOCK_SIZE];
2198 byte x[AES_BLOCK_SIZE];
2199 byte scratch[AES_BLOCK_SIZE];
2200
2201 /* Noticed different optimization levels treated head of array different.
2202 Some cases was stack pointer plus offset others was a regester containing
2203 address. To make uniform for passing in to inline assembly code am using
2204 pointers to the head of each local array.
2205 */
2206 byte* ctr = counter;
2207 byte* iCtr = initialCounter;
2208 byte* xPt = x;
2209 byte* sPt = scratch;
2210 byte* keyPt; /* pointer to handle pointer advencment */
2211
2212 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
2213 if (ivSz == GCM_NONCE_MID_SZ) {
2214 XMEMCPY(initialCounter, iv, ivSz);
2215 initialCounter[AES_BLOCK_SIZE - 1] = 1;
2216 }
2217 else {
2218 GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
2219 GMULT(initialCounter, aes->H);
2220 }
2221 XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
2222
2223
2224 /* Hash in the Additional Authentication Data */
2225 XMEMSET(x, 0, AES_BLOCK_SIZE);
2226 if (authInSz != 0 && authIn != NULL) {
2227 blocks = authInSz / AES_BLOCK_SIZE;
2228 partial = authInSz % AES_BLOCK_SIZE;
2229 /* do as many blocks as possible */
2230 while (blocks--) {
2231 xorbuf(x, authIn, AES_BLOCK_SIZE);
2232 GMULT(x, aes->H);
2233 authIn += AES_BLOCK_SIZE;
2234 }
2235 if (partial != 0) {
2236 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2237 XMEMCPY(scratch, authIn, partial);
2238 xorbuf(x, scratch, AES_BLOCK_SIZE);
2239 GMULT(x, aes->H);
2240 }
2241 }
2242
2243 /* do as many blocks as possible */
2244 blocks = sz / AES_BLOCK_SIZE;
2245 partial = sz % AES_BLOCK_SIZE;
2246 if (blocks > 0) {
2247 keyPt = (byte*)aes->key;
2248 __asm__ __volatile__ (
2249 "MOV w11, %w[blocks] \n"
2250 "LD1 {v13.2d}, [%[ctr]] \n"
2251
2252 "#Create vector with the value 1 \n"
2253 "MOVI v14.16b, #1 \n"
2254 "USHR v14.2d, v14.2d, #56 \n"
2255 "EOR v22.16b, v22.16b, v22.16b \n"
2256 "EXT v14.16b, v14.16b, v22.16b, #8\n"
2257
2258
2259 /***************************************************
2260 Get first out block for GHASH using AES encrypt
2261 ***************************************************/
2262 "REV64 v13.16b, v13.16b \n" /* network order */
2263 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2264 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2265 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
2266 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2267 "REV64 v13.16b, v13.16b \n" /* revert from network order */
2268 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2269 "MOV v0.16b, v13.16b \n"
2270 "AESE v0.16b, v1.16b \n"
2271 "AESMC v0.16b, v0.16b \n"
2272 "AESE v0.16b, v2.16b \n"
2273 "AESMC v0.16b, v0.16b \n"
2274 "AESE v0.16b, v3.16b \n"
2275 "AESMC v0.16b, v0.16b \n"
2276 "LD1 {v16.2d}, %[inY] \n"
2277 "AESE v0.16b, v4.16b \n"
2278 "AESMC v0.16b, v0.16b \n"
2279 "SUB w11, w11, #1 \n"
2280 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2281 "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n"
2282 "AESE v0.16b, v5.16b \n"
2283 "AESMC v0.16b, v0.16b \n"
2284 "MOVI v23.16b, #0x87 \n"
2285 "AESE v0.16b, v6.16b \n"
2286 "AESMC v0.16b, v0.16b \n"
2287 "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
2288 "AESE v0.16b, v7.16b \n"
2289 "AESMC v0.16b, v0.16b \n"
2290 "USHR v23.2d, v23.2d, #56 \n"
2291 "AESE v0.16b, v8.16b \n"
2292 "AESMC v0.16b, v0.16b \n"
2293 "LD1 {v12.2d}, [%[input]], #16 \n"
2294 "AESE v0.16b, v9.16b \n"
2295 "AESMC v0.16b, v0.16b \n"
2296 "AESE v0.16b, v10.16b \n"
2297 "AESMC v0.16b, v0.16b \n"
2298 "AESE v0.16b, v11.16b \n"
2299 "AESMC v0.16b, v0.16b \n"
2300 "AESE v0.16b, v28.16b \n"
2301 "AESMC v0.16b, v0.16b \n"
2302 "AESE v0.16b, v29.16b \n"
2303 "AESMC v0.16b, v0.16b \n"
2304 "AESE v0.16b, v30.16b \n"
2305 "EOR v0.16b, v0.16b, v31.16b \n"
2306
2307 "EOR v0.16b, v0.16b, v12.16b \n"
2308 "ST1 {v0.2d}, [%[out]], #16 \n"
2309 "MOV v15.16b, v0.16b \n"
2310
2311 "CBZ w11, 1f \n" /* only one block jump to final GHASH */
2312 "LD1 {v12.2d}, [%[input]], #16 \n"
2313
2314 /***************************************************
2315 Interweave GHASH and encrypt if more then 1 block
2316 ***************************************************/
2317 "2: \n"
2318 "REV64 v13.16b, v13.16b \n" /* network order */
2319 "EOR v15.16b, v17.16b, v15.16b \n"
2320 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2321 "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
2322 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
2323 "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2324 "REV64 v13.16b, v13.16b \n" /* revert from network order */
2325 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
2326 "MOV v0.16b, v13.16b \n"
2327 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
2328 "AESE v0.16b, v1.16b \n"
2329 "AESMC v0.16b, v0.16b \n"
2330 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2331 "AESE v0.16b, v2.16b \n"
2332 "AESMC v0.16b, v0.16b \n"
2333 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
2334 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
2335 "AESE v0.16b, v3.16b \n"
2336 "AESMC v0.16b, v0.16b \n"
2337 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2338 "AESE v0.16b, v4.16b \n"
2339 "AESMC v0.16b, v0.16b \n"
2340 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2341 "SUB w11, w11, #1 \n"
2342 "AESE v0.16b, v5.16b \n"
2343 "AESMC v0.16b, v0.16b \n"
2344 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2345 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2346 "AESE v0.16b, v6.16b \n"
2347 "AESMC v0.16b, v0.16b \n"
2348 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2349 "AESE v0.16b, v7.16b \n"
2350 "AESMC v0.16b, v0.16b \n"
2351 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2352 "AESE v0.16b, v8.16b \n"
2353 "AESMC v0.16b, v0.16b \n"
2354 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2355 "AESE v0.16b, v9.16b \n"
2356 "AESMC v0.16b, v0.16b \n"
2357 "AESE v0.16b, v10.16b \n"
2358 "AESMC v0.16b, v0.16b \n"
2359 "AESE v0.16b, v11.16b \n"
2360 "AESMC v0.16b, v0.16b \n"
2361 "AESE v0.16b, v28.16b \n"
2362 "AESMC v0.16b, v0.16b \n"
2363 "AESE v0.16b, v29.16b \n"
2364 "AESMC v0.16b, v0.16b \n"
2365 "EOR v19.16b, v19.16b, v21.16b \n"
2366 "AESE v0.16b, v30.16b \n"
2367 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2368 "EOR v0.16b, v0.16b, v31.16b \n"
2369 "EOR v18.16b, v18.16b, v21.16b \n"
2370
2371 "EOR v0.16b, v0.16b, v12.16b \n"
2372 "PMULL v20.1q, v19.1d, v23.1d \n"
2373 "ST1 {v0.2d}, [%[out]], #16 \n"
2374 "EOR v19.16b, v18.16b, v20.16b \n"
2375 "MOV v15.16b, v0.16b \n"
2376 "RBIT v17.16b, v19.16b \n"
2377
2378 "CBZ w11, 1f \n"
2379 "LD1 {v12.2d}, [%[input]], #16 \n"
2380 "B 2b \n"
2381
2382 /***************************************************
2383 GHASH on last block
2384 ***************************************************/
2385 "1: \n"
2386 "EOR v15.16b, v17.16b, v15.16b \n"
2387 "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
2388
2389 "#store current AES counter value \n"
2390 "ST1 {v13.2d}, [%[ctrOut]] \n"
2391 "PMULL v18.1q, v15.1d, v16.1d \n" /* a0 * b0 = C */
2392 "PMULL2 v19.1q, v15.2d, v16.2d \n" /* a1 * b1 = D */
2393 "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2394 "PMULL v21.1q, v15.1d, v20.1d \n" /* a0 * b1 = E */
2395 "PMULL2 v20.1q, v15.2d, v20.2d \n" /* a1 * b0 = F */
2396 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2397 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2398 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2399 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2400 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2401
2402 "#Reduce product from multiplication \n"
2403 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2404 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2405 "EOR v19.16b, v19.16b, v21.16b \n"
2406 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2407 "EOR v18.16b, v18.16b, v21.16b \n"
2408 "PMULL v20.1q, v19.1d, v23.1d \n"
2409 "EOR v19.16b, v18.16b, v20.16b \n"
2410 "RBIT v17.16b, v19.16b \n"
2411 "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
2412
2413 :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
2414 ,[xOut] "=r" (xPt),"=m" (aes->H)
2415 :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
2416 [input] "3" (in)
2417 ,[inX] "4" (xPt), [inY] "m" (aes->H)
2418 : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
2419 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
2420 ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24"
2421 );
2422 }
2423
2424 /* take care of partial block sizes leftover */
2425 if (partial != 0) {
2426 IncrementGcmCounter(counter);
2427 wc_AesEncrypt(aes, counter, scratch);
2428 xorbuf(scratch, in, partial);
2429 XMEMCPY(out, scratch, partial);
2430
2431 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2432 XMEMCPY(scratch, out, partial);
2433 xorbuf(x, scratch, AES_BLOCK_SIZE);
2434 GMULT(x, aes->H);
2435 }
2436
2437 /* Hash in the lengths of A and C in bits */
2438 XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2439 FlattenSzInBits(&scratch[0], authInSz);
2440 FlattenSzInBits(&scratch[8], sz);
2441 xorbuf(x, scratch, AES_BLOCK_SIZE);
2442 XMEMCPY(scratch, x, AES_BLOCK_SIZE);
2443
2444 keyPt = (byte*)aes->key;
2445 __asm__ __volatile__ (
2446
2447 "LD1 {v16.16b}, [%[tag]] \n"
2448 "LD1 {v17.16b}, %[h] \n"
2449 "RBIT v16.16b, v16.16b \n"
2450
2451 "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2452 "PMULL v18.1q, v16.1d, v17.1d \n" /* a0 * b0 = C */
2453 "PMULL2 v19.1q, v16.2d, v17.2d \n" /* a1 * b1 = D */
2454 "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2455 "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
2456 "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2457 "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n"
2458 "PMULL v21.1q, v16.1d, v20.1d \n" /* a0 * b1 = E */
2459 "PMULL2 v20.1q, v16.2d, v20.2d \n" /* a1 * b0 = F */
2460 "LD1 {v0.2d}, [%[ctr]] \n"
2461
2462 "#Set a register to all 0s using EOR \n"
2463 "EOR v22.16b, v22.16b, v22.16b \n"
2464 "EOR v20.16b, v20.16b, v21.16b \n" /* F ^ E */
2465 "AESE v0.16b, v1.16b \n"
2466 "AESMC v0.16b, v0.16b \n"
2467 "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2468 "AESE v0.16b, v2.16b \n"
2469 "AESMC v0.16b, v0.16b \n"
2470 "EOR v18.16b, v18.16b, v21.16b \n" /* low 128 bits in v3 */
2471 "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2472 "AESE v0.16b, v3.16b \n"
2473 "AESMC v0.16b, v0.16b \n"
2474 "EOR v19.16b, v19.16b, v21.16b \n" /* high 128 bits in v4 */
2475 "MOVI v23.16b, #0x87 \n"
2476 "AESE v0.16b, v4.16b \n"
2477 "AESMC v0.16b, v0.16b \n"
2478 "USHR v23.2d, v23.2d, #56 \n"
2479 "PMULL2 v20.1q, v19.2d, v23.2d \n"
2480 "AESE v0.16b, v5.16b \n"
2481 "AESMC v0.16b, v0.16b \n"
2482 "EXT v21.16b, v20.16b, v22.16b, #8 \n"
2483 "AESE v0.16b, v6.16b \n"
2484 "AESMC v0.16b, v0.16b \n"
2485 "EOR v19.16b, v19.16b, v21.16b \n"
2486 "AESE v0.16b, v7.16b \n"
2487 "AESMC v0.16b, v0.16b \n"
2488 "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2489 "AESE v0.16b, v8.16b \n"
2490 "AESMC v0.16b, v0.16b \n"
2491 "EOR v18.16b, v18.16b, v21.16b \n"
2492 "AESE v0.16b, v9.16b \n"
2493 "AESMC v0.16b, v0.16b \n"
2494 "AESE v0.16b, v10.16b \n"
2495 "AESMC v0.16b, v0.16b \n"
2496 "AESE v0.16b, v11.16b \n"
2497 "AESMC v0.16b, v0.16b \n"
2498 "AESE v0.16b, v28.16b \n"
2499 "AESMC v0.16b, v0.16b \n"
2500 "AESE v0.16b, v29.16b \n"
2501 "AESMC v0.16b, v0.16b \n"
2502 "PMULL v20.1q, v19.1d, v23.1d \n"
2503 "EOR v19.16b, v18.16b, v20.16b \n"
2504 "AESE v0.16b, v30.16b \n"
2505 "RBIT v19.16b, v19.16b \n"
2506 "EOR v0.16b, v0.16b, v31.16b \n"
2507 "EOR v19.16b, v19.16b, v0.16b \n"
2508 "STR q19, [%[out]] \n"
2509
2510 :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
2511 :[tag] "0" (sPt), [Key] "1" (keyPt),
2512 [ctr] "2" (iCtr) , [h] "m" (aes->H)
2513 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
2514 "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
2515 "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23",
2516 "v24","v25","v26","v27","v28","v29","v30","v31"
2517 );
2518
2519
2520 if (authTagSz > AES_BLOCK_SIZE) {
2521 XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
2522 }
2523 else {
2524 /* authTagSz can be smaller than AES_BLOCK_SIZE */
2525 XMEMCPY(authTag, scratch, authTagSz);
2526 }
2527
2528 return 0;
2529 }
2530 #endif /* WOLFSSL_AES_256 */
2531
2532
2533 /* aarch64 with PMULL and PMULL2
2534 * Encrypt and tag data using AES with GCM mode.
2535 * aes: Aes structure having already been set with set key function
2536 * out: encrypted data output buffer
2537 * in: plain text input buffer
2538 * sz: size of plain text and out buffer
2539 * iv: initialization vector
2540 * ivSz: size of iv buffer
2541 * authTag: buffer to hold tag
2542 * authTagSz: size of tag buffer
2543 * authIn: additional data buffer
2544 * authInSz: size of additional data buffer
2545 *
2546 * Notes:
2547 * GHASH multiplication based from Algorithm 1 from Intel GCM white paper.
2548 * "Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
2549 *
2550 * GHASH reduction Based from White Paper "Implementing GCM on ARMv8"
2551 * by Conrado P.L. Gouvea and Julio Lopez reduction on 256bit value using
2552 * Algorithm 5
2553 */
int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                   const byte* iv, word32 ivSz,
                   byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    /* sanity checks */
    if (aes == NULL || (iv == NULL && ivSz > 0) ||
                       (authTag == NULL) ||
                       (authIn == NULL && authInSz > 0) ||
                       (ivSz == 0)) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    /* in/out may only be NULL when there is no plain text to process;
     * matches the check performed in wc_AesGcmDecrypt */
    if (sz != 0 && (in == NULL || out == NULL)) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) {
        WOLFSSL_MSG("GcmEncrypt authTagSz error");
        return BAD_FUNC_ARG;
    }

    /* dispatch on round count: 10/12/14 rounds correspond to
     * AES-128/AES-192/AES-256 key sizes */
    switch (aes->rounds) {
    #ifdef WOLFSSL_AES_128
        case 10:
            return Aes128GcmEncrypt(aes, out, in, sz, iv, ivSz,
                                    authTag, authTagSz, authIn, authInSz);
    #endif
    #ifdef WOLFSSL_AES_192
        case 12:
            return Aes192GcmEncrypt(aes, out, in, sz, iv, ivSz,
                                    authTag, authTagSz, authIn, authInSz);
    #endif
    #ifdef WOLFSSL_AES_256
        case 14:
            return Aes256GcmEncrypt(aes, out, in, sz, iv, ivSz,
                                    authTag, authTagSz, authIn, authInSz);
    #endif
        default:
            WOLFSSL_MSG("AES-GCM invalid round number");
            return BAD_FUNC_ARG;
    }
}
2594
2595
2596 #ifdef HAVE_AES_DECRYPT
2597 /*
2598 * Check tag and decrypt data using AES with GCM mode.
2599 * aes: Aes structure having already been set with set key function
2600 * out: decrypted data output buffer
2601 * in: cipher text buffer
2602 * sz: size of plain text and out buffer
2603 * iv: initialization vector
2604 * ivSz: size of iv buffer
2605 * authTag: buffer holding tag
2606 * authTagSz: size of tag buffer
2607 * authIn: additional data buffer
2608 * authInSz: size of additional data buffer
2609 */
int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                   const byte* iv, word32 ivSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    word32 blocks = sz / AES_BLOCK_SIZE;   /* number of full cipher text blocks */
    word32 partial = sz % AES_BLOCK_SIZE;  /* bytes left over in a trailing block */
    const byte* c = in;
    byte* p = out;
    byte counter[AES_BLOCK_SIZE];
    byte initialCounter[AES_BLOCK_SIZE];
    byte *ctr ;
    byte scratch[AES_BLOCK_SIZE];

    /* pointer to head of counter array; see comment in the encrypt functions
     * about different optimization levels and passing arrays to inline asm */
    ctr = counter ;

    /* sanity checks */
    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
        ivSz == 0) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        /* 96-bit IV: J0 = IV || 0^31 || 1 */
        XMEMCPY(initialCounter, iv, ivSz);
        initialCounter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        /* any other IV length: J0 = GHASH(IV) */
        GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
        GMULT(initialCounter, aes->H);
    }
    XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);

    /* Calculate the authTag again using the received auth data and the
     * cipher text. */
    {
        byte Tprime[AES_BLOCK_SIZE];  /* recomputed tag */
        byte EKY0[AES_BLOCK_SIZE];    /* E(K, J0), masks the GHASH output */

        GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
        GMULT(Tprime, aes->H);
        wc_AesEncrypt(aes, ctr, EKY0);
        xorbuf(Tprime, EKY0, sizeof(Tprime));

        /* constant-time comparison; authenticate before decrypting so no
         * plain text is produced for a forged message */
        if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
            return AES_GCM_AUTH_E;
        }
    }

    /* do as many blocks as possible */
    if (blocks > 0) {
        /* pointer needed because it is incremented when read, causing
         * an issue with call to encrypt/decrypt leftovers */
        byte* keyPt = (byte*)aes->key;
        /* GCM decryption of full blocks is CTR mode: generate the keystream
         * with AES encrypt of the counter and XOR with the cipher text */
        switch(aes->rounds) {
        #ifdef WOLFSSL_AES_128
        case 10: /* AES 128 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"

            "#Create vector with the value 1  \n"
            "MOVI v14.16b, #1 \n"
            "USHR v14.2d, v14.2d, #56 \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
            "EOR v13.16b, v13.16b, v13.16b \n"
            "EXT v14.16b, v14.16b, v13.16b, #8 \n"

            "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n"
            "LD1 {v12.2d}, [%[ctr]] \n"
            "LD1 {v13.2d}, [%[input]], #16 \n"

            /* per-block loop: increment counter (big-endian add), encrypt
             * it through the 10 rounds, XOR with cipher text, store */
            "1: \n"
            "REV64 v12.16b, v12.16b \n" /* network order */
            "EXT v12.16b, v12.16b, v12.16b, #8 \n"
            "ADD v12.2d, v12.2d, v14.2d \n" /* add 1 to counter */
            "EXT v12.16b, v12.16b, v12.16b, #8 \n"
            "REV64 v12.16b, v12.16b \n" /* revert from network order */
            "MOV v0.16b, v12.16b  \n"
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n"
            "EOR v0.16b, v0.16b, v11.16b \n"

            "EOR v0.16b, v0.16b, v13.16b \n"
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n"
            "LD1 {v13.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v12.16b}, [%[ctrOut]] \n"

            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
            [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
            );
            break;
        #endif
        #ifdef WOLFSSL_AES_192
        case 12: /* AES 192 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"

            "#Create vector with the value 1  \n"
            "MOVI v16.16b, #1 \n"
            "USHR v16.2d, v16.2d, #56 \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
            "EOR v14.16b, v14.16b, v14.16b \n"
            "EXT v16.16b, v16.16b, v14.16b, #8 \n"

            "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
            "LD1 {v13.2d}, [%[Key]], #16 \n"
            "LD1 {v14.2d}, [%[ctr]] \n"
            "LD1 {v15.2d}, [%[input]], #16 \n"

            /* per-block loop: 12 rounds for AES-192 */
            "1: \n"
            "REV64 v14.16b, v14.16b \n" /* network order */
            "EXT v14.16b, v14.16b, v14.16b, #8 \n"
            "ADD v14.2d, v14.2d, v16.2d \n" /* add 1 to counter */
            "EXT v14.16b, v14.16b, v14.16b, #8 \n"
            "REV64 v14.16b, v14.16b \n" /* revert from network order */
            "MOV v0.16b, v14.16b  \n"
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v11.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v12.16b \n"
            "EOR v0.16b, v0.16b, v13.16b \n"

            "EOR v0.16b, v0.16b, v15.16b \n"
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n"
            "LD1 {v15.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v14.2d}, [%[ctrOut]] \n"

            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
            [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
            "v16"
            );
            break;
        #endif /* WOLFSSL_AES_192 */
        #ifdef WOLFSSL_AES_256
        case 14: /* AES 256 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"

            "#Create vector with the value 1  \n"
            "MOVI v18.16b, #1 \n"
            "USHR v18.2d, v18.2d, #56 \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
            "EOR v19.16b, v19.16b, v19.16b \n"
            "EXT v18.16b, v18.16b, v19.16b, #8 \n"

            "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
            "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
            "LD1 {v17.2d}, [%[ctr]] \n"
            "LD1 {v16.2d}, [%[input]], #16 \n"

            /* per-block loop: 14 rounds for AES-256 */
            "1: \n"
            "REV64 v17.16b, v17.16b \n" /* network order */
            "EXT v17.16b, v17.16b, v17.16b, #8 \n"
            "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */
            "EXT v17.16b, v17.16b, v17.16b, #8 \n"
            "REV64 v17.16b, v17.16b \n" /* revert from network order */
            "MOV v0.16b, v17.16b  \n"
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v11.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v12.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v13.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v14.16b \n"
            "EOR v0.16b, v0.16b, v15.16b \n"

            "EOR v0.16b, v0.16b, v16.16b \n"
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n"
            "LD1 {v16.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v17.2d}, [%[ctrOut]] \n"

            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
            [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
            "v16", "v17", "v18", "v19"
            );
            break;
        #endif /* WOLFSSL_AES_256 */
        default:
            WOLFSSL_MSG("Bad AES-GCM round value");
            return BAD_FUNC_ARG;
        }
    }
    if (partial != 0) {
        /* ctr was updated in place by the asm above to the last counter used;
         * advance once more for the final keystream block */
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);

        /* check if pointer is null after main AES-GCM blocks
         * helps static analysis */
        if (p == NULL || c == NULL) {
            return BAD_STATE_E;
        }
        /* XOR only the partial number of keystream bytes with cipher text */
        xorbuf(scratch, c, partial);
        XMEMCPY(p, scratch, partial);
    }
    return 0;
}
2897
2898 #endif /* HAVE_AES_DECRYPT */
2899 #endif /* HAVE_AESGCM */
2900
2901
2902 /***************************************
2903 * not 64 bit so use 32 bit mode
2904 ****************************************/
2905 #else
2906
2907 /* AES CCM/GCM use encrypt direct but not decrypt */
2908 #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \
2909 defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
/* Encrypt one 16-byte block with AES using the AArch32 crypto extensions.
 * aes->key must hold the expanded encryption key schedule and aes->rounds
 * the round count (10, 12 or 14). Returns 0. */
static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    /*
      AESE exor's input with round key
      shift rows of exor'ed result
      sub bytes for shifted rows
    */

    word32* keyPt = aes->key;
    __asm__ __volatile__ (
        "VLD1.32 {q0}, [%[CtrIn]] \n"
        "VLDM %[Key]!, {q1-q4}    \n"

        /* first 10 AESE/AESMC pairs cover the rounds common to all key
         * sizes, reloading round keys into q1-q4 as they are consumed */
        "AESE.8 q0, q1\n"
        "AESMC.8 q0, q0\n"
        "AESE.8 q0, q2\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESE.8 q0, q3\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESE.8 q0, q4\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q3}, [%[Key]]!  \n"
        "AESE.8 q0, q1\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q4}, [%[Key]]!  \n"
        "AESE.8 q0, q2\n"
        "AESMC.8 q0, q0\n"
        "AESE.8 q0, q3\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESE.8 q0, q4\n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESE.8 q0, q1\n"
        "AESMC.8 q0, q0\n"
        "AESE.8 q0, q2\n"

        /* rounds == 10 (AES-128): done, jump to the final AddRoundKey */
        "MOV r12, %[R]    \n"
        "CMP r12, #10 \n"
        "BEQ 1f    \n"
        /* two extra rounds for AES-192 */
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESE.8 q0, q1\n"
        "AESMC.8 q0, q0\n"
        "AESE.8 q0, q2\n"

        "CMP r12, #12 \n"
        "BEQ 1f    \n"
        /* two extra rounds for AES-256 */
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESE.8 q0, q1\n"
        "AESMC.8 q0, q0\n"
        "AESE.8 q0, q2\n"

        "#Final AddRoundKey then store result \n"
        "1: \n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "VEOR.32 q0, q0, q1\n"
        "VST1.32 {q0}, [%[CtrOut]]   \n"

        /* aes->rounds appears as a tied output only so %[R] gets a register;
         * its value is not modified by the asm */
        :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
         "=r" (inBlock)
        :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
         [CtrIn] "3" (inBlock)
        : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4"
    );

    return 0;
}
2983 #endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */
2984 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
2985 #ifdef HAVE_AES_DECRYPT
/* Decrypt one 16-byte block with AES using the AArch32 crypto extensions.
 * Mirrors wc_AesEncrypt above but uses AESD/AESIMC.
 * NOTE(review): AESD consumes aes->key in forward order here, so this
 * relies on the decryption key schedule having been prepared appropriately
 * by the key setup code — confirm against wc_AesSetKey for this build. */
static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    /*
      AESE exor's input with round key
      shift rows of exor'ed result
      sub bytes for shifted rows
    */

    word32* keyPt = aes->key;
    __asm__ __volatile__ (
        "VLD1.32 {q0}, [%[CtrIn]] \n"
        "VLDM %[Key]!, {q1-q4}    \n"

        /* first 10 AESD/AESIMC pairs cover the rounds common to all key
         * sizes, reloading round keys into q1-q4 as they are consumed */
        "AESD.8 q0, q1\n"
        "AESIMC.8 q0, q0\n"
        "AESD.8 q0, q2\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESD.8 q0, q3\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESD.8 q0, q4\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q3}, [%[Key]]!  \n"
        "AESD.8 q0, q1\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q4}, [%[Key]]!  \n"
        "AESD.8 q0, q2\n"
        "AESIMC.8 q0, q0\n"
        "AESD.8 q0, q3\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESD.8 q0, q4\n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESD.8 q0, q1\n"
        "AESIMC.8 q0, q0\n"
        "AESD.8 q0, q2\n"

        /* rounds == 10 (AES-128): done, jump to the final AddRoundKey */
        "MOV r12, %[R] \n"
        "CMP r12, #10  \n"
        "BEQ 1f \n"
        /* two extra rounds for AES-192 */
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESD.8 q0, q1\n"
        "AESIMC.8 q0, q0\n"
        "AESD.8 q0, q2\n"

        "CMP r12, #12  \n"
        "BEQ 1f \n"
        /* two extra rounds for AES-256 */
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "AESIMC.8 q0, q0\n"
        "VLD1.32 {q2}, [%[Key]]!  \n"
        "AESD.8 q0, q1\n"
        "AESIMC.8 q0, q0\n"
        "AESD.8 q0, q2\n"

        "#Final AddRoundKey then store result \n"
        "1: \n"
        "VLD1.32 {q1}, [%[Key]]!  \n"
        "VEOR.32 q0, q0, q1\n"
        "VST1.32 {q0}, [%[CtrOut]]    \n"

        /* aes->rounds appears as a tied output only so %[R] gets a register;
         * its value is not modified by the asm */
        :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
         "=r" (inBlock)
        :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
         [CtrIn] "3" (inBlock)
        : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4"
    );

    return 0;
}
3059 #endif /* HAVE_AES_DECRYPT */
3060 #endif /* DIRECT or COUNTER */
3061
3062 /* AES-CBC */
3063 #ifdef HAVE_AES_CBC
/* AES-CBC encrypt using AArch32 Crypto Extension instructions.
 *
 * aes  AES context holding the expanded key (aes->key), round count
 *      (aes->rounds) and the CBC chaining value / IV (aes->reg), which is
 *      updated in place with the last cipher block produced
 * out  cipher text output, sz bytes (may alias in)
 * in   plain text input, sz bytes
 * sz   byte count; only whole blocks (sz / AES_BLOCK_SIZE) are processed.
 *      When WOLFSSL_AES_CBC_LENGTHS_CHECKS is not defined a trailing
 *      partial block is silently ignored.
 *
 * Returns 0 on success, BAD_FUNC_ARG on NULL argument or unsupported
 * round count, BAD_LENGTH_E for unaligned sz when length checks are on.
 *
 * Each case pre-loads the full key schedule into q registers, then loops:
 * XOR chaining value with plain text, run all rounds, store cipher text
 * which also becomes the next chaining value.
 */
int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
    word32 numBlocks = sz / AES_BLOCK_SIZE;

    if (aes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    if (sz == 0) {
        return 0;
    }

#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
    if (sz % AES_BLOCK_SIZE) {
        return BAD_LENGTH_E;
    }
#endif

    /* do as many block size ops as possible */
    if (numBlocks > 0) {
        word32* keyPt = aes->key;
        word32* regPt = aes->reg;
        /*
        AESE exor's input with round key
        shift rows of exor'ed result
        sub bytes for shifted rows

        note: grouping AESE & AESMC together as pairs reduces latency
        */
        switch(aes->rounds) {
    #ifdef WOLFSSL_AES_128
        case 10: /* AES 128 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* 11 round keys (10 rounds + final) in q1-q11 */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            /* q0 = chaining value (IV), q12 = first input block */
            "VLD1.32 {q0}, [%[reg]] \n"
            "VLD1.32 {q12}, [%[input]]!\n"

            "1:\n"
            "#CBC operations, xorbuf in with current aes->reg \n"
            "VEOR.32 q0, q0, q12 \n"
            "AESE.8 q0, q1 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q2 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q3 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q4 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q5 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q6 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q7 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10\n"
            "VEOR.32 q0, q0, q11 \n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q12}, [%[input]]! \n"
            "B 1b \n"

            "2:\n"
            "#store current counter value at the end \n"
            "VST1.32 {q0}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt)
            :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12"
            );
            break;
    #endif /* WOLFSSL_AES_128 */
    #ifdef WOLFSSL_AES_192
        case 12: /* AES 192 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* 13 round keys: q1-q11 plus q13/q14 (q0/q12 hold data) */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VLD1.32 {q0}, [%[reg]] \n"
            "VLD1.32 {q12}, [%[input]]!\n"
            "VLD1.32 {q13}, [%[Key]]! \n"
            "VLD1.32 {q14}, [%[Key]]! \n"

            "1:\n"
            "#CBC operations, xorbuf in with current aes->reg \n"
            "VEOR.32 q0, q0, q12 \n"
            "AESE.8 q0, q1 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q2 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q3 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q4 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q5 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q6 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q7 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q11 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q13\n"
            "VEOR.32 q0, q0, q14 \n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q12}, [%[input]]! \n"
            "B 1b \n"

            "2:\n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q0}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt)
            :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14"
            );
            break;
    #endif /* WOLFSSL_AES_192 */
    #ifdef WOLFSSL_AES_256
        case 14: /* AES 256 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* 15 round keys: q1-q11, q13, q14 pre-loaded; the last two are
             * streamed through q15 inside the loop, with [Key] rewound by
             * 16 bytes each iteration to re-read the final key */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VLD1.32 {q0}, [%[reg]] \n"
            "VLD1.32 {q12}, [%[input]]!\n"
            "VLD1.32 {q13}, [%[Key]]! \n"
            "VLD1.32 {q14}, [%[Key]]! \n"

            "1:\n"
            "#CBC operations, xorbuf in with current aes->reg \n"
            "VEOR.32 q0, q0, q12 \n"
            "AESE.8 q0, q1 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q2 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q3 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q4 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q5 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q6 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q7 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q11 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q13 \n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q15}, [%[Key]]! \n"
            "AESE.8 q0, q14 \n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q15\n"
            "VLD1.32 {q15}, [%[Key]] \n"
            "VEOR.32 q0, q0, q15 \n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"
            "SUB %[Key], %[Key], #16 \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q12}, [%[input]]! \n"
            "B 1b \n"

            "2:\n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q0}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt), "=r" (keyPt)
            :"0" (out), [Key] "2" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
            );
            break;
    #endif /* WOLFSSL_AES_256 */
        default:
            WOLFSSL_MSG("Bad AES-CBC round value");
            return BAD_FUNC_ARG;
        }
    }

    return 0;
}
3304
3305 #ifdef HAVE_AES_DECRYPT
/* AES-CBC decrypt using AArch32 Crypto Extension instructions.
 *
 * aes  AES context holding the expanded decryption key schedule
 *      (aes->key), round count (aes->rounds) and the chaining value / IV
 *      (aes->reg), updated in place with the last cipher block consumed
 * out  plain text output, sz bytes (may alias in)
 * in   cipher text input, sz bytes
 * sz   byte count; must be a multiple of AES_BLOCK_SIZE (checked)
 *
 * Returns 0 on success, BAD_FUNC_ARG / BAD_LENGTH_E on bad arguments.
 *
 * Loop shape per case: save the incoming cipher block, run all AESD
 * rounds, XOR with the previous cipher block (or IV), store plain text,
 * then the saved cipher block becomes the next chaining value.
 */
int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
    word32 numBlocks = sz / AES_BLOCK_SIZE;

    if (aes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    if (sz == 0) {
        return 0;
    }

    if (sz % AES_BLOCK_SIZE) {
#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
        return BAD_LENGTH_E;
#else
        return BAD_FUNC_ARG;
#endif
    }

    /* do as many block size ops as possible */
    if (numBlocks > 0) {
        word32* keyPt = aes->key;
        word32* regPt = aes->reg;
        switch(aes->rounds) {
    #ifdef WOLFSSL_AES_128
        case 10: /* AES 128 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* 11 round keys in q1-q11; q13 = chaining value,
             * q12 = saved cipher text, q0 = working block */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VLD1.32 {q13}, [%[reg]] \n"
            "VLD1.32 {q0}, [%[input]]!\n"

            "1:\n"
            "VMOV.32 q12, q0 \n"
            "AESD.8 q0, q1\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q2\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q3\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q4\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q5\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q6\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q7\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q8\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q9\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q10\n"
            "VEOR.32 q0, q0, q11\n"

            /* CBC: XOR with previous cipher block / IV */
            "VEOR.32 q0, q0, q13\n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"
            "VMOV.32 q13, q12 \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q0}, [%[input]]! \n"
            "B 1b \n"

            "2: \n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q13}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt)
            :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13"
            );
            break;
    #endif /* WOLFSSL_AES_128 */
    #ifdef WOLFSSL_AES_192
        case 12: /* AES 192 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* 13 round keys in q1-q13; q14 = chaining value,
             * q15 = saved cipher text, q0 = working block */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "VLD1.32 {q13}, [%[Key]]! \n"
            "VLD1.32 {q14}, [%[reg]] \n"
            "VLD1.32 {q0}, [%[input]]!\n"

            "1:    \n"
            "VMOV.32 q15, q0 \n"
            "AESD.8 q0, q1\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q2\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q3\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q4\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q5\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q6\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q7\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q8\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q9\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q10\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q11\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q12\n"
            "VEOR.32 q0, q0, q13\n"

            /* CBC: XOR with previous cipher block / IV */
            "VEOR.32 q0, q0, q14\n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"
            "VMOV.32 q14, q15 \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q0}, [%[input]]! \n"
            "B 1b \n"

            "2:\n"
            "#store current counter value at the end \n"
            "VST1.32 {q15}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt)
            :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
            );
            break;
    #endif /* WOLFSSL_AES_192 */
    #ifdef WOLFSSL_AES_256
        case 14: /* AES 256 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            /* q1-q12 hold the first 12 round keys; the last three are
             * streamed through q13 each iteration, with [Key] rewound by
             * 32 bytes at the bottom of the loop */
            "VLD1.32 {q1}, [%[Key]]! \n"
            "VLD1.32 {q2}, [%[Key]]! \n"
            "VLD1.32 {q3}, [%[Key]]! \n"
            "VLD1.32 {q4}, [%[Key]]! \n"
            "VLD1.32 {q5}, [%[Key]]! \n"
            "VLD1.32 {q6}, [%[Key]]! \n"
            "VLD1.32 {q7}, [%[Key]]! \n"
            "VLD1.32 {q8}, [%[Key]]! \n"
            "VLD1.32 {q9}, [%[Key]]! \n"
            "VLD1.32 {q10}, [%[Key]]! \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "VLD1.32 {q14}, [%[reg]] \n"
            "VLD1.32 {q0}, [%[input]]!\n"

            "1:\n"
            "VMOV.32 q15, q0 \n"
            "AESD.8 q0, q1\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q2\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q3\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q4\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q5\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q6\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q7\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q8\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q9\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q10\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q11\n"
            "AESIMC.8 q0, q0\n"
            "VLD1.32 {q13}, [%[Key]]! \n"
            "AESD.8 q0, q12\n"
            "AESIMC.8 q0, q0\n"
            "AESD.8 q0, q13\n"
            "AESIMC.8 q0, q0\n"
            "VLD1.32 {q13}, [%[Key]]! \n"
            "AESD.8 q0, q13\n"
            "VLD1.32 {q13}, [%[Key]] \n"
            "VEOR.32 q0, q0, q13\n"
            "SUB %[Key], %[Key], #32 \n"

            /* CBC: XOR with previous cipher block / IV */
            "VEOR.32 q0, q0, q14\n"
            "SUB r11, r11, #1 \n"
            "VST1.32 {q0}, [%[out]]! \n"
            "VMOV.32 q14, q15 \n"

            "CMP r11, #0 \n"
            "BEQ 2f \n"
            "VLD1.32 {q0}, [%[input]]! \n"
            "B 1b \n"

            "2:\n"
            "#store current counter value at the end \n"
            "VST1.32 {q15}, [%[regOut]] \n"

            :[out] "=r" (out), [regOut] "=r" (regPt)
            :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
             [blocks] "r" (numBlocks), [reg] "1" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
            );
            break;
    #endif /* WOLFSSL_AES_256 */
        default:
            WOLFSSL_MSG("Bad AES-CBC round value");
            return BAD_FUNC_ARG;
        }
    }

    return 0;
}
3547 #endif
3548
3549 #endif /* HAVE_AES_CBC */
3550
3551 /* AES-CTR */
3552 #ifdef WOLFSSL_AES_COUNTER
3553
3554 /* Increment AES counter */
IncrementAesCounter(byte * inOutCtr)3555 static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
3556 {
3557 int i;
3558
3559 /* in network byte order so start at end and work back */
3560 for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
3561 if (++inOutCtr[i]) /* we're done unless we overflow */
3562 return;
3563 }
3564 }
3565
/* AES-CTR encrypt/decrypt (same operation) using AArch32 Crypto
 * Extension instructions.
 *
 * aes  AES context: expanded key (aes->key), round count (aes->rounds),
 *      running counter block (aes->reg, big-endian), and key-stream
 *      leftovers (aes->tmp / aes->left) carried between calls
 * out  output buffer, sz bytes
 * in   input buffer, sz bytes
 * sz   byte count; any value is accepted — leftover key-stream bytes from
 *      a previous call are consumed first, whole blocks are processed in
 *      assembly (two at a time where possible), and a trailing partial
 *      block is handled by encrypting the counter into aes->tmp
 *
 * Returns 0 on success, BAD_FUNC_ARG on NULL argument or unsupported
 * round count.
 */
int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
    byte* tmp;
    word32 numBlocks;

    if (aes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;

    /* consume any unused bytes left in aes->tmp */
    while (aes->left && sz) {
        *(out++) = *(in++) ^ *(tmp++);
        aes->left--;
        sz--;
    }

    /* do as many block size ops as possible */
    numBlocks = sz/AES_BLOCK_SIZE;
    if (numBlocks > 0) {
        /* pointer needed because it is incremented when read, causing
         * an issue with call to encrypt/decrypt leftovers */
        word32* keyPt  = aes->key;
        word32* regPt  = aes->reg;
        sz           -= numBlocks * AES_BLOCK_SIZE;
        switch(aes->rounds) {
    #ifdef WOLFSSL_AES_128
        case 10: /* AES 128 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            "VLDM %[Key]!, {q1-q4} \n"

            "#Create vector with the value 1  \n"
            "VMOV.u32 q15, #1                 \n"
            "VSHR.u64 q15, q15, #32  \n"
            "VLDM %[Key]!, {q5-q8} \n"
            "VEOR.32 q14, q14, q14    \n"
            "VLDM %[Key]!, {q9-q11} \n"
            /* q14 = 64-bit value 1 in the low lane, used to step the
             * counter while it is byte-reversed into little endian */
            "VEXT.8 q14, q15, q14, #8\n"

            "VLD1.32 {q13}, [%[reg]]\n"

            /* double block: process two counter blocks per iteration,
             * interleaving the two AESE/AESMC chains (q0 and q15) */
            "1:      \n"
            "CMP r11, #1 \n"
            "BEQ 2f    \n"
            "CMP r11, #0 \n"
            "BEQ 3f    \n"

            "VMOV.32 q0, q13  \n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "SUB r11, r11, #2     \n"
            "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
            "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q15, q15, q15, #8 \n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q15, q15\n" /* revert from network order */
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q1\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q2\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q3\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q4\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q5\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q10\n"
            "AESE.8 q15, q6\n"
            "AESMC.8 q15, q15\n"
            "VEOR.32 q0, q0, q11\n"

            "AESE.8 q15, q7\n"
            "AESMC.8 q15, q15\n"
            "VLD1.32 {q12}, [%[input]]!  \n"
            "AESE.8 q15, q8\n"
            "AESMC.8 q15, q15\n"

            "VEOR.32 q0, q0, q12\n"
            "AESE.8 q15, q9\n"
            "AESMC.8 q15, q15\n"

            "VLD1.32 {q12}, [%[input]]!  \n"
            "AESE.8 q15, q10\n"
            "VST1.32 {q0}, [%[out]]!  \n"
            "VEOR.32 q15, q15, q11\n"
            "VEOR.32 q15, q15, q12\n"
            "VST1.32 {q15}, [%[out]]!  \n"

            "B 1b \n"

            /* single block */
            "2:      \n"
            "VMOV.32 q0, q13  \n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "SUB r11, r11, #1     \n"
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10\n"
            "VLD1.32 {q12}, [%[input]]!  \n"
            "VEOR.32 q0, q0, q11\n"
            "#CTR operations, increment counter and xorbuf \n"
            "VEOR.32 q0, q0, q12\n"
            "VST1.32 {q0}, [%[out]]!  \n"

            "3: \n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q13}, [%[regOut]]   \n"

            :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
             "=r" (in)
            :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
             [blocks] "r" (numBlocks), [reg] "2" (regPt)
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14", "q15"
            );
            break;
    #endif /* WOLFSSL_AES_128 */
    #ifdef WOLFSSL_AES_192
        case 12: /* AES 192 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            "VLDM %[Key]!, {q1-q4} \n"

            "#Create vector with the value 1  \n"
            "VMOV.u32 q15, #1                 \n"
            "VSHR.u64 q15, q15, #32  \n"
            "VLDM %[Key]!, {q5-q8} \n"
            "VEOR.32 q14, q14, q14    \n"
            "VEXT.8 q14, q15, q14, #8\n"

            "VLDM %[Key]!, {q9-q10} \n"
            "VLD1.32 {q13}, [%[reg]]\n"

            /* double block; q11/q12 stream the remaining round keys, so
             * [Key] is rewound by 32 bytes at the bottom of the loop */
            "1:   \n"
            "CMP r11, #1 \n"
            "BEQ 2f \n"
            "CMP r11, #0 \n"
            "BEQ 3f   \n"

            "VMOV.32 q0, q13\n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "SUB r11, r11, #2 \n"
            "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
            "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q15, q15, q15, #8 \n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q15, q15\n" /* revert from network order */
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q1\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q2\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q3\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q4\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q5\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q10\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "AESE.8 q15, q6\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q11\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q7\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q15, q8\n"
            "AESMC.8 q15, q15\n"

            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q15, q9\n"
            "AESMC.8 q15, q15\n"
            "AESE.8 q15, q10\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q15, q11\n"
            "AESMC.8 q15, q15\n"
            "VLD1.32 {q11}, [%[Key]] \n"
            "AESE.8 q0, q12\n"
            "AESE.8 q15, q12\n"

            "VLD1.32 {q12}, [%[input]]!  \n"
            "VEOR.32 q0, q0, q11\n"
            "VEOR.32 q15, q15, q11\n"
            "VEOR.32 q0, q0, q12\n"

            "VLD1.32 {q12}, [%[input]]!  \n"
            "VST1.32 {q0}, [%[out]]!  \n"
            "VEOR.32 q15, q15, q12\n"
            "VST1.32 {q15}, [%[out]]!  \n"
            "SUB %[Key], %[Key], #32 \n"

            "B 1b \n"


            /* single block */
            "2:      \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VMOV.32 q0, q13  \n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "SUB r11, r11, #1     \n"
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q0, q11\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q11}, [%[Key]] \n"
            "AESE.8 q0, q12\n"
            "VLD1.32 {q12}, [%[input]]! \n"
            "VEOR.32 q0, q0, q11\n"
            "#CTR operations, increment counter and xorbuf \n"
            "VEOR.32 q0, q0, q12\n"
            "VST1.32 {q0}, [%[out]]!  \n"

            "3: \n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q13}, [%[regOut]] \n"

            :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
             "=r" (in)
            :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
             [blocks] "r" (numBlocks), [reg] "2" (regPt)
            /* NOTE(review): q15 is written in this path (counter vector
             * and second cipher stream) but is missing from the clobber
             * list — confirm and add "q15" */
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14"
            );
            break;
    #endif /* WOLFSSL_AES_192 */
    #ifdef WOLFSSL_AES_256
        case 14: /* AES 256 BLOCK */
            __asm__ __volatile__ (
            "MOV r11, %[blocks] \n"
            "VLDM %[Key]!, {q1-q4} \n"

            "#Create vector with the value 1  \n"
            "VMOV.u32 q15, #1                 \n"
            "VSHR.u64 q15, q15, #32  \n"
            "VLDM %[Key]!, {q5-q8} \n"
            "VEOR.32 q14, q14, q14    \n"
            "VEXT.8 q14, q15, q14, #8\n"

            "VLDM %[Key]!, {q9-q10} \n"
            "VLD1.32 {q13}, [%[reg]]\n"

            /* double block; rounds 11-14 keys stream through q11/q12, so
             * [Key] is rewound by 64 bytes at the bottom of the loop */
            "1:      \n"
            "CMP r11, #1 \n"
            "BEQ 2f    \n"
            "CMP r11, #0 \n"
            "BEQ 3f    \n"

            "VMOV.32 q0, q13  \n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "SUB r11, r11, #2     \n"
            "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
            "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q15, q15, q15, #8 \n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q15, q15\n" /* revert from network order */
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q15, q1\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q2\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q3\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q4\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q5\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q10\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "AESE.8 q15, q6\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q0, q11\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q7\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q15, q8\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q15, q9\n"
            "AESMC.8 q15, q15\n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q15, q10\n"
            "AESMC.8 q15, q15\n"

            "AESE.8 q15, q11\n"
            "AESMC.8 q15, q15\n"

            "VLD1.32 {q11}, [%[Key]]! \n"
            "AESE.8 q0, q12\n" /* rnd 12*/
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q12\n" /* rnd 12 */
            "AESMC.8 q15, q15\n"

            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q0, q11\n" /* rnd 13 */
            "AESMC.8 q0, q0\n"
            "AESE.8 q15, q11\n" /* rnd 13 */
            "AESMC.8 q15, q15\n"

            "VLD1.32 {q11}, [%[Key]] \n"
            "AESE.8 q0, q12\n" /* rnd 14 */
            "AESE.8 q15, q12\n" /* rnd 14 */

            "VLD1.32 {q12}, [%[input]]!  \n"
            "VEOR.32 q0, q0, q11\n" /* rnd 15 */
            "VEOR.32 q15, q15, q11\n" /* rnd 15 */
            "VEOR.32 q0, q0, q12\n"

            "VLD1.32 {q12}, [%[input]]!  \n"
            "VST1.32 {q0}, [%[out]]!  \n"
            "VEOR.32 q15, q15, q12\n"
            "VST1.32 {q15}, [%[out]]!  \n"
            "SUB %[Key], %[Key], #64 \n"

            /* single block */
            "B 1b \n"

            "2:      \n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "VMOV.32 q0, q13  \n"
            "AESE.8 q0, q1\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13 \n" /* network order */
            "AESE.8 q0, q2\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q3\n"
            "AESMC.8 q0, q0\n"
            "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
            "AESE.8 q0, q4\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q5\n"
            "AESMC.8 q0, q0\n"
            "VEXT.8 q13, q13, q13, #8 \n"
            "AESE.8 q0, q6\n"
            "AESMC.8 q0, q0\n"
            "VREV64.8 q13, q13\n" /* revert from network order */
            "AESE.8 q0, q7\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q8\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q9\n"
            "AESMC.8 q0, q0\n"
            "AESE.8 q0, q10\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q0, q11\n"
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q11}, [%[Key]]! \n"
            "AESE.8 q0, q12\n" /* rnd 12 */
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q12}, [%[Key]]! \n"
            "AESE.8 q0, q11\n" /* rnd 13 */
            "AESMC.8 q0, q0\n"
            "VLD1.32 {q11}, [%[Key]] \n"
            "AESE.8 q0, q12\n" /* rnd 14 */
            "VLD1.32 {q12}, [%[input]]! \n"
            "VEOR.32 q0, q0, q11\n" /* rnd 15 */
            "#CTR operations, increment counter and xorbuf \n"
            "VEOR.32 q0, q0, q12\n"
            "VST1.32 {q0}, [%[out]]!  \n"

            "3: \n"
            "#store current counter qalue at the end \n"
            "VST1.32 {q13}, [%[regOut]] \n"

            :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
             "=r" (in)
            :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
             [blocks] "r" (numBlocks), [reg] "2" (regPt)
            /* NOTE(review): q15 is written in this path (counter vector
             * and second cipher stream) but is missing from the clobber
             * list — confirm and add "q15" */
            : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
            "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14"
            );
            break;
    #endif /* WOLFSSL_AES_256 */
        default:
            WOLFSSL_MSG("Bad AES-CTR round qalue");
            return BAD_FUNC_ARG;
        }

        aes->left = 0;
    }

    /* handle non block size remaining */
    if (sz) {
        /* generate one block of key stream into aes->tmp; unused bytes
         * are kept for the next call via aes->left */
        wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
        IncrementAesCounter((byte*)aes->reg);

        aes->left = AES_BLOCK_SIZE;
        tmp = (byte*)aes->tmp;

        while (sz--) {
            *(out++) = *(in++) ^ *(tmp++);
            aes->left--;
        }
    }

    return 0;
}
4091
4092 #endif /* WOLFSSL_AES_COUNTER */
4093
4094 #ifdef HAVE_AESGCM
4095 /*
4096 * Uses Karatsuba algorithm. Reduction algorithm is based on "Implementing GCM
4097 * on ARMv8". Shifting left to account for bit reflection is based on
4098 * "Carry-Less Multiplication and Its Usage for Computing the GCM mode"
4099 */
/* Galois-field (GF(2^128)) multiply for GHASH: X = (X * Y) mod the GCM
 * polynomial, result written back through X.
 *
 * X  16-byte GHASH accumulator, in GCM (big-endian/bit-reflected) format
 * Y  16-byte hash key H; per the inline comment it was already converted
 *    to the internal byte order at set-key time
 *
 * Uses the VMULL.p64 carry-less multiply (Crypto Extension) with a
 * Karatsuba 3-multiply decomposition, then reduces via the 0xc2...
 * constant as described in "Implementing GCM on ARMv8".
 */
static void GMULT(byte* X, byte* Y)
{
    __asm__ __volatile__ (
        "VLD1.32 {q0}, [%[x]] \n"

        /* In GCM format bits are big endian, switch location of bytes to
         * allow for logical shifts and carries.
         */
        "VREV64.8 q0, q0 \n"
        "VLD1.32 {q1}, [%[y]] \n" /* converted on set key */
        "VSWP.8 d0, d1 \n"

        /* Karatsuba: lo*lo, hi*hi, and (lo^hi)*(lo^hi) cross term */
        "VMULL.p64  q5, d0, d2 \n"
        "VMULL.p64  q6, d1, d3 \n"
        "VEOR d15, d2, d3 \n"
        "VEOR d14, d0, d1 \n"
        "VMULL.p64  q7, d15, d14 \n"
        "VEOR q7, q5 \n"
        "VEOR q7, q6 \n"
        "VEOR d11, d14 \n"
        "VEOR d12, d15\n"

        /* shift to left by 1 to account for reflection */
        "VMOV q7, q6 \n"
        "VSHL.u64 q6, q6, #1 \n"
        "VSHR.u64 q7, q7, #63 \n"
        "VEOR d13, d14 \n"
        "VMOV q8, q5 \n"
        "VSHL.u64 q5, q5, #1 \n"
        "VSHR.u64 q8, q8, #63 \n"
        "VEOR d12, d17 \n"
        "VEOR d11, d16 \n"

        /* create constant 0xc200000000000000 */
        "VMOV.i32 d16, 0xc2000000 \n"
        "VSHL.u64 d16, d16, #32 \n"

        /* reduce product of multiplication */
        "VMULL.p64 q9, d10, d16 \n"
        "VEOR d11, d18 \n"
        "VEOR d12, d19 \n"
        "VMULL.p64 q9, d11, d16 \n"
        "VEOR q6, q9 \n"
        "VEOR q10, q5, q6 \n"

        /* convert to GCM format */
        "VREV64.8 q10, q10 \n"
        "VSWP.8 d20, d21 \n"

        "VST1.32 {q10}, [%[xOut]] \n"

        : [xOut] "=r" (X), [yOut] "=r" (Y)
        : [x] "0" (X), [y] "1" (Y)
        : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6" ,"q7", "q8",
        "q9", "q10", "q11" ,"q12", "q13", "q14", "q15"
    );
}
4157
4158
/* Fold one buffer into the GHASH accumulator: all full 16-byte blocks,
 * then a zero-padded partial block if any bytes remain. */
static void GHASH_UpdateBuffer(byte* x, byte* h, const byte* data,
                               word32 dataSz)
{
    byte scratch[AES_BLOCK_SIZE];
    word32 blocks  = dataSz / AES_BLOCK_SIZE;
    word32 partial = dataSz % AES_BLOCK_SIZE;

    while (blocks--) {
        xorbuf(x, data, AES_BLOCK_SIZE);
        GMULT(x, h);
        data += AES_BLOCK_SIZE;
    }
    if (partial != 0) {
        XMEMSET(scratch, 0, AES_BLOCK_SIZE);
        XMEMCPY(scratch, data, partial);
        xorbuf(x, scratch, AES_BLOCK_SIZE);
        GMULT(x, h);
    }
}

/* Compute GHASH(H, A, C) as defined for GCM.
 *
 * aes  AES context; aes->H is the 16-byte hash key
 * a    additional authenticated data (may be NULL when aSz is 0)
 * aSz  length of a in bytes
 * c    cipher text (may be NULL when cSz is 0)
 * cSz  length of c in bytes
 * s    output buffer for the hash result
 * sSz  number of result bytes to copy into s (at most AES_BLOCK_SIZE)
 *
 * Hashes A, then C, then the 64-bit bit-lengths of both, per the GCM
 * specification (NIST SP 800-38D).
 */
void GHASH(Aes* aes, const byte* a, word32 aSz,
    const byte* c, word32 cSz, byte* s, word32 sSz)
{
    byte x[AES_BLOCK_SIZE];
    byte scratch[AES_BLOCK_SIZE];
    byte* h = aes->H;

    XMEMSET(x, 0, AES_BLOCK_SIZE);

    /* Hash in A, the Additional Authentication Data */
    if (aSz != 0 && a != NULL) {
        GHASH_UpdateBuffer(x, h, a, aSz);
    }

    /* Hash in C, the Ciphertext */
    if (cSz != 0 && c != NULL) {
        GHASH_UpdateBuffer(x, h, c, cSz);
    }

    /* Hash in the lengths of A and C in bits */
    FlattenSzInBits(&scratch[0], aSz);
    FlattenSzInBits(&scratch[8], cSz);
    xorbuf(x, scratch, AES_BLOCK_SIZE);
    GMULT(x, h);

    /* Copy the result into s. */
    XMEMCPY(s, x, sSz);
}
4212
4213
4214 /* Aarch32
4215 * Encrypt and tag data using AES with GCM mode.
4216 * aes: Aes structure having already been set with set key function
4217 * out: encrypted data output buffer
4218 * in: plain text input buffer
4219 * sz: size of plain text and out buffer
4220 * iv: initialization vector
4221 * ivSz: size of iv buffer
4222 * authTag: buffer to hold tag
4223 * authTagSz: size of tag buffer
4224 * authIn: additional data buffer
4225 * authInSz: size of additional data buffer
4226 */
int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                   const byte* iv, word32 ivSz,
                   byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    word32 blocks = sz / AES_BLOCK_SIZE;
    word32 partial = sz % AES_BLOCK_SIZE;
    const byte* p = in;
    byte* c = out;
    byte counter[AES_BLOCK_SIZE];
    byte initialCounter[AES_BLOCK_SIZE];
    byte* ctr = counter;
    byte scratch[AES_BLOCK_SIZE];

    /* sanity checks - including in/out vs sz, matching wc_AesGcmDecrypt */
    if (aes == NULL || (iv == NULL && ivSz > 0) ||
                       (authTag == NULL) ||
                       (authIn == NULL && authInSz > 0) ||
                       (sz != 0 && (in == NULL || out == NULL)) ||
                       (ivSz == 0)) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) {
        WOLFSSL_MSG("GcmEncrypt authTagSz error");
        return BAD_FUNC_ARG;
    }

    /* Build initial counter J0: a 12-byte nonce gets 0x00000001 appended,
     * any other nonce length is GHASHed into a full block. */
    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(initialCounter, iv, ivSz);
        initialCounter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
    }
    XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);

    /* CTR-mode encrypt the full blocks of plain text. */
    while (blocks--) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, p, AES_BLOCK_SIZE);
        XMEMCPY(c, scratch, AES_BLOCK_SIZE);
        p += AES_BLOCK_SIZE;
        c += AES_BLOCK_SIZE;
    }

    /* Encrypt the trailing partial block, if any. */
    if (partial != 0) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, p, partial);
        XMEMCPY(c, scratch, partial);
    }

    /* Tag = GHASH(AAD, cipher text) XOR E(K, J0).
     * authTagSz <= AES_BLOCK_SIZE is guaranteed by the check above, so the
     * previous dead branch selecting AES_BLOCK_SIZE is unnecessary. */
    GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
    wc_AesEncrypt(aes, initialCounter, scratch);
    xorbuf(authTag, scratch, authTagSz);

    return 0;
}
4294
4295
4296 #ifdef HAVE_AES_DECRYPT
4297 /*
4298 * Check tag and decrypt data using AES with GCM mode.
4299 * aes: Aes structure having already been set with set key function
4300 * out: decrypted data output buffer
4301 * in: cipher text buffer
4302 * sz: size of plain text and out buffer
4303 * iv: initialization vector
4304 * ivSz: size of iv buffer
4305 * authTag: buffer holding tag
4306 * authTagSz: size of tag buffer
4307 * authIn: additional data buffer
4308 * authInSz: size of additional data buffer
4309 */
int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                   const byte* iv, word32 ivSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    word32 blocks = sz / AES_BLOCK_SIZE;
    word32 partial = sz % AES_BLOCK_SIZE;
    const byte* c = in;
    byte* p = out;
    byte counter[AES_BLOCK_SIZE];
    byte initialCounter[AES_BLOCK_SIZE];
    byte *ctr ;
    byte scratch[AES_BLOCK_SIZE];
    ctr = counter ;

    /* sanity checks */
    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
        ivSz == 0) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    /* Build initial counter J0: a 12-byte nonce gets counter value 1
     * appended; any other nonce length is GHASHed into a full block. */
    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(initialCounter, iv, ivSz);
        initialCounter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
    }
    XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);

    /* Calculate the authTag again using the received auth data and the
     * cipher text.
     * NOTE: the tag is verified BEFORE any plain text is produced, so no
     * unauthenticated data is ever written to out. */
    {
        byte Tprime[AES_BLOCK_SIZE];
        byte EKY0[AES_BLOCK_SIZE];

        GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
        /* E(K, J0) masks the GHASH output to form the tag. */
        wc_AesEncrypt(aes, ctr, EKY0);
        xorbuf(Tprime, EKY0, sizeof(Tprime));

        /* Constant-time compare to avoid leaking tag bytes via timing. */
        if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
            return AES_GCM_AUTH_E;
        }
    }

    /* CTR-mode decrypt the full blocks of cipher text. */
    while (blocks--) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, c, AES_BLOCK_SIZE);
        XMEMCPY(p, scratch, AES_BLOCK_SIZE);
        p += AES_BLOCK_SIZE;
        c += AES_BLOCK_SIZE;
    }
    /* Decrypt the trailing partial block, if any. */
    if (partial != 0) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);

        /* check if pointer is null after main AES-GCM blocks
         * helps static analysis */
        if (p == NULL || c == NULL) {
            return BAD_STATE_E;
        }
        xorbuf(scratch, c, partial);
        XMEMCPY(p, scratch, partial);
    }
    return 0;
}
4380 #endif /* HAVE_AES_DECRYPT */
4381 #endif /* HAVE_AESGCM */
4382
4383 #endif /* aarch64 */
4384
4385 #ifdef HAVE_AESGCM
4386 #ifdef WOLFSSL_AESGCM_STREAM
4387 /* Access initialization counter data. */
4388 #define AES_INITCTR(aes) ((aes)->streamData + 0 * AES_BLOCK_SIZE)
4389 /* Access counter data. */
4390 #define AES_COUNTER(aes) ((aes)->streamData + 1 * AES_BLOCK_SIZE)
4391 /* Access tag data. */
4392 #define AES_TAG(aes) ((aes)->streamData + 2 * AES_BLOCK_SIZE)
4393 /* Access last GHASH block. */
4394 #define AES_LASTGBLOCK(aes) ((aes)->streamData + 3 * AES_BLOCK_SIZE)
4395 /* Access last encrypted block. */
4396 #define AES_LASTBLOCK(aes) ((aes)->streamData + 4 * AES_BLOCK_SIZE)
4397
4398 /* GHASH one block of data.
4399 *
4400 * XOR block into tag and GMULT with H.
4401 *
4402 * @param [in, out] aes AES GCM object.
4403 * @param [in] block Block of AAD or cipher text.
4404 */
4405 #define GHASH_ONE_BLOCK(aes, block) \
4406 do { \
4407 xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \
4408 GMULT(AES_TAG(aes), aes->H); \
4409 } \
4410 while (0)
4411
4412 /* Hash in the lengths of the AAD and cipher text in bits.
4413 *
4414 * Default implementation.
4415 *
4416 * @param [in, out] aes AES GCM object.
4417 */
4418 #define GHASH_LEN_BLOCK(aes) \
4419 do { \
4420 byte scratch[AES_BLOCK_SIZE]; \
4421 FlattenSzInBits(&scratch[0], aes->aSz); \
4422 FlattenSzInBits(&scratch[8], aes->cSz); \
4423 GHASH_ONE_BLOCK(aes, scratch); \
4424 } \
4425 while (0)
4426
IncCtr(byte * ctr,word32 ctrSz)4427 static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
4428 {
4429 int i;
4430 for (i = ctrSz-1; i >= 0; i--) {
4431 if (++ctr[i])
4432 break;
4433 }
4434 }
4435
4436 /* Initialize a GHASH for streaming operations.
4437 *
4438 * @param [in, out] aes AES GCM object.
4439 */
/* Prepare the streaming GHASH state for a new operation. */
static void GHASH_INIT(Aes* aes)
{
    /* No unprocessed AAD or cipher text bytes cached yet. */
    aes->aOver = 0;
    aes->cOver = 0;
    /* Initial GHASH value is the all-zero block. */
    XMEMSET(AES_TAG(aes), 0, AES_BLOCK_SIZE);
}
4447
4448 /* Update the GHASH with AAD and/or cipher text.
4449 *
4450 * @param [in,out] aes AES GCM object.
4451 * @param [in] a Additional authentication data buffer.
4452 * @param [in] aSz Size of data in AAD buffer.
4453 * @param [in] c Cipher text buffer.
4454 * @param [in] cSz Size of data in cipher text buffer.
4455 */
static void GHASH_UPDATE(Aes* aes, const byte* a, word32 aSz, const byte* c,
    word32 cSz)
{
    word32 blocks;
    word32 partial;

    /* Hash in A, the Additional Authentication Data */
    if (aSz != 0 && a != NULL) {
        /* Update count of AAD we have hashed. */
        aes->aSz += aSz;
        /* Check if we have unprocessed data from a previous call. */
        if (aes->aOver > 0) {
            /* Calculate amount we can use - fill up the block. */
            byte sz = AES_BLOCK_SIZE - aes->aOver;
            if (sz > aSz) {
                sz = aSz;
            }
            /* Copy extra into last GHASH block array and update count. */
            XMEMCPY(AES_LASTGBLOCK(aes) + aes->aOver, a, sz);
            aes->aOver += sz;
            if (aes->aOver == AES_BLOCK_SIZE) {
                /* We have filled up the block and can process. */
                GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
                /* Reset count. */
                aes->aOver = 0;
            }
            /* Used up some data. */
            aSz -= sz;
            a += sz;
        }

        /* Calculate number of blocks of AAD and the leftover. */
        blocks = aSz / AES_BLOCK_SIZE;
        partial = aSz % AES_BLOCK_SIZE;
        /* GHASH full blocks now. */
        while (blocks--) {
            GHASH_ONE_BLOCK(aes, a);
            a += AES_BLOCK_SIZE;
        }
        if (partial != 0) {
            /* Cache the partial block for the next call. */
            XMEMCPY(AES_LASTGBLOCK(aes), a, partial);
            aes->aOver = (byte)partial;
        }
    }
    /* Cipher text arriving means the AAD phase is over; flush any cached
     * partial AAD block padded with zeros before hashing cipher text. */
    if (aes->aOver > 0 && cSz > 0 && c != NULL) {
        /* No more AAD coming and we have a partial block. */
        /* Fill the rest of the block with zeros. */
        byte sz = AES_BLOCK_SIZE - aes->aOver;
        XMEMSET(AES_LASTGBLOCK(aes) + aes->aOver, 0, sz);
        /* GHASH last AAD block. */
        GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
        /* Clear partial count for next time through. */
        aes->aOver = 0;
    }

    /* Hash in C, the Ciphertext */
    if (cSz != 0 && c != NULL) {
        /* Update count of cipher text we have hashed. */
        aes->cSz += cSz;
        /* Check if we have unprocessed cipher text from a previous call. */
        if (aes->cOver > 0) {
            /* Calculate amount we can use - fill up the block. */
            byte sz = AES_BLOCK_SIZE - aes->cOver;
            if (sz > cSz) {
                sz = cSz;
            }
            XMEMCPY(AES_LASTGBLOCK(aes) + aes->cOver, c, sz);
            /* Update count of unused encrypted counter. */
            aes->cOver += sz;
            if (aes->cOver == AES_BLOCK_SIZE) {
                /* We have filled up the block and can process. */
                GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
                /* Reset count. */
                aes->cOver = 0;
            }
            /* Used up some data. */
            cSz -= sz;
            c += sz;
        }

        /* Calculate number of blocks of cipher text and the leftover. */
        blocks = cSz / AES_BLOCK_SIZE;
        partial = cSz % AES_BLOCK_SIZE;
        /* GHASH full blocks now. */
        while (blocks--) {
            GHASH_ONE_BLOCK(aes, c);
            c += AES_BLOCK_SIZE;
        }
        if (partial != 0) {
            /* Cache the partial block for the next call. */
            XMEMCPY(AES_LASTGBLOCK(aes), c, partial);
            aes->cOver = (byte)partial;
        }
    }
}
4551
4552 /* Finalize the GHASH calculation.
4553 *
4554 * Complete hashing cipher text and hash the AAD and cipher text lengths.
4555 *
4556 * @param [in, out] aes AES GCM object.
4557 * @param [out] s Authentication tag.
4558 * @param [in] sSz Size of authentication tag required.
4559 */
/* Finish the streaming GHASH: flush any cached partial block (zero padded),
 * hash the bit lengths of AAD and cipher text, and copy sSz tag bytes to s.
 * Only one of aOver/cOver can be non-zero here since GHASH_UPDATE flushes
 * the AAD remainder as soon as cipher text arrives. */
static void GHASH_FINAL(Aes* aes, byte* s, word32 sSz)
{
    /* Prefer the cipher-text remainder; fall back to the AAD remainder. */
    byte rem = (aes->cOver > 0) ? aes->cOver : aes->aOver;

    if (rem > 0) {
        /* Zero the unused tail of the cached block and hash it. */
        XMEMSET(AES_LASTGBLOCK(aes) + rem, 0, AES_BLOCK_SIZE - rem);
        GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
    }
    /* Hash in the lengths of AAD and cipher text in bits. */
    GHASH_LEN_BLOCK(aes);
    /* Copy the requested number of tag bytes out. */
    XMEMCPY(s, AES_TAG(aes), sSz);
}
4580
4581 /* Initialize the AES GCM cipher with an IV. C implementation.
4582 *
4583 * @param [in, out] aes AES object.
4584 * @param [in] iv IV/nonce buffer.
4585 * @param [in] ivSz Length of IV/nonce data.
4586 */
static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz)
{
    ALIGN32 byte counter[AES_BLOCK_SIZE];

    if (ivSz == GCM_NONCE_MID_SZ) {
        /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */
        XMEMCPY(counter, iv, ivSz);
        XMEMSET(counter + GCM_NONCE_MID_SZ, 0,
                                         AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1);
        counter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        /* Counter is GHASH of IV. */
#ifdef OPENSSL_EXTRA
        /* Preserve the AAD count across the IV GHASH. */
        word32 aadTemp = aes->aadLen;
        aes->aadLen = 0;
#endif
        GHASH(aes, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE);
        /* NOTE(review): an extra GMULT is applied after GHASH here -
         * presumably to match the tag representation used by the streaming
         * GHASH_ONE_BLOCK path; confirm against the GMULT implementation
         * selected for this build. */
        GMULT(counter, aes->H);
#ifdef OPENSSL_EXTRA
        aes->aadLen = aadTemp;
#endif
    }

    /* Copy in the counter for use with cipher. */
    XMEMCPY(AES_COUNTER(aes), counter, AES_BLOCK_SIZE);
    /* Encrypt initial counter into a buffer for GCM. */
    wc_AesEncrypt(aes, counter, AES_INITCTR(aes));
    /* Reset state fields: no encrypted-counter bytes cached, no AAD or
     * cipher text processed yet. */
    aes->over = 0;
    aes->aSz = 0;
    aes->cSz = 0;
    /* Initialization for GHASH. */
    GHASH_INIT(aes);
}
4622
4623 /* Update the AES GCM cipher with data. C implementation.
4624 *
4625 * Only enciphers data.
4626 *
4627 * @param [in, out] aes AES object.
4628 * @param [in] out Cipher text or plaintext buffer.
4629 * @param [in] in Plaintext or cipher text buffer.
4630 * @param [in] sz Length of data.
4631 */
static void AesGcmCryptUpdate_C(Aes* aes, byte* out, const byte* in, word32 sz)
{
    word32 blocks;
    word32 partial;

    /* Check if previous encrypted block was not used up. */
    if (aes->over > 0) {
        /* Bytes still available from the cached encrypted counter block. */
        byte pSz = AES_BLOCK_SIZE - aes->over;
        if (pSz > sz) pSz = sz;

        /* Use some/all of last encrypted block. */
        xorbufout(out, AES_LASTBLOCK(aes) + aes->over, in, pSz);
        /* Mask keeps the count in [0, AES_BLOCK_SIZE): a fully consumed
         * block wraps back to 0. */
        aes->over = (aes->over + pSz) & (AES_BLOCK_SIZE - 1);

        /* Some data used. */
        sz -= pSz;
        in += pSz;
        out += pSz;
    }

    /* Calculate the number of blocks needing to be encrypted and any leftover.
     */
    blocks = sz / AES_BLOCK_SIZE;
    partial = sz & (AES_BLOCK_SIZE - 1);

    /* Encrypt block by block. */
    while (blocks--) {
        ALIGN32 byte scratch[AES_BLOCK_SIZE];
        IncrementGcmCounter(AES_COUNTER(aes));
        /* Encrypt counter into a buffer. */
        wc_AesEncrypt(aes, AES_COUNTER(aes), scratch);
        /* XOR plain text into encrypted counter into cipher text buffer. */
        xorbufout(out, scratch, in, AES_BLOCK_SIZE);
        /* Data complete. */
        in += AES_BLOCK_SIZE;
        out += AES_BLOCK_SIZE;
    }

    if (partial != 0) {
        /* Generate an extra block and use up as much as needed. */
        IncrementGcmCounter(AES_COUNTER(aes));
        /* Encrypt counter into cache so the remainder can be consumed by a
         * later call via the aes->over path above. */
        wc_AesEncrypt(aes, AES_COUNTER(aes), AES_LASTBLOCK(aes));
        /* XOR plain text into encrypted counter into cipher text buffer. */
        xorbufout(out, AES_LASTBLOCK(aes), in, partial);
        /* Keep amount of encrypted block used. */
        aes->over = partial;
    }
}
4681
4682 /* Calculates authentication tag for AES GCM. C implementation.
4683 *
4684 * @param [in, out] aes AES object.
4685 * @param [out] authTag Buffer to store authentication tag in.
4686 * @param [in] authTagSz Length of tag to create.
4687 */
static void AesGcmFinal_C(Aes* aes, byte* authTag, word32 authTagSz)
{
    /* Calculate authentication tag. */
    GHASH_FINAL(aes, authTag, authTagSz);
    /* XOR in as much of encrypted counter (E(K, J0)) as is required. */
    xorbuf(authTag, AES_INITCTR(aes), authTagSz);
#ifdef OPENSSL_EXTRA
    /* store AAD size for next call */
    aes->aadLen = aes->aSz;
#endif
    /* Zeroize last block to protect sensitive key-stream data. */
    ForceZero(AES_LASTBLOCK(aes), AES_BLOCK_SIZE);
}
4701
4702 /* Initialize an AES GCM cipher for encryption or decryption.
4703 *
4704 * Must call wc_AesInit() before calling this function.
4705 *
4706 * @param [in, out] aes AES object.
4707 * @param [in] key Buffer holding key.
4708 * @param [in] len Length of key in bytes.
4709 * @param [in] iv Buffer holding IV/nonce.
4710 * @param [in] ivSz Length of IV/nonce in bytes.
4711 * @return 0 on success.
4712 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4713 * is NULL, or the IV is NULL and no previous IV has been set.
4714 * @return MEMORY_E when dynamic memory allocation fails. (WOLFSSL_SMALL_STACK)
4715 */
int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv,
    word32 ivSz)
{
    /* Check validity of parameters: a non-zero length requires a buffer,
     * a buffer requires a non-zero length, and the IV must fit a block. */
    if ((aes == NULL) || ((len > 0) && (key == NULL)) ||
            ((ivSz == 0) && (iv != NULL)) || (ivSz > AES_BLOCK_SIZE) ||
            ((ivSz > 0) && (iv == NULL))) {
        return BAD_FUNC_ARG;
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_AESNI)
    /* Allocate the five streaming-state blocks on first use. */
    if (aes->streamData == NULL) {
        aes->streamData = (byte*)XMALLOC(5 * AES_BLOCK_SIZE, aes->heap,
                                         DYNAMIC_TYPE_AES);
        if (aes->streamData == NULL) {
            return MEMORY_E;
        }
    }
#endif

    /* Set the key if passed in. */
    if (key != NULL) {
        int ret = wc_AesGcmSetKey(aes, key, len);
        if (ret != 0) {
            return ret;
        }
    }

    if (iv != NULL) {
        /* Cache the IV in AES GCM object for later retrieval. */
        XMEMCPY((byte*)aes->reg, iv, ivSz);
        aes->nonceSz = ivSz;
    }
    else if (aes->nonceSz != 0) {
        /* No IV given - fall back to the cached copy. */
        iv = (byte*)aes->reg;
        ivSz = aes->nonceSz;
    }

    if (iv != NULL) {
        /* Initialize counter and GHASH state with the IV. */
        AesGcmInit_C(aes, iv, ivSz);
        aes->nonceSet = 1;
    }

    return 0;
}
4767
4768 /* Initialize an AES GCM cipher for encryption.
4769 *
4770 * Must call wc_AesInit() before calling this function.
4771 *
4772 * @param [in, out] aes AES object.
4773 * @param [in] key Buffer holding key.
4774 * @param [in] len Length of key in bytes.
4775 * @param [in] iv Buffer holding IV/nonce.
4776 * @param [in] ivSz Length of IV/nonce in bytes.
4777 * @return 0 on success.
4778 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4779 * is NULL, or the IV is NULL and no previous IV has been set.
4780 */
int wc_AesGcmEncryptInit(Aes* aes, const byte* key, word32 len, const byte* iv,
    word32 ivSz)
{
    /* Thin wrapper: encryption setup is identical to the generic init. */
    return wc_AesGcmInit(aes, key, len, iv, ivSz);
}
4786
4787 /* Initialize an AES GCM cipher for encryption or decryption. Get IV.
4788 *
4789 * Must call wc_AesInit() before calling this function.
4790 *
4791 * @param [in, out] aes AES object.
4792 * @param [in] key Buffer holding key.
4793 * @param [in] len Length of key in bytes.
4794 * @param [in] iv Buffer holding IV/nonce.
4795 * @param [in] ivSz Length of IV/nonce in bytes.
4796 * @return 0 on success.
4797 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4798 * is NULL, or the IV is NULL and no previous IV has been set.
4799 */
int wc_AesGcmEncryptInit_ex(Aes* aes, const byte* key, word32 len, byte* ivOut,
    word32 ivOutSz)
{
    /* Validate before dereferencing: aes->reg is read below and ivOut is
     * written, and aes->reg holds at most one block of cached nonce. */
    if ((aes == NULL) || ((ivOutSz > 0) && (ivOut == NULL)) ||
            (ivOutSz > AES_BLOCK_SIZE)) {
        return BAD_FUNC_ARG;
    }
    /* Return the cached nonce so the caller can transmit it. */
    XMEMCPY(ivOut, aes->reg, ivOutSz);
    return wc_AesGcmInit(aes, key, len, NULL, 0);
}
4806
4807 /* Update the AES GCM for encryption with data and/or authentication data.
4808 *
4809 * All the AAD must be passed to update before the plaintext.
4810 * Last part of AAD can be passed with first part of plaintext.
4811 *
4812 * Must set key and IV before calling this function.
4813 * Must call wc_AesGcmInit() before calling this function.
4814 *
4815 * @param [in, out] aes AES object.
4816 * @param [out] out Buffer to hold cipher text.
4817 * @param [in] in Buffer holding plaintext.
4818 * @param [in] sz Length of plaintext in bytes.
4819 * @param [in] authIn Buffer holding authentication data.
4820 * @param [in] authInSz Length of authentication data in bytes.
4821 * @return 0 on success.
4822 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4823 * is NULL.
4824 */
int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
    const byte* authIn, word32 authInSz)
{
    int ret = 0;

    /* Check validity of parameters. */
    if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) || ((sz > 0) &&
            ((out == NULL) || (in == NULL)))) {
        ret = BAD_FUNC_ARG;
    }

    /* Check key has been set. */
    if ((ret == 0) && (!aes->gcmKeySet)) {
        ret = MISSING_KEY;
    }
    /* Check IV has been set. */
    if ((ret == 0) && (!aes->nonceSet)) {
        ret = MISSING_IV;
    }

    /* First data of a new message with an internally-managed counter:
     * bump the 64-bit invocation counter and fail on wrap-around. */
    if ((ret == 0) && aes->ctrSet && (aes->aSz == 0) && (aes->cSz == 0)) {
        aes->invokeCtr[0]++;
        if (aes->invokeCtr[0] == 0) {
            aes->invokeCtr[1]++;
            if (aes->invokeCtr[1] == 0)
                ret = AES_GCM_OVERFLOW_E;
        }
    }

    if (ret == 0) {
        /* Encrypt the plaintext. */
        AesGcmCryptUpdate_C(aes, out, in, sz);
        /* Update the authentication tag with any authentication data and the
         * new cipher text. */
        GHASH_UPDATE(aes, authIn, authInSz, out, sz);
    }

    return ret;
}
4864
4865 /* Finalize the AES GCM for encryption and return the authentication tag.
4866 *
4867 * Must set key and IV before calling this function.
4868 * Must call wc_AesGcmInit() before calling this function.
4869 *
4870 * @param [in, out] aes AES object.
4871 * @param [out] authTag Buffer to hold authentication tag.
4872 * @param [in] authTagSz Length of authentication tag in bytes.
4873 * @return 0 on success.
4874 */
int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz)
{
    /* Tag buffer must exist and the length must be 1..AES_BLOCK_SIZE. */
    if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) ||
            (authTagSz == 0)) {
        return BAD_FUNC_ARG;
    }
    /* Key must have been set. */
    if (!aes->gcmKeySet) {
        return MISSING_KEY;
    }
    /* IV must have been set. */
    if (!aes->nonceSet) {
        return MISSING_IV;
    }

    /* Calculate authentication tag. */
    AesGcmFinal_C(aes, authTag, authTagSz);

    /* Advance the cached nonce when the counter is managed internally. */
    if (aes->ctrSet) {
        IncCtr((byte*)aes->reg, aes->nonceSz);
    }

    return 0;
}
4905
4906 #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
4907 /* Initialize an AES GCM cipher for decryption.
4908 *
4909 * Must call wc_AesInit() before calling this function.
4910 *
4911 * @param [in, out] aes AES object.
4912 * @param [in] key Buffer holding key.
4913 * @param [in] len Length of key in bytes.
4914 * @param [in] iv Buffer holding IV/nonce.
4915 * @param [in] ivSz Length of IV/nonce in bytes.
4916 * @return 0 on success.
4917 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4918 * is NULL, or the IV is NULL and no previous IV has been set.
4919 */
int wc_AesGcmDecryptInit(Aes* aes, const byte* key, word32 len, const byte* iv,
    word32 ivSz)
{
    /* Thin wrapper: decryption setup is identical to the generic init. */
    return wc_AesGcmInit(aes, key, len, iv, ivSz);
}
4925
4926 /* Update the AES GCM for decryption with data and/or authentication data.
4927 *
4928 * All the AAD must be passed to update before the cipher text.
4929 * Last part of AAD can be passed with first part of cipher text.
4930 *
4931 * Must set key and IV before calling this function.
4932 * Must call wc_AesGcmInit() before calling this function.
4933 *
4934 * @param [in, out] aes AES object.
4935 * @param [out] out Buffer to hold plaintext.
4936 * @param [in] in Buffer holding cipher text.
4937 * @param [in] sz Length of cipher text in bytes.
4938 * @param [in] authIn Buffer holding authentication data.
4939 * @param [in] authInSz Length of authentication data in bytes.
4940 * @return 0 on success.
4941 * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4942 * is NULL.
4943 */
int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
    const byte* authIn, word32 authInSz)
{
    /* Non-zero lengths require their buffers. */
    if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) ||
            ((sz > 0) && ((out == NULL) || (in == NULL)))) {
        return BAD_FUNC_ARG;
    }
    /* Key must have been set. */
    if (!aes->gcmKeySet) {
        return MISSING_KEY;
    }
    /* IV must have been set. */
    if (!aes->nonceSet) {
        return MISSING_IV;
    }

    /* Update the authentication tag with any authentication data and the
     * cipher text BEFORE decrypting - the tag is computed over cipher
     * text, which here is the input. */
    GHASH_UPDATE(aes, authIn, authInSz, in, sz);
    /* Decrypt the cipher text. */
    AesGcmCryptUpdate_C(aes, out, in, sz);

    return 0;
}
4975
4976 /* Finalize the AES GCM for decryption and check the authentication tag.
4977 *
4978 * Must set key and IV before calling this function.
4979 * Must call wc_AesGcmInit() before calling this function.
4980 *
4981 * @param [in, out] aes AES object.
4982 * @param [in] authTag Buffer holding authentication tag.
4983 * @param [in] authTagSz Length of authentication tag in bytes.
4984 * @return 0 on success.
4985 */
int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
{
    ALIGN32 byte calcTag[AES_BLOCK_SIZE];

    /* Tag buffer must exist and the length must be 1..AES_BLOCK_SIZE. */
    if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) ||
            (authTagSz == 0)) {
        return BAD_FUNC_ARG;
    }
    /* Key must have been set. */
    if (!aes->gcmKeySet) {
        return MISSING_KEY;
    }
    /* IV must have been set. */
    if (!aes->nonceSet) {
        return MISSING_IV;
    }

    /* Calculate the authentication tag from the streamed data. */
    AesGcmFinal_C(aes, calcTag, authTagSz);
    /* Constant-time compare against the tag passed in to avoid leaking
     * tag bytes via timing. */
    if (ConstantCompare(authTag, calcTag, authTagSz) != 0) {
        return AES_GCM_AUTH_E;
    }

    return 0;
}
5018 #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
5019 #endif /* WOLFSSL_AESGCM_STREAM */
5020 #endif /* HAVE_AESGCM */
5021
5022
5023 #ifdef HAVE_AESCCM
5024 /* Software version of AES-CCM from wolfcrypt/src/aes.c
5025 * Gets some speed up from hardware acceleration of wc_AesEncrypt */
5026
/* CBC-MAC step for CCM: absorb inSz bytes of in into the running MAC
 * value held in out (one block), encrypting after each block. */
static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    /* Absorb full blocks of input. */
    for (; inSz >= AES_BLOCK_SIZE;
            inSz -= AES_BLOCK_SIZE, in += AES_BLOCK_SIZE) {
        xorbuf(out, in, AES_BLOCK_SIZE);
        wc_AesEncrypt(aes, out, out);
    }

    /* Absorb the trailing partial block, if any (implicitly zero padded
     * since only inSz bytes are XORed in). */
    if (inSz > 0) {
        xorbuf(out, in, inSz);
        wc_AesEncrypt(aes, out, out);
    }
}
5044
5045
/* CBC-MAC the CCM additional authentication data: the AAD length is
 * encoded into the first bytes of the block (per RFC 3610), the AAD is
 * packed after it, and the result is folded into out via roll_x. */
static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    word32 authLenSz;
    word32 remainder;

    /* encode the length in */
    if (inSz <= 0xFEFF) {
        /* Lengths below 0xFF00 use a plain 2-byte big-endian encoding. */
        authLenSz = 2;
        out[0] ^= ((inSz & 0xFF00) >> 8);
        out[1] ^= (inSz & 0x00FF);
    }
    else if (inSz <= 0xFFFFFFFF) {
        /* Larger lengths use the 0xFF,0xFE marker plus 4 bytes. */
        authLenSz = 6;
        out[0] ^= 0xFF; out[1] ^= 0xFE;
        out[2] ^= ((inSz & 0xFF000000) >> 24);
        out[3] ^= ((inSz & 0x00FF0000) >> 16);
        out[4] ^= ((inSz & 0x0000FF00) >> 8);
        out[5] ^= (inSz & 0x000000FF);
    }
    /* Note, the protocol handles auth data up to 2^64, but we are
     * using 32-bit sizes right now, so the bigger data isn't handled
     * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */
    else
        return;

    /* start fill out the rest of the first block */
    remainder = AES_BLOCK_SIZE - authLenSz;
    if (inSz >= remainder) {
        /* plenty of bulk data to fill the remainder of this block */
        xorbuf(out + authLenSz, in, remainder);
        inSz -= remainder;
        in += remainder;
    }
    else {
        /* not enough bulk data, copy what is available, and pad zero */
        xorbuf(out + authLenSz, in, inSz);
        inSz = 0;
    }
    wc_AesEncrypt(aes, out, out);

    /* Absorb any remaining AAD as full/partial blocks. */
    if (inSz > 0)
        roll_x(aes, in, inSz, out);
}
5089
5090
AesCcmCtrInc(byte * B,word32 lenSz)5091 static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
5092 {
5093 word32 i;
5094
5095 for (i = 0; i < lenSz; i++) {
5096 if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return;
5097 }
5098 }
5099
5100
5101 /* return 0 on success */
/* return 0 on success */
int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
    const byte* nonce, word32 nonceSz,
    byte* authTag, word32 authTagSz,
    const byte* authIn, word32 authInSz)
{
    byte A[AES_BLOCK_SIZE];
    byte B[AES_BLOCK_SIZE];
    byte lenSz;
    word32 i;
    byte mask = 0xFF;
    word32 wordSz = (word32)sizeof(word32);

    /* sanity check on arguments */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
        return BAD_FUNC_ARG;

    if (wc_AesCcmCheckTagSize(authTagSz) != 0) {
        return BAD_FUNC_ARG;
    }

    /* Build B0: flags byte, nonce, then the message length in the last
     * lenSz bytes (RFC 3610 / NIST SP 800-38C formatting). */
    XMEMCPY(B+1, nonce, nonceSz);
    lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
    /* Flags: Adata bit, encoded tag length, encoded length-field size. */
    B[0] = (authInSz > 0 ? 64 : 0)
         + (8 * (((byte)authTagSz - 2) / 2))
         + (lenSz - 1);
    for (i = 0; i < lenSz; i++) {
        /* NOTE(review): mask zeroes both the shift amount and the result
         * for bytes beyond sizeof(word32), so high length bytes encode as
         * zero - presumably intentional for 32-bit sizes; confirm against
         * the generic aes.c implementation. */
        if (mask && i >= wordSz)
            mask = 0x00;
        B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
    }

    /* CBC-MAC B0, the AAD and the plain text to form the raw tag in A. */
    wc_AesEncrypt(aes, B, A);

    if (authInSz > 0)
        roll_auth(aes, authIn, authInSz, A);
    if (inSz > 0)
        roll_x(aes, in, inSz, A);
    XMEMCPY(authTag, A, authTagSz);

    /* A0 counter block: flags byte then counter value 0; encrypt it and
     * XOR into the tag. */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, A);
    xorbuf(authTag, A, authTagSz);

    /* CTR-mode encrypt the payload starting from counter value 1. */
    B[15] = 1;
    while (inSz >= AES_BLOCK_SIZE) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, AES_BLOCK_SIZE);
        XMEMCPY(out, A, AES_BLOCK_SIZE);

        AesCcmCtrInc(B, lenSz);
        inSz -= AES_BLOCK_SIZE;
        in += AES_BLOCK_SIZE;
        out += AES_BLOCK_SIZE;
    }
    if (inSz > 0) {
        /* Trailing partial block. */
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, inSz);
        XMEMCPY(out, A, inSz);
    }

    /* Zeroize key-stream and counter material. */
    ForceZero(A, AES_BLOCK_SIZE);
    ForceZero(B, AES_BLOCK_SIZE);

    return 0;
}
5170
5171 #ifdef HAVE_AES_DECRYPT
/* AES-CCM decrypt-and-verify (RFC 3610 / NIST SP 800-38C).
 *
 * Decrypts inSz bytes from in to out in CTR mode, recomputes the CBC-MAC
 * over the recovered plaintext, and compares it against authTag in
 * constant time. On tag mismatch the plaintext output is zeroed.
 *
 * aes       initialized AES context (encrypt key already set; CCM only
 *           ever runs the cipher forward)
 * out       plaintext output, at least inSz bytes
 * in        ciphertext input, inSz bytes
 * inSz      payload length in bytes
 * nonce     nonce, 7..13 bytes
 * nonceSz   nonce length; lenSz = 15 - nonceSz bytes carry the length field
 * authTag   expected tag, authTagSz bytes
 * authTagSz tag length, validated by wc_AesCcmCheckTagSize()
 * authIn    optional additional authenticated data
 * authInSz  AAD length in bytes
 *
 * return 0 on success, BAD_FUNC_ARG on bad arguments,
 * AES_CCM_AUTH_E when the tag does not verify */
int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                   const byte* nonce, word32 nonceSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    byte A[AES_BLOCK_SIZE];    /* scratch: keystream block, then MAC state */
    byte B[AES_BLOCK_SIZE];    /* scratch: CTR block A_i, then B_0 header */
    byte* o;
    byte lenSz;
    word32 i, oSz;
    int result = 0;
    byte mask     = 0xFF;
    word32 wordSz = (word32)sizeof(word32);

    /* sanity check on arguments */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
        return BAD_FUNC_ARG;

    if (wc_AesCcmCheckTagSize(authTagSz) != 0) {
        return BAD_FUNC_ARG;
    }

    o = out;
    oSz = inSz;
    XMEMCPY(B+1, nonce, nonceSz);
    lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;

    /* CTR block A_1: flags = L-1, counter zeroed then set to 1. */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    B[15] = 1;

    /* Decrypt full blocks in CTR mode. */
    while (oSz >= AES_BLOCK_SIZE) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, AES_BLOCK_SIZE);
        XMEMCPY(o, A, AES_BLOCK_SIZE);

        AesCcmCtrInc(B, lenSz);
        oSz -= AES_BLOCK_SIZE;
        in += AES_BLOCK_SIZE;
        o += AES_BLOCK_SIZE;
    }
    /* NOTE(review): condition tests inSz but copies oSz bytes; when inSz is
     * an exact block multiple oSz is 0 here, so this performs a keystream
     * encrypt whose output is unused — harmless, but `oSz > 0` would be the
     * tighter test. Confirm against upstream before changing. */
    if (inSz > 0) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, oSz);
        XMEMCPY(o, A, oSz);
    }

    /* NOTE(review): this zeroes the counter and encrypts into A, but A is
     * overwritten below before being read — appears to be dead work kept
     * from the reference implementation; verify before removing. */
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, A);

    o = out;
    oSz = inSz;

    /* Rebuild B_0 and recompute the CBC-MAC over AAD + plaintext.
     * Flags (RFC 3610): bit 6 = Adata, bits 3..5 = (t-2)/2, bits 0..2 = L-1. */
    B[0] = (authInSz > 0 ? 64 : 0)
         + (8 * (((byte)authTagSz - 2) / 2))
         + (lenSz - 1);
    /* Encode inSz big-endian into the trailing lenSz bytes; mask drops to 0
     * past wordSz so bytes beyond a word32 are written as 0. */
    for (i = 0; i < lenSz; i++) {
        if (mask && i >= wordSz)
            mask = 0x00;
        B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
    }

    wc_AesEncrypt(aes, B, A);

    if (authInSz > 0)
        roll_auth(aes, authIn, authInSz, A);
    if (inSz > 0)
        roll_x(aes, o, oSz, A);

    /* Tag = MAC XOR E(A_0): reuse B as counter block 0. */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, B);
    xorbuf(A, B, authTagSz);

    /* Constant-time compare so timing does not leak match position. */
    if (ConstantCompare(A, authTag, authTagSz) != 0) {
        /* If the authTag check fails, don't keep the decrypted data.
         * Unfortunately, you need the decrypted data to calculate the
         * check value. */
        XMEMSET(out, 0, inSz);
        result = AES_CCM_AUTH_E;
    }

    /* Scrub key-stream / MAC scratch from the stack. */
    ForceZero(A, AES_BLOCK_SIZE);
    ForceZero(B, AES_BLOCK_SIZE);
    o = NULL;

    return result;
}
5264 #endif /* HAVE_AES_DECRYPT */
5265 #endif /* HAVE_AESCCM */
5266
5267
5268
5269 #ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */
/* Set the AES key for GCM use and precompute the GHASH key H.
 *
 * aes  AES context to initialize
 * key  raw key material
 * len  key length in bytes; must be 16, 24 or 32
 *
 * H = E_K(0^128) is stored in aes->H, then permuted in place into the
 * layout the NEON GHASH code in this file expects.
 *
 * return 0 on success, BAD_FUNC_ARG for an invalid key length, otherwise
 * the error code from wc_AesSetKey() */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
    int ret;
    byte iv[AES_BLOCK_SIZE];

    if (!((len == 16) || (len == 24) || (len == 32)))
        return BAD_FUNC_ARG;

    /* GCM always runs the block cipher forward; the zero IV here is only
     * used to derive H below. */
    XMEMSET(iv, 0, AES_BLOCK_SIZE);
    ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION);

    if (ret == 0) {
#ifdef WOLFSSL_AESGCM_STREAM
        aes->gcmKeySet = 1;
#endif
        /* H = AES encryption of the all-zero block */
        wc_AesEncrypt(aes, iv, aes->H);
#if defined(__aarch64__)
        {
            /* Reverse the bit order within every byte of H (RBIT on the
             * full 16-byte vector) — presumably the layout the 64-bit
             * GF(2^128) multiply elsewhere in this file expects. */
            word32* pt = (word32*)aes->H;
            __asm__ volatile (
                "LD1 {v0.16b}, [%[h]] \n"
                "RBIT v0.16b, v0.16b \n"
                "ST1 {v0.16b}, [%[out]] \n"
                : [out] "=r" (pt)
                : [h] "0" (pt)
                : "cc", "memory", "v0"
            );
        }
#else
        {
            /* 32-bit NEON: byte-reverse each 64-bit half (VREV64.8) and
             * swap the halves (VSWP), i.e. reverse all 16 bytes of H for
             * the Aarch32 GHASH implementation. */
            word32* pt = (word32*)aes->H;
            __asm__ volatile (
                "VLD1.32 {q0}, [%[h]] \n"
                "VREV64.8 q0, q0 \n"
                "VSWP.8 d0, d1 \n"
                "VST1.32 {q0}, [%[out]] \n"
                : [out] "=r" (pt)
                : [h] "0" (pt)
                : "cc", "memory", "q0"
            );
        }
#endif
    }

    return ret;
}
5317
5318 #endif /* HAVE_AESGCM */
5319
5320 /* AES-DIRECT */
5321 #if defined(WOLFSSL_AES_DIRECT)
5322 /* Allow direct access to one block encrypt */
wc_AesEncryptDirect(Aes * aes,byte * out,const byte * in)5323 void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
5324 {
5325 if (aes == NULL || out == NULL || in == NULL) {
5326 WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect");
5327 return;
5328 }
5329 wc_AesEncrypt(aes, in, out);
5330 }
5331 #ifdef HAVE_AES_DECRYPT
5332 /* Allow direct access to one block decrypt */
wc_AesDecryptDirect(Aes * aes,byte * out,const byte * in)5333 void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
5334 {
5335 if (aes == NULL || out == NULL || in == NULL) {
5336 WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect");
5337 return;
5338 }
5339 wc_AesDecrypt(aes, in, out);
5340 }
5341 #endif /* HAVE_AES_DECRYPT */
5342 #endif /* WOLFSSL_AES_DIRECT */
5343 #endif /* !NO_AES && WOLFSSL_ARMASM */
5344