1 /* armv8-aes.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20  */
21 
22 
/*
 * There are two versions: one for 64-bit (AArch64) and one for 32-bit
 * (AArch32). If you change one, check the other.
 */
27 
28 
29 #ifdef HAVE_CONFIG_H
30     #include <config.h>
31 #endif
32 
33 #include <wolfssl/wolfcrypt/settings.h>
34 
35 #if !defined(NO_AES) && defined(WOLFSSL_ARMASM)
36 
37 #ifdef HAVE_FIPS
38 #undef HAVE_FIPS
39 #endif
40 
41 #include <wolfssl/wolfcrypt/aes.h>
42 #include <wolfssl/wolfcrypt/error-crypt.h>
43 #include <wolfssl/wolfcrypt/logging.h>
44 #ifdef NO_INLINE
45     #include <wolfssl/wolfcrypt/misc.h>
46 #else
47     #define WOLFSSL_MISC_INCLUDED
48     #include <wolfcrypt/src/misc.c>
49 #endif
50 
51 #ifdef _MSC_VER
52     /* 4127 warning constant while(1)  */
53     #pragma warning(disable: 4127)
54 #endif
55 
56 
57 static const byte rcon[] = {
58     0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,0x1B, 0x36
59     /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
60 };
61 
62 /* get table value from hardware */
#ifdef __aarch64__
    /* SBOX(x): x must be a 32-bit lvalue. The value is broadcast to all four
     * 32-bit lanes, AESE is applied to an all-zero state using that vector,
     * and lane 0 of the result is written back to x. */
    #define SBOX(x)                                \
        do {                                       \
            __asm__ volatile (                     \
                "DUP v1.4s, %w[val]       \n"      \
                "MOVI v0.16b, #0          \n"      \
                "AESE v0.16b, v1.16b      \n"      \
                "UMOV %w[val], v0.s[0]    \n"      \
                : [val] "+r" ((x))                 \
                :                                  \
                : "cc", "memory", "v0", "v1"       \
            );                                     \
        } while(0)

    /* IMIX(x): x must be a pointer lvalue. The 16 bytes it points at are
     * loaded, passed through AESIMC, and stored back to the same address. */
    #define IMIX(x)                                \
        do {                                       \
            __asm__ volatile (                     \
                "LD1 {v0.16b}, [%[blk]]   \n"      \
                "AESIMC v0.16b, v0.16b    \n"      \
                "ST1 {v0.16b}, [%[blk]]   \n"      \
                : [blk] "+r" ((x))                 \
                :                                  \
                : "cc", "memory", "v0"             \
            );                                     \
        } while(0)
#else /* if not defined __aarch64__ then use 32 bit version */
    /* SBOX(x): 32-bit variant of the same sequence, using q0/q1. */
    #define SBOX(x)                                \
        do {                                       \
            __asm__ volatile (                     \
                "VDUP.32 q1, %[val]       \n"      \
                "VMOV.i32 q0, #0          \n"      \
                "AESE.8 q0, q1            \n"      \
                "VMOV.32 %[val], d0[0]    \n"      \
                : [val] "+r" ((x))                 \
                :                                  \
                : "cc", "memory", "q0", "q1"       \
            );                                     \
        } while(0)

    /* IMIX(x): 32-bit variant; load 16 bytes at x, AESIMC, store back. */
    #define IMIX(x)                                \
        do {                                       \
            __asm__ volatile (                     \
                "VLD1.32 {q0}, [%[blk]]   \n"      \
                "AESIMC.8 q0, q0          \n"      \
                "VST1.32 {q0}, [%[blk]]   \n"      \
                : [blk] "+r" ((x))                 \
                :                                  \
                : "cc", "memory", "q0"             \
            );                                     \
        } while(0)
#endif /* aarch64 */
114 
115 
116 #ifdef HAVE_AESGCM
117 
IncrementGcmCounter(byte * inOutCtr)118 static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
119 {
120     int i;
121 
122     /* in network byte order so start at end and work back */
123     for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
124         if (++inOutCtr[i])  /* we're done unless we overflow */
125             return;
126     }
127 }
128 
129 
FlattenSzInBits(byte * buf,word32 sz)130 static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
131 {
132     /* Multiply the sz by 8 */
133     word32 szHi = (sz >> (8*sizeof(sz) - 3));
134     sz <<= 3;
135 
136     /* copy over the words of the sz into the destination buffer */
137     buf[0] = (szHi >> 24) & 0xff;
138     buf[1] = (szHi >> 16) & 0xff;
139     buf[2] = (szHi >>  8) & 0xff;
140     buf[3] = szHi & 0xff;
141     buf[4] = (sz >> 24) & 0xff;
142     buf[5] = (sz >> 16) & 0xff;
143     buf[6] = (sz >>  8) & 0xff;
144     buf[7] = sz & 0xff;
145 }
146 
147 #endif /* HAVE_AESGCM */
148 
/* Similar to the wolfSSL software implementation of AES key expansion.
 * The places where table lookups were made now use hardware instructions,
 * and the decryption key schedule is altered to match. */
/* Expand userKey into the round-key schedule stored in aes->key.
 *
 * aes      context to initialize; keylen and rounds are set here
 * userKey  raw key bytes
 * keylen   key size in bytes: 16, 24 or 32
 * iv       passed on to wc_AesSetIV()
 * dir      when AES_DECRYPTION, the schedule is reversed and the inner round
 *          keys are run through IMIX
 *
 * Returns BAD_FUNC_ARG for a NULL aes/userKey, an unsupported or
 * not-compiled-in key size, or a decrypt request without HAVE_AES_DECRYPT;
 * otherwise returns the result of wc_AesSetIV().
 */
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
            const byte* iv, int dir)
{
    word32       t;
    word32*      w;
    unsigned int n = 0;

    if (aes == NULL || userKey == NULL)
        return BAD_FUNC_ARG;
    if (keylen != 16 && keylen != 24 && keylen != 32)
        return BAD_FUNC_ARG;
#if defined(AES_MAX_KEY_SIZE)
    /* Check key length against the configured maximum */
    if (keylen > (word32)(AES_MAX_KEY_SIZE / 8))
        return BAD_FUNC_ARG;
#endif

#ifdef WOLFSSL_AES_COUNTER
    aes->left = 0;
#endif /* WOLFSSL_AES_COUNTER */

    aes->keylen = keylen;
    aes->rounds = keylen/4 + 6;

    /* the first keylen bytes of the schedule are the user key itself */
    w = aes->key;
    XMEMCPY(w, userKey, keylen);

    switch (keylen) {
#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \
        defined(WOLFSSL_AES_128)
    case 16:
        /* ten iterations, each deriving the next four words */
        for (n = 0; n < 10; n++, w += 4) {
            t = w[3];
            SBOX(t);
            t = rotrFixed(t, 8);
            w[4] = w[0] ^ t ^ rcon[n];
            w[5] = w[1] ^ w[4];
            w[6] = w[2] ^ w[5];
            w[7] = w[3] ^ w[6];
        }
        break;
#endif /* 128 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \
        defined(WOLFSSL_AES_192)
    case 24:
        /* eight iterations; the last one stops after four words */
        do {
            t = w[5];
            SBOX(t);
            t = rotrFixed(t, 8);
            w[6] = w[0] ^ t ^ rcon[n];
            w[7] = w[1] ^ w[6];
            w[8] = w[2] ^ w[7];
            w[9] = w[3] ^ w[8];
            if (++n < 8) {
                w[10] = w[4] ^ w[9];
                w[11] = w[5] ^ w[10];
                w += 6;
            }
        } while (n < 8);
        break;
#endif /* 192 */

#if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \
        defined(WOLFSSL_AES_256)
    case 32:
        /* seven iterations; the last one stops after four words */
        do {
            t = w[7];
            SBOX(t);
            t = rotrFixed(t, 8);
            w[8]  = w[0] ^ t ^ rcon[n];
            w[9]  = w[1] ^ w[8];
            w[10] = w[2] ^ w[9];
            w[11] = w[3] ^ w[10];
            if (++n < 7) {
                /* second half: substitution only, no rotate and no rcon */
                t = w[11];
                SBOX(t);
                w[12] = w[4] ^ t;
                w[13] = w[5] ^ w[12];
                w[14] = w[6] ^ w[13];
                w[15] = w[7] ^ w[14];
                w += 8;
            }
        } while (n < 7);
        break;
#endif /* 256 */

    default:
        return BAD_FUNC_ARG;
    }

    if (dir == AES_DECRYPTION) {
#ifndef HAVE_AES_DECRYPT
        WOLFSSL_MSG("AES Decryption not compiled in");
        return BAD_FUNC_ARG;
#else
        unsigned int lo, hi, k;

        w = aes->key;

        /* invert the order of the round keys, four words at a time */
        for (lo = 0, hi = 4 * aes->rounds; lo < hi; lo += 4, hi -= 4) {
            for (k = 0; k < 4; k++) {
                t         = w[lo + k];
                w[lo + k] = w[hi + k];
                w[hi + k] = t;
            }
        }

        /* apply the inverse MixColumn transform to all round keys but the
           first and the last */
        for (n = 1; n < aes->rounds; n++) {
            w += 4;
            IMIX(w);
        }
#endif /* HAVE_AES_DECRYPT */
    }

    return wc_AesSetIV(aes, iv);
}
283 
284 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
wc_AesSetKeyDirect(Aes * aes,const byte * userKey,word32 keylen,const byte * iv,int dir)285     int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
286                         const byte* iv, int dir)
287     {
288         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
289     }
290 #endif
291 
292 /* wc_AesSetIV is shared between software and hardware */
/* Load the chaining/IV register aes->reg.
 *
 * aes  context to update
 * iv   AES_BLOCK_SIZE bytes to copy, or NULL to zero the register
 *
 * Returns 0 on success, BAD_FUNC_ARG when aes is NULL.
 */
int wc_AesSetIV(Aes* aes, const byte* iv)
{
    if (aes == NULL) {
        return BAD_FUNC_ARG;
    }

    if (iv == NULL) {
        XMEMSET(aes->reg,  0, AES_BLOCK_SIZE);
    }
    else {
        XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
    }

    return 0;
}
305 
306 
307 #ifdef __aarch64__
308 /* AES CCM/GCM use encrypt direct but not decrypt */
309 #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \
310     defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
wc_AesEncrypt(Aes * aes,const byte * inBlock,byte * outBlock)311     static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
312     {
313             word32* keyPt = aes->key;
314 
315             /*
316               AESE exor's input with round key
317                    shift rows of exor'ed result
318                    sub bytes for shifted rows
319              */
320 
321             __asm__ __volatile__ (
322                 "LD1 {v0.16b}, [%[CtrIn]] \n"
323                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
324 
325                 "AESE v0.16b, v1.16b  \n"
326                 "AESMC v0.16b, v0.16b \n"
327                 "AESE v0.16b, v2.16b  \n"
328                 "AESMC v0.16b, v0.16b \n"
329                 "AESE v0.16b, v3.16b  \n"
330                 "AESMC v0.16b, v0.16b \n"
331                 "AESE v0.16b, v4.16b  \n"
332                 "AESMC v0.16b, v0.16b \n"
333 
334                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
335                 "AESE v0.16b, v1.16b  \n"
336                 "AESMC v0.16b, v0.16b \n"
337                 "AESE v0.16b, v2.16b  \n"
338                 "AESMC v0.16b, v0.16b \n"
339                 "AESE v0.16b, v3.16b  \n"
340                 "AESMC v0.16b, v0.16b \n"
341                 "AESE v0.16b, v4.16b  \n"
342                 "AESMC v0.16b, v0.16b \n"
343 
344                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
345                 "AESE v0.16b, v1.16b  \n"
346                 "AESMC v0.16b, v0.16b \n"
347                 "AESE v0.16b, v2.16b  \n"
348 
349                 "#subtract rounds done so far and see if should continue\n"
350                 "MOV w12, %w[R]    \n"
351                 "SUB w12, w12, #10 \n"
352                 "CBZ w12, 1f       \n"
353                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
354                 "AESMC v0.16b, v0.16b \n"
355                 "AESE v0.16b, v1.16b  \n"
356                 "AESMC v0.16b, v0.16b \n"
357                 "AESE v0.16b, v2.16b  \n"
358 
359                 "SUB w12, w12, #2 \n"
360                 "CBZ w12, 1f      \n"
361                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
362                 "AESMC v0.16b, v0.16b \n"
363                 "AESE v0.16b, v1.16b  \n"
364                 "AESMC v0.16b, v0.16b \n"
365                 "AESE v0.16b, v2.16b  \n"
366 
367                 "#Final AddRoundKey then store result \n"
368                 "1: \n"
369                 "LD1 {v1.2d}, [%[Key]], #16 \n"
370                 "EOR v0.16b, v0.16b, v1.16b  \n"
371                 "ST1 {v0.16b}, [%[CtrOut]]   \n"
372 
373                 :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
374                  "=r" (inBlock)
375                 :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
376                  [CtrIn] "3" (inBlock)
377                 : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
378             );
379 
380         return 0;
381     }
382 #endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */
383 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
384     #ifdef HAVE_AES_DECRYPT
wc_AesDecrypt(Aes * aes,const byte * inBlock,byte * outBlock)385     static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
386     {
387             word32* keyPt = aes->key;
388 
389             /*
390               AESE exor's input with round key
391                    shift rows of exor'ed result
392                    sub bytes for shifted rows
393              */
394 
395             __asm__ __volatile__ (
396                 "LD1 {v0.16b}, [%[CtrIn]] \n"
397                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
398 
399                 "AESD v0.16b, v1.16b   \n"
400                 "AESIMC v0.16b, v0.16b \n"
401                 "AESD v0.16b, v2.16b   \n"
402                 "AESIMC v0.16b, v0.16b \n"
403                 "AESD v0.16b, v3.16b   \n"
404                 "AESIMC v0.16b, v0.16b \n"
405                 "AESD v0.16b, v4.16b   \n"
406                 "AESIMC v0.16b, v0.16b \n"
407 
408                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
409                 "AESD v0.16b, v1.16b   \n"
410                 "AESIMC v0.16b, v0.16b \n"
411                 "AESD v0.16b, v2.16b   \n"
412                 "AESIMC v0.16b, v0.16b \n"
413                 "AESD v0.16b, v3.16b   \n"
414                 "AESIMC v0.16b, v0.16b \n"
415                 "AESD v0.16b, v4.16b   \n"
416                 "AESIMC v0.16b, v0.16b \n"
417 
418                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
419                 "AESD v0.16b, v1.16b   \n"
420                 "AESIMC v0.16b, v0.16b \n"
421                 "AESD v0.16b, v2.16b   \n"
422 
423                 "#subtract rounds done so far and see if should continue\n"
424                 "MOV w12, %w[R]    \n"
425                 "SUB w12, w12, #10 \n"
426                 "CBZ w12, 1f       \n"
427                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
428                 "AESIMC v0.16b, v0.16b \n"
429                 "AESD v0.16b, v1.16b   \n"
430                 "AESIMC v0.16b, v0.16b \n"
431                 "AESD v0.16b, v2.16b   \n"
432 
433                 "SUB w12, w12, #2  \n"
434                 "CBZ w12, 1f       \n"
435                 "LD1 {v1.2d-v2.2d}, [%[Key]], #32  \n"
436                 "AESIMC v0.16b, v0.16b \n"
437                 "AESD v0.16b, v1.16b   \n"
438                 "AESIMC v0.16b, v0.16b \n"
439                 "AESD v0.16b, v2.16b   \n"
440 
441                 "#Final AddRoundKey then store result \n"
442                 "1: \n"
443                 "LD1 {v1.2d}, [%[Key]], #16 \n"
444                 "EOR v0.16b, v0.16b, v1.16b  \n"
445                 "ST1 {v0.4s}, [%[CtrOut]]    \n"
446 
447                 :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
448                  "=r" (inBlock)
449                 :[Key] "1" (aes->key), "0" (outBlock), [R] "2" (aes->rounds),
450                  [CtrIn] "3" (inBlock)
451                 : "cc", "memory", "w12", "v0", "v1", "v2", "v3", "v4"
452             );
453 
454         return 0;
455 }
456     #endif /* HAVE_AES_DECRYPT */
457 #endif /* DIRECT or COUNTER */
458 
459 /* AES-CBC */
460 #ifdef HAVE_AES_CBC
wc_AesCbcEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)461     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
462     {
463         word32 numBlocks = sz / AES_BLOCK_SIZE;
464 
465         if (aes == NULL || out == NULL || in == NULL) {
466             return BAD_FUNC_ARG;
467         }
468 
469         if (sz == 0) {
470             return 0;
471         }
472 
473 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
474         if (sz % AES_BLOCK_SIZE) {
475             return BAD_LENGTH_E;
476         }
477 #endif
478 
479         /* do as many block size ops as possible */
480         if (numBlocks > 0) {
481             word32* key = aes->key;
482             word32* reg = aes->reg;
483             /*
484             AESE exor's input with round key
485             shift rows of exor'ed result
486             sub bytes for shifted rows
487 
488             note: grouping AESE & AESMC together as pairs reduces latency
489             */
490             switch(aes->rounds) {
491 #ifdef WOLFSSL_AES_128
492             case 10: /* AES 128 BLOCK */
493                 __asm__ __volatile__ (
494                 "MOV w11, %w[blocks] \n"
495                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
496                 "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
497                 "LD1 {v9.2d-v11.2d},[%[Key]], #48  \n"
498                 "LD1 {v0.2d}, [%[reg]] \n"
499 
500                 "LD1 {v12.2d}, [%[input]], #16 \n"
501                 "1:\n"
502                 "#CBC operations, xorbuf in with current aes->reg \n"
503                 "EOR v0.16b, v0.16b, v12.16b \n"
504                 "AESE v0.16b, v1.16b  \n"
505                 "AESMC v0.16b, v0.16b \n"
506                 "AESE v0.16b, v2.16b  \n"
507                 "AESMC v0.16b, v0.16b \n"
508                 "AESE v0.16b, v3.16b  \n"
509                 "AESMC v0.16b, v0.16b \n"
510                 "AESE v0.16b, v4.16b  \n"
511                 "AESMC v0.16b, v0.16b \n"
512                 "AESE v0.16b, v5.16b  \n"
513                 "AESMC v0.16b, v0.16b \n"
514                 "AESE v0.16b, v6.16b  \n"
515                 "AESMC v0.16b, v0.16b \n"
516                 "AESE v0.16b, v7.16b  \n"
517                 "AESMC v0.16b, v0.16b \n"
518                 "AESE v0.16b, v8.16b  \n"
519                 "AESMC v0.16b, v0.16b \n"
520                 "AESE v0.16b, v9.16b  \n"
521                 "AESMC v0.16b, v0.16b \n"
522                 "AESE v0.16b, v10.16b  \n"
523                 "SUB w11, w11, #1 \n"
524                 "EOR v0.16b, v0.16b, v11.16b  \n"
525                 "ST1 {v0.2d}, [%[out]], #16   \n"
526 
527                 "CBZ w11, 2f \n"
528                 "LD1 {v12.2d}, [%[input]], #16 \n"
529                 "B 1b \n"
530 
531                 "2:\n"
532                 "#store current counter value at the end \n"
533                 "ST1 {v0.2d}, [%[regOut]] \n"
534 
535                 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
536                 :"0" (out), [Key] "r" (key), [input] "2" (in),
537                  [blocks] "r" (numBlocks), [reg] "1" (reg)
538                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
539                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
540                 );
541                 break;
542 #endif /* WOLFSSL_AES_128 */
543 #ifdef WOLFSSL_AES_192
544             case 12: /* AES 192 BLOCK */
545                 __asm__ __volatile__ (
546                 "MOV w11, %w[blocks] \n"
547                 "LD1 {v1.2d-v4.2d}, %[Key], #64  \n"
548                 "LD1 {v5.2d-v8.2d}, %[Key], #64  \n"
549                 "LD1 {v9.2d-v12.2d},%[Key], #64  \n"
550                 "LD1 {v13.2d}, %[Key], #16 \n"
551                 "LD1 {v0.2d}, %[reg] \n"
552 
553                 "LD1 {v14.2d}, [%[input]], #16  \n"
554                 "1:\n"
555                 "#CBC operations, xorbuf in with current aes->reg \n"
556                 "EOR v0.16b, v0.16b, v14.16b \n"
557                 "AESE v0.16b, v1.16b  \n"
558                 "AESMC v0.16b, v0.16b \n"
559                 "AESE v0.16b, v2.16b  \n"
560                 "AESMC v0.16b, v0.16b \n"
561                 "AESE v0.16b, v3.16b  \n"
562                 "AESMC v0.16b, v0.16b \n"
563                 "AESE v0.16b, v4.16b  \n"
564                 "AESMC v0.16b, v0.16b \n"
565                 "AESE v0.16b, v5.16b  \n"
566                 "AESMC v0.16b, v0.16b \n"
567                 "AESE v0.16b, v6.16b  \n"
568                 "AESMC v0.16b, v0.16b \n"
569                 "AESE v0.16b, v7.16b  \n"
570                 "AESMC v0.16b, v0.16b \n"
571                 "AESE v0.16b, v8.16b  \n"
572                 "AESMC v0.16b, v0.16b \n"
573                 "AESE v0.16b, v9.16b  \n"
574                 "AESMC v0.16b, v0.16b \n"
575                 "AESE v0.16b, v10.16b \n"
576                 "AESMC v0.16b, v0.16b \n"
577                 "AESE v0.16b, v11.16b \n"
578                 "AESMC v0.16b, v0.16b \n"
579                 "AESE v0.16b, v12.16b \n"
580                 "EOR v0.16b, v0.16b, v13.16b  \n"
581                 "SUB w11, w11, #1 \n"
582                 "ST1 {v0.2d}, [%[out]], #16  \n"
583 
584                 "CBZ w11, 2f \n"
585                 "LD1 {v14.2d}, [%[input]], #16\n"
586                 "B 1b \n"
587 
588                 "2:\n"
589                 "#store current counter value at the end \n"
590                 "ST1 {v0.2d}, %[regOut]   \n"
591 
592 
593                 :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
594                 :"0" (out), [Key] "m" (aes->key), [input] "2" (in),
595                  [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
596                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
597                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
598                 );
599                 break;
600 #endif /* WOLFSSL_AES_192*/
601 #ifdef WOLFSSL_AES_256
602             case 14: /* AES 256 BLOCK */
603                 __asm__ __volatile__ (
604                 "MOV w11, %w[blocks] \n"
605                 "LD1 {v1.2d-v4.2d},   %[Key], #64 \n"
606 
607                 "LD1 {v5.2d-v8.2d},   %[Key], #64 \n"
608                 "LD1 {v9.2d-v12.2d},  %[Key], #64 \n"
609                 "LD1 {v13.2d-v15.2d}, %[Key], #48 \n"
610                 "LD1 {v0.2d}, %[reg] \n"
611 
612                 "LD1 {v16.2d}, [%[input]], #16  \n"
613                 "1: \n"
614                 "#CBC operations, xorbuf in with current aes->reg \n"
615                 "EOR v0.16b, v0.16b, v16.16b \n"
616                 "AESE v0.16b, v1.16b  \n"
617                 "AESMC v0.16b, v0.16b \n"
618                 "AESE v0.16b, v2.16b  \n"
619                 "AESMC v0.16b, v0.16b \n"
620                 "AESE v0.16b, v3.16b  \n"
621                 "AESMC v0.16b, v0.16b \n"
622                 "AESE v0.16b, v4.16b  \n"
623                 "AESMC v0.16b, v0.16b \n"
624                 "AESE v0.16b, v5.16b  \n"
625                 "AESMC v0.16b, v0.16b \n"
626                 "AESE v0.16b, v6.16b  \n"
627                 "AESMC v0.16b, v0.16b \n"
628                 "AESE v0.16b, v7.16b  \n"
629                 "AESMC v0.16b, v0.16b \n"
630                 "AESE v0.16b, v8.16b  \n"
631                 "AESMC v0.16b, v0.16b \n"
632                 "AESE v0.16b, v9.16b  \n"
633                 "AESMC v0.16b, v0.16b \n"
634                 "AESE v0.16b, v10.16b \n"
635                 "AESMC v0.16b, v0.16b \n"
636                 "AESE v0.16b, v11.16b \n"
637                 "AESMC v0.16b, v0.16b \n"
638                 "AESE v0.16b, v12.16b \n"
639                 "AESMC v0.16b, v0.16b \n"
640                 "AESE v0.16b, v13.16b \n"
641                 "AESMC v0.16b, v0.16b \n"
642                 "AESE v0.16b, v14.16b \n"
643                 "EOR v0.16b, v0.16b, v15.16b \n"
644                 "SUB w11, w11, #1     \n"
645                 "ST1 {v0.2d}, [%[out]], #16  \n"
646 
647                 "CBZ w11, 2f \n"
648                 "LD1 {v16.2d}, [%[input]], #16 \n"
649                 "B 1b \n"
650 
651                 "2: \n"
652                 "#store current counter value at the end \n"
653                 "ST1 {v0.2d}, %[regOut]   \n"
654 
655 
656                 :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
657                 :"0" (out), [Key] "m" (aes->key), [input] "2" (in),
658                  [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
659                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
660                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15",
661                 "v16"
662                 );
663                 break;
664 #endif /* WOLFSSL_AES_256 */
665             default:
666                 WOLFSSL_MSG("Bad AES-CBC round value");
667                 return BAD_FUNC_ARG;
668             }
669         }
670 
671         return 0;
672     }
673 
674     #ifdef HAVE_AES_DECRYPT
wc_AesCbcDecrypt(Aes * aes,byte * out,const byte * in,word32 sz)675     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
676     {
677         word32 numBlocks = sz / AES_BLOCK_SIZE;
678 
679         if (aes == NULL || out == NULL || in == NULL) {
680             return BAD_FUNC_ARG;
681         }
682 
683         if (sz == 0) {
684             return 0;
685         }
686 
687         if (sz % AES_BLOCK_SIZE) {
688 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
689             return BAD_LENGTH_E;
690 #else
691             return BAD_FUNC_ARG;
692 #endif
693         }
694 
695         /* do as many block size ops as possible */
696         if (numBlocks > 0) {
697             word32* key = aes->key;
698             word32* reg = aes->reg;
699 
700             switch(aes->rounds) {
701 #ifdef WOLFSSL_AES_128
702             case 10: /* AES 128 BLOCK */
703                 __asm__ __volatile__ (
704                 "MOV w11, %w[blocks] \n"
705                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
706                 "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
707                 "LD1 {v9.2d-v11.2d},[%[Key]], #48  \n"
708                 "LD1 {v13.2d}, [%[reg]] \n"
709 
710                 "1:\n"
711                 "LD1 {v0.2d}, [%[input]], #16  \n"
712                 "MOV v12.16b, v0.16b \n"
713                 "AESD v0.16b, v1.16b   \n"
714                 "AESIMC v0.16b, v0.16b \n"
715                 "AESD v0.16b, v2.16b   \n"
716                 "AESIMC v0.16b, v0.16b \n"
717                 "AESD v0.16b, v3.16b   \n"
718                 "AESIMC v0.16b, v0.16b \n"
719                 "AESD v0.16b, v4.16b   \n"
720                 "AESIMC v0.16b, v0.16b \n"
721                 "AESD v0.16b, v5.16b   \n"
722                 "AESIMC v0.16b, v0.16b \n"
723                 "AESD v0.16b, v6.16b   \n"
724                 "AESIMC v0.16b, v0.16b \n"
725                 "AESD v0.16b, v7.16b   \n"
726                 "AESIMC v0.16b, v0.16b \n"
727                 "AESD v0.16b, v8.16b   \n"
728                 "AESIMC v0.16b, v0.16b \n"
729                 "AESD v0.16b, v9.16b   \n"
730                 "AESIMC v0.16b, v0.16b \n"
731                 "AESD v0.16b, v10.16b  \n"
732                 "EOR v0.16b, v0.16b, v11.16b \n"
733 
734                 "EOR v0.16b, v0.16b, v13.16b \n"
735                 "SUB w11, w11, #1            \n"
736                 "ST1 {v0.2d}, [%[out]], #16  \n"
737                 "MOV v13.16b, v12.16b        \n"
738 
739                 "CBZ w11, 2f \n"
740                 "B 1b      \n"
741 
742                 "2: \n"
743                 "#store current counter value at the end \n"
744                 "ST1 {v13.2d}, [%[regOut]] \n"
745 
746                 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
747                 :"0" (out), [Key] "r" (key), [input] "2" (in),
748                  [blocks] "r" (numBlocks), [reg] "1" (reg)
749                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
750                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
751                 );
752                 break;
753 #endif /* WOLFSSL_AES_128 */
754 #ifdef WOLFSSL_AES_192
755             case 12: /* AES 192 BLOCK */
756                 __asm__ __volatile__ (
757                 "MOV w11, %w[blocks] \n"
758                 "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"
759                 "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
760                 "LD1 {v9.2d-v12.2d},[%[Key]], #64  \n"
761                 "LD1 {v13.16b}, [%[Key]], #16 \n"
762                 "LD1 {v15.2d}, [%[reg]]       \n"
763 
764                 "LD1 {v0.2d}, [%[input]], #16  \n"
765                 "1:    \n"
766                 "MOV v14.16b, v0.16b   \n"
767                 "AESD v0.16b, v1.16b   \n"
768                 "AESIMC v0.16b, v0.16b \n"
769                 "AESD v0.16b, v2.16b   \n"
770                 "AESIMC v0.16b, v0.16b \n"
771                 "AESD v0.16b, v3.16b   \n"
772                 "AESIMC v0.16b, v0.16b \n"
773                 "AESD v0.16b, v4.16b   \n"
774                 "AESIMC v0.16b, v0.16b \n"
775                 "AESD v0.16b, v5.16b   \n"
776                 "AESIMC v0.16b, v0.16b \n"
777                 "AESD v0.16b, v6.16b   \n"
778                 "AESIMC v0.16b, v0.16b \n"
779                 "AESD v0.16b, v7.16b   \n"
780                 "AESIMC v0.16b, v0.16b \n"
781                 "AESD v0.16b, v8.16b   \n"
782                 "AESIMC v0.16b, v0.16b \n"
783                 "AESD v0.16b, v9.16b   \n"
784                 "AESIMC v0.16b, v0.16b \n"
785                 "AESD v0.16b, v10.16b  \n"
786                 "AESIMC v0.16b, v0.16b \n"
787                 "AESD v0.16b, v11.16b  \n"
788                 "AESIMC v0.16b, v0.16b \n"
789                 "AESD v0.16b, v12.16b  \n"
790                 "EOR v0.16b, v0.16b, v13.16b \n"
791 
792                 "EOR v0.16b, v0.16b, v15.16b \n"
793                 "SUB w11, w11, #1            \n"
794                 "ST1 {v0.2d}, [%[out]], #16  \n"
795                 "MOV v15.16b, v14.16b        \n"
796 
797                 "CBZ w11, 2f \n"
798                 "LD1 {v0.2d}, [%[input]], #16 \n"
799                 "B 1b \n"
800 
801                 "2:\n"
802                 "#store current counter value at the end \n"
803                 "ST1 {v15.2d}, [%[regOut]] \n"
804 
805                 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
806                 :"0" (out), [Key] "r" (key), [input] "2" (in),
807                  [blocks] "r" (numBlocks), [reg] "1" (reg)
808                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
809                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
810                 );
811                 break;
812 #endif /* WOLFSSL_AES_192 */
813 #ifdef WOLFSSL_AES_256
814             case 14: /* AES 256 BLOCK */
815                 __asm__ __volatile__ (
816                 "MOV w11, %w[blocks] \n"
817                 "LD1 {v1.2d-v4.2d},   [%[Key]], #64  \n"
818                 "LD1 {v5.2d-v8.2d},   [%[Key]], #64  \n"
819                 "LD1 {v9.2d-v12.2d},  [%[Key]], #64  \n"
820                 "LD1 {v13.2d-v15.2d}, [%[Key]], #48  \n"
821                 "LD1 {v17.2d}, [%[reg]] \n"
822 
823                 "LD1 {v0.2d}, [%[input]], #16  \n"
824                 "1:    \n"
825                 "MOV v16.16b, v0.16b   \n"
826                 "AESD v0.16b, v1.16b   \n"
827                 "AESIMC v0.16b, v0.16b \n"
828                 "AESD v0.16b, v2.16b   \n"
829                 "AESIMC v0.16b, v0.16b \n"
830                 "AESD v0.16b, v3.16b   \n"
831                 "AESIMC v0.16b, v0.16b \n"
832                 "AESD v0.16b, v4.16b   \n"
833                 "AESIMC v0.16b, v0.16b \n"
834                 "AESD v0.16b, v5.16b   \n"
835                 "AESIMC v0.16b, v0.16b \n"
836                 "AESD v0.16b, v6.16b   \n"
837                 "AESIMC v0.16b, v0.16b \n"
838                 "AESD v0.16b, v7.16b   \n"
839                 "AESIMC v0.16b, v0.16b \n"
840                 "AESD v0.16b, v8.16b   \n"
841                 "AESIMC v0.16b, v0.16b \n"
842                 "AESD v0.16b, v9.16b   \n"
843                 "AESIMC v0.16b, v0.16b \n"
844                 "AESD v0.16b, v10.16b  \n"
845                 "AESIMC v0.16b, v0.16b \n"
846                 "AESD v0.16b, v11.16b  \n"
847                 "AESIMC v0.16b, v0.16b \n"
848                 "AESD v0.16b, v12.16b  \n"
849                 "AESIMC v0.16b, v0.16b \n"
850                 "AESD v0.16b, v13.16b  \n"
851                 "AESIMC v0.16b, v0.16b \n"
852                 "AESD v0.16b, v14.16b  \n"
853                 "EOR v0.16b, v0.16b, v15.16b \n"
854 
855                 "EOR v0.16b, v0.16b, v17.16b \n"
856                 "SUB w11, w11, #1            \n"
857                 "ST1 {v0.2d}, [%[out]], #16  \n"
858                 "MOV v17.16b, v16.16b        \n"
859 
860                 "CBZ w11, 2f \n"
861                 "LD1 {v0.2d}, [%[input]], #16  \n"
862                 "B 1b \n"
863 
864                 "2:\n"
865                 "#store current counter value at the end \n"
866                 "ST1 {v17.2d}, [%[regOut]]   \n"
867 
868                 :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
869                 :"0" (out), [Key] "r" (key), [input] "2" (in),
870                  [blocks] "r" (numBlocks), [reg] "1" (reg)
871                 : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
872                 "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15",
873                 "v16", "v17"
874                 );
875                 break;
876 #endif /* WOLFSSL_AES_256 */
877             default:
878                 WOLFSSL_MSG("Bad AES-CBC round value");
879                 return BAD_FUNC_ARG;
880             }
881         }
882 
883         return 0;
884     }
885     #endif
886 
887 #endif /* HAVE_AES_CBC */
888 
889 /* AES-CTR */
890 #ifdef WOLFSSL_AES_COUNTER
891 
892         /* Increment AES counter */
IncrementAesCounter(byte * inOutCtr)893         static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
894         {
895             int i;
896 
897             /* in network byte order so start at end and work back */
898             for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
899                 if (++inOutCtr[i])  /* we're done unless we overflow */
900                     return;
901             }
902         }
903 
wc_AesCtrEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)904         int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
905         {
906             byte* tmp;
907             word32 numBlocks;
908 
909             if (aes == NULL || out == NULL || in == NULL) {
910                 return BAD_FUNC_ARG;
911             }
912 
913             tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
914 
915             /* consume any unused bytes left in aes->tmp */
916             while (aes->left && sz) {
917                *(out++) = *(in++) ^ *(tmp++);
918                aes->left--;
919                sz--;
920             }
921 
922             /* do as many block size ops as possible */
923             numBlocks = sz/AES_BLOCK_SIZE;
924             if (numBlocks > 0) {
925                 /* pointer needed because it is incremented when read, causing
926                  * an issue with call to encrypt/decrypt leftovers */
927                 byte*  keyPt  = (byte*)aes->key;
928                 sz           -= numBlocks * AES_BLOCK_SIZE;
929                 switch(aes->rounds) {
930 #ifdef WOLFSSL_AES_128
931                 case 10: /* AES 128 BLOCK */
932                     __asm__ __volatile__ (
933                     "MOV w11, %w[blocks] \n"
934                     "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
935 
936                     "#Create vector with the value 1  \n"
937                     "MOVI v15.16b, #1                 \n"
938                     "USHR v15.2d, v15.2d, #56         \n"
939                     "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
940                     "EOR v14.16b, v14.16b, v14.16b    \n"
941                     "EXT v14.16b, v15.16b, v14.16b, #8\n"
942 
943                     "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
944                     "LD1 {v13.2d}, %[reg]             \n"
945 
946                     /* double block */
947                     "1:      \n"
948                     "CMP w11, #1 \n"
949                     "BEQ 2f    \n"
950                     "CMP w11, #0 \n"
951                     "BEQ 3f    \n"
952 
953                     "MOV v0.16b, v13.16b  \n"
954                     "AESE v0.16b, v1.16b  \n"
955                     "AESMC v0.16b, v0.16b \n"
956                     "REV64 v13.16b, v13.16b \n" /* network order */
957                     "AESE v0.16b, v2.16b  \n"
958                     "AESMC v0.16b, v0.16b \n"
959                     "EXT v13.16b, v13.16b, v13.16b, #8 \n"
960                     "SUB w11, w11, #2     \n"
961                     "ADD v15.2d, v13.2d, v14.2d \n" /* add 1 to counter */
962                     "ADD v13.2d, v15.2d, v14.2d \n" /* add 1 to counter */
963 
964                     "AESE v0.16b, v3.16b  \n"
965                     "AESMC v0.16b, v0.16b \n"
966                     "EXT v15.16b, v15.16b, v15.16b, #8 \n"
967                     "EXT v13.16b, v13.16b, v13.16b, #8 \n"
968 
969                     "AESE v0.16b, v4.16b  \n"
970                     "AESMC v0.16b, v0.16b \n"
971                     "REV64 v15.16b, v15.16b \n" /* revert from network order */
972                     "REV64 v13.16b, v13.16b \n" /* revert from network order */
973 
974                     "AESE v0.16b, v5.16b  \n"
975                     "AESMC v0.16b, v0.16b \n"
976                     "AESE v15.16b, v1.16b  \n"
977                     "AESMC v15.16b, v15.16b \n"
978 
979                     "AESE v0.16b, v6.16b  \n"
980                     "AESMC v0.16b, v0.16b \n"
981                     "AESE v15.16b, v2.16b  \n"
982                     "AESMC v15.16b, v15.16b \n"
983 
984                     "AESE v0.16b, v7.16b  \n"
985                     "AESMC v0.16b, v0.16b \n"
986                     "AESE v15.16b, v3.16b  \n"
987                     "AESMC v15.16b, v15.16b \n"
988 
989                     "AESE v0.16b, v8.16b  \n"
990                     "AESMC v0.16b, v0.16b \n"
991                     "AESE v15.16b, v4.16b  \n"
992                     "AESMC v15.16b, v15.16b \n"
993 
994                     "AESE v0.16b, v9.16b  \n"
995                     "AESMC v0.16b, v0.16b \n"
996                     "AESE v15.16b, v5.16b  \n"
997                     "AESMC v15.16b, v15.16b \n"
998 
999                     "AESE v0.16b, v10.16b  \n"
1000                     "AESE v15.16b, v6.16b  \n"
1001                     "AESMC v15.16b, v15.16b \n"
1002 
1003                     "EOR v0.16b, v0.16b, v11.16b \n"
1004                     "AESE v15.16b, v7.16b  \n"
1005                     "AESMC v15.16b, v15.16b \n"
1006 
1007                     "LD1 {v12.2d}, [%[input]], #16  \n"
1008                     "AESE v15.16b, v8.16b  \n"
1009                     "AESMC v15.16b, v15.16b \n"
1010 
1011                     "EOR v0.16b, v0.16b, v12.16b \n"
1012                     "AESE v15.16b, v9.16b  \n"
1013                     "AESMC v15.16b, v15.16b \n"
1014 
1015                     "LD1 {v12.2d}, [%[input]], #16  \n"
1016                     "AESE v15.16b, v10.16b  \n"
1017                     "ST1 {v0.2d}, [%[out]], #16  \n"
1018                     "EOR v15.16b, v15.16b, v11.16b \n"
1019                     "EOR v15.16b, v15.16b, v12.16b \n"
1020                     "ST1 {v15.2d}, [%[out]], #16  \n"
1021 
1022                     "B 1b \n"
1023 
1024                     /* single block */
1025                     "2: \n"
1026                     "MOV v0.16b, v13.16b  \n"
1027                     "AESE v0.16b, v1.16b  \n"
1028                     "AESMC v0.16b, v0.16b \n"
1029                     "REV64 v13.16b, v13.16b \n" /* network order */
1030                     "AESE v0.16b, v2.16b  \n"
1031                     "AESMC v0.16b, v0.16b \n"
1032                     "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1033                     "AESE v0.16b, v3.16b  \n"
1034                     "AESMC v0.16b, v0.16b \n"
1035                     "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1036                     "AESE v0.16b, v4.16b  \n"
1037                     "AESMC v0.16b, v0.16b \n"
1038                     "SUB w11, w11, #1     \n"
1039                     "AESE v0.16b, v5.16b  \n"
1040                     "AESMC v0.16b, v0.16b \n"
1041                     "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1042                     "AESE v0.16b, v6.16b  \n"
1043                     "AESMC v0.16b, v0.16b \n"
1044                     "REV64 v13.16b, v13.16b \n" /* revert from network order */
1045                     "AESE v0.16b, v7.16b  \n"
1046                     "AESMC v0.16b, v0.16b \n"
1047                     "AESE v0.16b, v8.16b  \n"
1048                     "AESMC v0.16b, v0.16b \n"
1049                     "AESE v0.16b, v9.16b  \n"
1050                     "AESMC v0.16b, v0.16b \n"
1051                     "AESE v0.16b, v10.16b \n"
1052                     "EOR v0.16b, v0.16b, v11.16b \n"
1053                     "#CTR operations, increment counter and xorbuf \n"
1054                     "LD1 {v12.2d}, [%[input]], #16  \n"
1055                     "EOR v0.16b, v0.16b, v12.16b \n"
1056                     "ST1 {v0.2d}, [%[out]], #16  \n"
1057 
1058                     "3: \n"
1059                     "#store current counter value at the end \n"
1060                     "ST1 {v13.2d}, %[regOut]   \n"
1061 
1062                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1063                      "=r" (in)
1064                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1065                      [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1066                     : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1067                     "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15"
1068                     );
1069                     break;
1070 #endif /* WOLFSSL_AES_128 */
1071 #ifdef WOLFSSL_AES_192
1072                 case 12: /* AES 192 BLOCK */
1073                     __asm__ __volatile__ (
1074                     "MOV w11, %w[blocks]              \n"
1075                     "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1076 
1077                     "#Create vector with the value 1  \n"
1078                     "MOVI v16.16b, #1                 \n"
1079                     "USHR v16.2d, v16.2d, #56         \n"
1080                     "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1081                     "EOR v14.16b, v14.16b, v14.16b    \n"
1082                     "EXT v16.16b, v16.16b, v14.16b, #8\n"
1083 
1084                     "LD1 {v9.2d-v12.2d}, [%[Key]], #64\n"
1085                     "LD1 {v15.2d}, %[reg]             \n"
1086                     "LD1 {v13.16b}, [%[Key]], #16     \n"
1087 
1088                     /* double block */
1089                     "1:      \n"
1090                     "CMP w11, #1 \n"
1091                     "BEQ 2f    \n"
1092                     "CMP w11, #0 \n"
1093                     "BEQ 3f    \n"
1094 
1095                     "MOV v0.16b, v15.16b  \n"
1096                     "AESE v0.16b, v1.16b  \n"
1097                     "AESMC v0.16b, v0.16b \n"
1098                     "REV64 v15.16b, v15.16b \n" /* network order */
1099                     "AESE v0.16b, v2.16b  \n"
1100                     "AESMC v0.16b, v0.16b \n"
1101                     "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1102                     "SUB w11, w11, #2     \n"
1103                     "ADD v17.2d, v15.2d, v16.2d \n" /* add 1 to counter */
1104                     "ADD v15.2d, v17.2d, v16.2d \n" /* add 1 to counter */
1105 
1106                     "AESE v0.16b, v3.16b  \n"
1107                     "AESMC v0.16b, v0.16b \n"
1108                     "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1109                     "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1110 
1111                     "AESE v0.16b, v4.16b  \n"
1112                     "AESMC v0.16b, v0.16b \n"
1113                     "REV64 v17.16b, v17.16b \n" /* revert from network order */
1114                     "REV64 v15.16b, v15.16b \n" /* revert from network order */
1115 
1116                     "AESE v0.16b, v5.16b  \n"
1117                     "AESMC v0.16b, v0.16b \n"
1118                     "AESE v17.16b, v1.16b  \n"
1119                     "AESMC v17.16b, v17.16b \n"
1120 
1121                     "AESE v0.16b, v6.16b  \n"
1122                     "AESMC v0.16b, v0.16b \n"
1123                     "AESE v17.16b, v2.16b  \n"
1124                     "AESMC v17.16b, v17.16b \n"
1125 
1126                     "AESE v0.16b, v7.16b  \n"
1127                     "AESMC v0.16b, v0.16b \n"
1128                     "AESE v17.16b, v3.16b  \n"
1129                     "AESMC v17.16b, v17.16b \n"
1130 
1131                     "AESE v0.16b, v8.16b  \n"
1132                     "AESMC v0.16b, v0.16b \n"
1133                     "AESE v17.16b, v4.16b  \n"
1134                     "AESMC v17.16b, v17.16b \n"
1135 
1136                     "AESE v0.16b, v9.16b  \n"
1137                     "AESMC v0.16b, v0.16b \n"
1138                     "AESE v17.16b, v5.16b  \n"
1139                     "AESMC v17.16b, v17.16b \n"
1140 
1141                     "AESE v0.16b, v10.16b  \n"
1142                     "AESMC v0.16b, v0.16b \n"
1143                     "AESE v17.16b, v6.16b  \n"
1144                     "AESMC v17.16b, v17.16b \n"
1145 
1146                     "AESE v0.16b, v11.16b  \n"
1147                     "AESMC v0.16b, v0.16b \n"
1148                     "AESE v17.16b, v7.16b  \n"
1149                     "AESMC v17.16b, v17.16b \n"
1150 
1151                     "AESE v0.16b, v12.16b  \n"
1152                     "AESE v17.16b, v8.16b  \n"
1153                     "AESMC v17.16b, v17.16b \n"
1154 
1155                     "EOR v0.16b, v0.16b, v13.16b \n"
1156                     "AESE v17.16b, v9.16b  \n"
1157                     "AESMC v17.16b, v17.16b \n"
1158 
1159                     "LD1 {v14.2d}, [%[input]], #16  \n"
1160                     "AESE v17.16b, v10.16b  \n"
1161                     "AESMC v17.16b, v17.16b \n"
1162 
1163                     "EOR v0.16b, v0.16b, v14.16b \n"
1164                     "AESE v17.16b, v11.16b  \n"
1165                     "AESMC v17.16b, v17.16b \n"
1166 
1167                     "LD1 {v14.2d}, [%[input]], #16  \n"
1168                     "AESE v17.16b, v12.16b  \n"
1169                     "ST1 {v0.2d}, [%[out]], #16  \n"
1170                     "EOR v17.16b, v17.16b, v13.16b \n"
1171                     "EOR v17.16b, v17.16b, v14.16b \n"
1172                     "ST1 {v17.2d}, [%[out]], #16  \n"
1173 
1174                     "B 1b \n"
1175 
1176                     "2:      \n"
1177                     "LD1 {v14.2d}, [%[input]], #16    \n"
1178                     "MOV v0.16b, v15.16b  \n"
1179 
1180                     "AESE v0.16b, v1.16b  \n"
1181                     "AESMC v0.16b, v0.16b \n"
1182                     "REV64 v15.16b, v15.16b \n" /* network order */
1183                     "AESE v0.16b, v2.16b  \n"
1184                     "AESMC v0.16b, v0.16b \n"
1185                     "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1186                     "AESE v0.16b, v3.16b  \n"
1187                     "AESMC v0.16b, v0.16b \n"
1188                     "ADD v15.2d, v15.2d, v16.2d \n" /* add 1 to counter */
1189                     "AESE v0.16b, v4.16b  \n"
1190                     "AESMC v0.16b, v0.16b \n"
1191                     "SUB w11, w11, #1     \n"
1192                     "AESE v0.16b, v5.16b  \n"
1193                     "AESMC v0.16b, v0.16b \n"
1194                     "EXT v15.16b, v15.16b, v15.16b, #8 \n"
1195                     "AESE v0.16b, v6.16b  \n"
1196                     "AESMC v0.16b, v0.16b \n"
1197                     "REV64 v15.16b, v15.16b \n" /* revert from network order */
1198                     "AESE v0.16b, v7.16b  \n"
1199                     "AESMC v0.16b, v0.16b \n"
1200                     "AESE v0.16b, v8.16b  \n"
1201                     "AESMC v0.16b, v0.16b \n"
1202                     "AESE v0.16b, v9.16b  \n"
1203                     "AESMC v0.16b, v0.16b \n"
1204                     "AESE v0.16b, v10.16b \n"
1205                     "AESMC v0.16b, v0.16b \n"
1206                     "AESE v0.16b, v11.16b \n"
1207                     "AESMC v0.16b, v0.16b \n"
1208                     "AESE v0.16b, v12.16b \n"
1209                     "EOR v0.16b, v0.16b, v13.16b \n"
1210                     "#CTR operations, increment counter and xorbuf \n"
1211                     "EOR v0.16b, v0.16b, v14.16b \n"
1212                     "ST1 {v0.2d}, [%[out]], #16  \n"
1213 
1214                     "3: \n"
1215                     "#store current counter value at the end \n"
1216                     "ST1 {v15.2d}, %[regOut] \n"
1217 
1218                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1219                      "=r" (in)
1220                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1221                      [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1222                     : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1223                     "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15",
1224                     "v16", "v17"
1225                     );
1226                     break;
1227 #endif /* WOLFSSL_AES_192 */
1228 #ifdef WOLFSSL_AES_256
1229                 case 14: /* AES 256 BLOCK */
1230                     __asm__ __volatile__ (
1231                     "MOV w11, %w[blocks] \n"
1232                     "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1233 
1234                     "#Create vector with the value 1  \n"
1235                     "MOVI v18.16b, #1                 \n"
1236                     "USHR v18.2d, v18.2d, #56         \n"
1237                     "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1238                     "EOR v19.16b, v19.16b, v19.16b    \n"
1239                     "EXT v18.16b, v18.16b, v19.16b, #8\n"
1240 
1241                     "LD1 {v9.2d-v12.2d}, [%[Key]], #64  \n"
1242                     "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
1243                     "LD1 {v17.2d}, %[reg]               \n"
1244 
1245                     /* double block */
1246                     "1:      \n"
1247                     "CMP w11, #1 \n"
1248                     "BEQ 2f    \n"
1249                     "CMP w11, #0 \n"
1250                     "BEQ 3f    \n"
1251 
1252                     "MOV v0.16b, v17.16b  \n"
1253                     "AESE v0.16b, v1.16b  \n"
1254                     "AESMC v0.16b, v0.16b \n"
1255                     "REV64 v17.16b, v17.16b \n" /* network order */
1256                     "AESE v0.16b, v2.16b  \n"
1257                     "AESMC v0.16b, v0.16b \n"
1258                     "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1259                     "SUB w11, w11, #2     \n"
1260                     "ADD v19.2d, v17.2d, v18.2d \n" /* add 1 to counter */
1261                     "ADD v17.2d, v19.2d, v18.2d \n" /* add 1 to counter */
1262 
1263                     "AESE v0.16b, v3.16b  \n"
1264                     "AESMC v0.16b, v0.16b \n"
1265                     "EXT v19.16b, v19.16b, v19.16b, #8 \n"
1266                     "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1267 
1268                     "AESE v0.16b, v4.16b  \n"
1269                     "AESMC v0.16b, v0.16b \n"
1270                     "REV64 v19.16b, v19.16b \n" /* revert from network order */
1271                     "REV64 v17.16b, v17.16b \n" /* revert from network order */
1272 
1273                     "AESE v0.16b, v5.16b  \n"
1274                     "AESMC v0.16b, v0.16b \n"
1275                     "AESE v19.16b, v1.16b  \n"
1276                     "AESMC v19.16b, v19.16b \n"
1277 
1278                     "AESE v0.16b, v6.16b  \n"
1279                     "AESMC v0.16b, v0.16b \n"
1280                     "AESE v19.16b, v2.16b  \n"
1281                     "AESMC v19.16b, v19.16b \n"
1282 
1283                     "AESE v0.16b, v7.16b  \n"
1284                     "AESMC v0.16b, v0.16b \n"
1285                     "AESE v19.16b, v3.16b  \n"
1286                     "AESMC v19.16b, v19.16b \n"
1287 
1288                     "AESE v0.16b, v8.16b  \n"
1289                     "AESMC v0.16b, v0.16b \n"
1290                     "AESE v19.16b, v4.16b  \n"
1291                     "AESMC v19.16b, v19.16b \n"
1292 
1293                     "AESE v0.16b, v9.16b  \n"
1294                     "AESMC v0.16b, v0.16b \n"
1295                     "AESE v19.16b, v5.16b  \n"
1296                     "AESMC v19.16b, v19.16b \n"
1297 
1298                     "AESE v0.16b, v10.16b  \n"
1299                     "AESMC v0.16b, v0.16b \n"
1300                     "AESE v19.16b, v6.16b  \n"
1301                     "AESMC v19.16b, v19.16b \n"
1302 
1303                     "AESE v0.16b, v11.16b  \n"
1304                     "AESMC v0.16b, v0.16b \n"
1305                     "AESE v19.16b, v7.16b  \n"
1306                     "AESMC v19.16b, v19.16b \n"
1307 
1308                     "AESE v0.16b, v12.16b  \n"
1309                     "AESMC v0.16b, v0.16b \n"
1310                     "AESE v19.16b, v8.16b  \n"
1311                     "AESMC v19.16b, v19.16b \n"
1312 
1313                     "AESE v0.16b, v13.16b  \n"
1314                     "AESMC v0.16b, v0.16b \n"
1315                     "AESE v19.16b, v9.16b  \n"
1316                     "AESMC v19.16b, v19.16b \n"
1317 
1318                     "AESE v0.16b, v14.16b  \n"
1319                     "AESE v19.16b, v10.16b  \n"
1320                     "AESMC v19.16b, v19.16b \n"
1321 
1322                     "EOR v0.16b, v0.16b, v15.16b \n"
1323                     "AESE v19.16b, v11.16b  \n"
1324                     "AESMC v19.16b, v19.16b \n"
1325 
1326                     "LD1 {v16.2d}, [%[input]], #16 \n"
1327                     "AESE v19.16b, v12.16b  \n"
1328                     "AESMC v19.16b, v19.16b \n"
1329 
1330                     "EOR v0.16b, v0.16b, v16.16b \n"
1331                     "AESE v19.16b, v13.16b  \n"
1332                     "AESMC v19.16b, v19.16b \n"
1333 
1334                     "LD1 {v16.2d}, [%[input]], #16 \n"
1335                     "AESE v19.16b, v14.16b  \n"
1336                     "ST1 {v0.2d}, [%[out]], #16  \n"
1337                     "EOR v19.16b, v19.16b, v15.16b \n"
1338                     "EOR v19.16b, v19.16b, v16.16b \n"
1339                     "ST1 {v19.2d}, [%[out]], #16  \n"
1340 
1341                     "B 1b \n"
1342 
1343                     "2:      \n"
1344                     "LD1 {v16.2d}, [%[input]], #16 \n"
1345                     "MOV v0.16b, v17.16b  \n"
1346                     "AESE v0.16b, v1.16b  \n"
1347                     "AESMC v0.16b, v0.16b \n"
1348                     "REV64 v17.16b, v17.16b \n" /* network order */
1349                     "AESE v0.16b, v2.16b  \n"
1350                     "AESMC v0.16b, v0.16b \n"
1351                     "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1352                     "AESE v0.16b, v3.16b  \n"
1353                     "AESMC v0.16b, v0.16b \n"
1354                     "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */
1355                     "AESE v0.16b, v4.16b  \n"
1356                     "AESMC v0.16b, v0.16b \n"
1357                     "AESE v0.16b, v5.16b  \n"
1358                     "AESMC v0.16b, v0.16b \n"
1359                     "EXT v17.16b, v17.16b, v17.16b, #8 \n"
1360                     "AESE v0.16b, v6.16b  \n"
1361                     "AESMC v0.16b, v0.16b \n"
1362                     "REV64 v17.16b, v17.16b \n" /* revert from network order */
1363                     "AESE v0.16b, v7.16b  \n"
1364                     "AESMC v0.16b, v0.16b \n"
1365                     "AESE v0.16b, v8.16b  \n"
1366                     "AESMC v0.16b, v0.16b \n"
1367                     "AESE v0.16b, v9.16b  \n"
1368                     "AESMC v0.16b, v0.16b \n"
1369                     "AESE v0.16b, v10.16b \n"
1370                     "AESMC v0.16b, v0.16b \n"
1371                     "AESE v0.16b, v11.16b \n"
1372                     "AESMC v0.16b, v0.16b \n"
1373                     "AESE v0.16b, v12.16b \n"
1374                     "AESMC v0.16b, v0.16b \n"
1375                     "AESE v0.16b, v13.16b \n"
1376                     "AESMC v0.16b, v0.16b \n"
1377                     "AESE v0.16b, v14.16b \n"
1378                     "EOR v0.16b, v0.16b, v15.16b \n"
1379                     "#CTR operations, increment counter and xorbuf \n"
1380                     "EOR v0.16b, v0.16b, v16.16b \n"
1381                     "ST1 {v0.2d}, [%[out]], #16 \n"
1382 
1383                     "3: \n"
1384                     "#store current counter value at the end \n"
1385                     "ST1 {v17.2d}, %[regOut] \n"
1386 
1387 
1388                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=m" (aes->reg),
1389                      "=r" (in)
1390                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
1391                      [blocks] "r" (numBlocks), [reg] "m" (aes->reg)
1392                     : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1393                     "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14","v15",
1394                     "v16", "v17", "v18", "v19"
1395                     );
1396                     break;
1397 #endif /* WOLFSSL_AES_256 */
1398                 default:
1399                     WOLFSSL_MSG("Bad AES-CTR round value");
1400                     return BAD_FUNC_ARG;
1401                 }
1402 
1403                 aes->left = 0;
1404             }
1405 
1406             /* handle non block size remaining */
1407             if (sz) {
1408                 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
1409                 IncrementAesCounter((byte*)aes->reg);
1410 
1411                 aes->left = AES_BLOCK_SIZE;
1412                 tmp = (byte*)aes->tmp;
1413 
1414                 while (sz--) {
1415                     *(out++) = *(in++) ^ *(tmp++);
1416                     aes->left--;
1417                 }
1418             }
1419             return 0;
1420         }
1421 
1422 #endif /* WOLFSSL_AES_COUNTER */
1423 
1424 #ifdef HAVE_AESGCM
1425 
/*
 * Based on the GCM implementation in wolfcrypt/src/aes.c
 */
1429 
/* PMULL and RBIT only with AArch64 */
/* Use ARM hardware for polynomial multiply */
/* Multiply the 16-byte block X by the 16-byte block Y with carry-less
 * (polynomial) multiplication, reduce the 256-bit product back to 128 bits
 * and write the result over X.
 *
 * X  in/out: 16-byte block; it is bit-reflected per byte (RBIT) on load and
 *    the result is reflected back before being stored
 * Y  in: 16-byte block, only read; used as loaded (the existing note below
 *    says it is already reflected when the key is set)
 */
static void GMULT(byte* X, byte* Y)
{
    __asm__ volatile (
        "LD1 {v0.16b}, [%[inX]] \n"
        "LD1 {v1.16b}, [%[inY]] \n" /* v1 already reflected from set key */
        "RBIT v0.16b, v0.16b \n"


        /* Algorithm 1 from Intel GCM white paper.
           "Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
         */
        /* four 64x64 partial products of the two 128-bit operands */
        "PMULL  v3.1q, v0.1d, v1.1d \n"     /* a0 * b0 = C */
        "PMULL2 v4.1q, v0.2d, v1.2d \n"     /* a1 * b1 = D */
        "EXT v5.16b, v1.16b, v1.16b, #8 \n" /* b0b1 -> b1b0 */
        "PMULL  v6.1q, v0.1d, v5.1d \n"     /* a0 * b1 = E */
        "PMULL2 v5.1q, v0.2d, v5.2d \n"     /* a1 * b0 = F */

        /* fold the middle term F ^ E into the low (v3) and high (v4) halves */
        "#Set a register to all 0s using EOR \n"
        "EOR v7.16b, v7.16b, v7.16b \n"
        "EOR v5.16b, v5.16b, v6.16b \n"     /* F ^ E */
        "EXT v6.16b, v7.16b, v5.16b, #8 \n" /* get (F^E)[0] */
        "EOR v3.16b, v3.16b, v6.16b \n"     /* low 128 bits in v3 */
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* get (F^E)[1] */
        "EOR v4.16b, v4.16b, v6.16b \n"     /* high 128 bits in v4 */


        /* Based from White Paper "Implementing GCM on ARMv8"
           by Conrado P.L. Gouvea and Julio Lopez
           reduction on 256bit value using Algorithm 5 */
        "MOVI v8.16b, #0x87 \n"
        "USHR v8.2d, v8.2d, #56 \n"
        /* v8 is now 0x00000000000000870000000000000087 reflected 0xe1....*/
        "PMULL2 v5.1q, v4.2d, v8.2d \n"
        "EXT v6.16b, v5.16b, v7.16b, #8 \n" /* v7 is all 0's */
        "EOR v4.16b, v4.16b, v6.16b \n"
        "EXT v6.16b, v7.16b, v5.16b, #8 \n"
        "EOR v3.16b, v3.16b, v6.16b \n"
        "PMULL v5.1q, v4.1d, v8.1d  \n"
        "EOR v4.16b, v3.16b, v5.16b \n"

        /* reflect the reduced result back and store it over X */
        "RBIT v4.16b, v4.16b \n"
        "STR q4, [%[out]] \n"
        /* out is tied to the same register as inX, so the store targets X */
        : [out] "=r" (X), "=r" (Y)
        : [inX] "0" (X), [inY] "1" (Y)
        : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"
    );
}
1479 
1480 
/* Fold the additional authentication data, the ciphertext and their bit
 * lengths into a running 16-byte hash value using aes->H and GMULT().
 *
 * aes  AES object supplying the hash key aes->H
 * a    additional authentication data; skipped when NULL or aSz is 0
 * aSz  length of a in bytes (always included in the length block)
 * c    ciphertext; skipped when NULL or cSz is 0
 * cSz  length of c in bytes (always included in the length block)
 * s    output buffer receiving the first sSz bytes of the hash state
 * sSz  number of bytes to copy into s
 *
 * The length block is XORed in but the GMULT that would follow it is not
 * performed here.
 */
void GHASH(Aes* aes, const byte* a, word32 aSz,
                                const byte* c, word32 cSz, byte* s, word32 sSz)
{
    byte   acc[AES_BLOCK_SIZE];
    byte   pad[AES_BLOCK_SIZE];
    byte*  hashKey = aes->H;
    word32 fullBlocks;
    word32 tail;
    word32 i;

    XMEMSET(acc, 0, AES_BLOCK_SIZE);

    /* absorb A, the Additional Authentication Data */
    if (a != NULL && aSz != 0) {
        fullBlocks = aSz / AES_BLOCK_SIZE;
        tail       = aSz % AES_BLOCK_SIZE;

        /* complete blocks first */
        for (i = 0; i < fullBlocks; i++) {
            xorbuf(acc, a, AES_BLOCK_SIZE);
            GMULT(acc, hashKey);
            a += AES_BLOCK_SIZE;
        }

        /* zero-pad a trailing partial block up to a full block */
        if (tail != 0) {
            XMEMSET(pad, 0, AES_BLOCK_SIZE);
            XMEMCPY(pad, a, tail);
            xorbuf(acc, pad, AES_BLOCK_SIZE);
            GMULT(acc, hashKey);
        }
    }

    /* absorb C, the Ciphertext */
    if (c != NULL && cSz != 0) {
        fullBlocks = cSz / AES_BLOCK_SIZE;
        tail       = cSz % AES_BLOCK_SIZE;

        for (i = 0; i < fullBlocks; i++) {
            xorbuf(acc, c, AES_BLOCK_SIZE);
            GMULT(acc, hashKey);
            c += AES_BLOCK_SIZE;
        }

        if (tail != 0) {
            XMEMSET(pad, 0, AES_BLOCK_SIZE);
            XMEMCPY(pad, c, tail);
            xorbuf(acc, pad, AES_BLOCK_SIZE);
            GMULT(acc, hashKey);
        }
    }

    /* length block: bit length of A in the first 8 bytes, of C in the last 8 */
    FlattenSzInBits(pad, aSz);
    FlattenSzInBits(pad + 8, cSz);
    xorbuf(acc, pad, AES_BLOCK_SIZE);

    /* hand back the state without the final GMULT */
    XMEMCPY(s, acc, sSz);
}
1534 
1535 
1536 #ifdef WOLFSSL_AES_128
1537 /* internal function : see wc_AesGcmEncrypt */
Aes128GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)1538 static int Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
1539                    const byte* iv, word32 ivSz,
1540                    byte* authTag, word32 authTagSz,
1541                    const byte* authIn, word32 authInSz)
1542 {
1543     word32 blocks;
1544     word32 partial;
1545     byte counter[AES_BLOCK_SIZE];
1546     byte initialCounter[AES_BLOCK_SIZE];
1547     byte x[AES_BLOCK_SIZE];
1548     byte scratch[AES_BLOCK_SIZE];
1549 
1550     /* Noticed different optimization levels treated head of array different.
1551        Some cases was stack pointer plus offset others was a regester containing
1552        address. To make uniform for passing in to inline assembly code am using
1553        pointers to the head of each local array.
1554      */
1555     byte* ctr  = counter;
1556     byte* iCtr = initialCounter;
1557     byte* xPt  = x;
1558     byte* sPt  = scratch;
1559     byte* keyPt; /* pointer to handle pointer advencment */
1560 
1561     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
1562     if (ivSz == GCM_NONCE_MID_SZ) {
1563         XMEMCPY(initialCounter, iv, ivSz);
1564         initialCounter[AES_BLOCK_SIZE - 1] = 1;
1565     }
1566     else {
1567         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
1568         GMULT(initialCounter, aes->H);
1569     }
1570     XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
1571 
1572 
1573     /* Hash in the Additional Authentication Data */
1574     XMEMSET(x, 0, AES_BLOCK_SIZE);
1575     if (authInSz != 0 && authIn != NULL) {
1576         blocks = authInSz / AES_BLOCK_SIZE;
1577         partial = authInSz % AES_BLOCK_SIZE;
1578         /* do as many blocks as possible */
1579         while (blocks--) {
1580             xorbuf(x, authIn, AES_BLOCK_SIZE);
1581             GMULT(x, aes->H);
1582             authIn += AES_BLOCK_SIZE;
1583         }
1584         if (partial != 0) {
1585             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1586             XMEMCPY(scratch, authIn, partial);
1587             xorbuf(x, scratch, AES_BLOCK_SIZE);
1588             GMULT(x, aes->H);
1589         }
1590     }
1591 
1592     /* do as many blocks as possible */
1593     blocks = sz / AES_BLOCK_SIZE;
1594     partial = sz % AES_BLOCK_SIZE;
1595     if (blocks > 0) {
1596         keyPt  = (byte*)aes->key;
1597         __asm__ __volatile__ (
1598             "MOV w11, %w[blocks] \n"
1599             "LD1 {v13.2d}, [%[ctr]] \n"
1600 
1601             "#Create vector with the value 1  \n"
1602             "MOVI v14.16b, #1                 \n"
1603             "USHR v14.2d, v14.2d, #56         \n"
1604             "EOR v22.16b, v22.16b, v22.16b    \n"
1605             "EXT v14.16b, v14.16b, v22.16b, #8\n"
1606 
1607 
1608             /***************************************************
1609                Get first out block for GHASH using AES encrypt
1610              ***************************************************/
1611             "REV64 v13.16b, v13.16b \n" /* network order */
1612             "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1613             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1614             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1615             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1616             "REV64 v13.16b, v13.16b \n" /* revert from network order */
1617             "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1618             "MOV v0.16b, v13.16b  \n"
1619             "AESE v0.16b, v1.16b  \n"
1620             "AESMC v0.16b, v0.16b \n"
1621             "AESE v0.16b, v2.16b  \n"
1622             "AESMC v0.16b, v0.16b \n"
1623             "AESE v0.16b, v3.16b  \n"
1624             "AESMC v0.16b, v0.16b \n"
1625             "LD1 {v16.2d}, %[inY] \n"
1626             "AESE v0.16b, v4.16b  \n"
1627             "AESMC v0.16b, v0.16b \n"
1628             "SUB w11, w11, #1     \n"
1629             "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1630             "AESE v0.16b, v5.16b  \n"
1631             "AESMC v0.16b, v0.16b \n"
1632             "MOVI v23.16b, #0x87 \n"
1633             "AESE v0.16b, v6.16b  \n"
1634             "AESMC v0.16b, v0.16b \n"
1635             "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
1636             "AESE v0.16b, v7.16b  \n"
1637             "AESMC v0.16b, v0.16b \n"
1638             "USHR v23.2d, v23.2d, #56 \n"
1639             "AESE v0.16b, v8.16b  \n"
1640             "AESMC v0.16b, v0.16b \n"
1641             "LD1 {v12.2d}, [%[input]], #16 \n"
1642             "AESE v0.16b, v9.16b  \n"
1643             "AESMC v0.16b, v0.16b \n"
1644             "AESE v0.16b, v10.16b \n"
1645             "EOR v0.16b, v0.16b, v11.16b \n"
1646 
1647             "EOR v0.16b, v0.16b, v12.16b \n"
1648             "ST1 {v0.2d}, [%[out]], #16  \n"
1649             "MOV v15.16b, v0.16b \n"
1650 
1651             "CBZ w11, 1f \n" /* only one block jump to final GHASH */
1652 
1653             "LD1 {v12.2d}, [%[input]], #16 \n"
1654 
1655             /***************************************************
1656                Interweave GHASH and encrypt if more then 1 block
1657              ***************************************************/
1658             "2: \n"
1659             "REV64 v13.16b, v13.16b \n" /* network order */
1660             "EOR v15.16b, v17.16b, v15.16b \n"
1661             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1662             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1663             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
1664             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1665             "REV64 v13.16b, v13.16b \n" /* revert from network order */
1666             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
1667             "MOV v0.16b, v13.16b  \n"
1668             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
1669             "AESE v0.16b, v1.16b  \n"
1670             "AESMC v0.16b, v0.16b \n"
1671             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1672             "AESE v0.16b, v2.16b  \n"
1673             "AESMC v0.16b, v0.16b \n"
1674             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
1675             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
1676             "AESE v0.16b, v3.16b  \n"
1677             "AESMC v0.16b, v0.16b \n"
1678             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
1679             "AESE v0.16b, v4.16b  \n"
1680             "AESMC v0.16b, v0.16b \n"
1681             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1682             "SUB w11, w11, #1     \n"
1683             "AESE v0.16b, v5.16b  \n"
1684             "AESMC v0.16b, v0.16b \n"
1685             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
1686             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1687             "AESE v0.16b, v6.16b  \n"
1688             "AESMC v0.16b, v0.16b \n"
1689             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
1690             "AESE v0.16b, v7.16b  \n"
1691             "AESMC v0.16b, v0.16b \n"
1692             "PMULL2 v20.1q, v19.2d, v23.2d \n"
1693             "AESE v0.16b, v8.16b  \n"
1694             "AESMC v0.16b, v0.16b \n"
1695             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
1696             "AESE v0.16b, v9.16b  \n"
1697             "AESMC v0.16b, v0.16b \n"
1698             "EOR v19.16b, v19.16b, v21.16b \n"
1699             "AESE v0.16b, v10.16b \n"
1700             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1701             "EOR v0.16b, v0.16b, v11.16b \n"
1702             "EOR v18.16b, v18.16b, v21.16b \n"
1703 
1704             "EOR v0.16b, v0.16b, v12.16b \n"
1705             "PMULL v20.1q, v19.1d, v23.1d  \n"
1706             "ST1 {v0.2d}, [%[out]], #16  \n"
1707             "EOR v19.16b, v18.16b, v20.16b \n"
1708             "MOV v15.16b, v0.16b \n"
1709             "RBIT v17.16b, v19.16b \n"
1710 
1711             "CBZ w11, 1f \n"
1712             "LD1 {v12.2d}, [%[input]], #16 \n"
1713             "B 2b \n"
1714 
1715             /***************************************************
1716                GHASH on last block
1717              ***************************************************/
1718             "1: \n"
1719             "EOR v15.16b, v17.16b, v15.16b \n"
1720             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
1721 
1722             "#store current AES counter value \n"
1723             "ST1 {v13.2d}, [%[ctrOut]] \n"
1724             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
1725             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
1726             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1727             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
1728             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
1729             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
1730             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1731             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
1732             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1733             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
1734 
1735             "#Reduce product from multiplication \n"
1736             "PMULL2 v20.1q, v19.2d, v23.2d \n"
1737             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
1738             "EOR v19.16b, v19.16b, v21.16b \n"
1739             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1740             "EOR v18.16b, v18.16b, v21.16b \n"
1741             "PMULL v20.1q, v19.1d, v23.1d  \n"
1742             "EOR v19.16b, v18.16b, v20.16b \n"
1743             "RBIT v17.16b, v19.16b \n"
1744             "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
1745 
1746             :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
1747             ,[xOut] "=r" (xPt),"=m" (aes->H)
1748             :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
1749              [input] "3" (in)
1750             ,[inX] "4" (xPt), [inY] "m" (aes->H)
1751             : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
1752             "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
1753             ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24"
1754         );
1755     }
1756 
1757     /* take care of partial block sizes leftover */
1758     if (partial != 0) {
1759         IncrementGcmCounter(counter);
1760         wc_AesEncrypt(aes, counter, scratch);
1761         xorbuf(scratch, in, partial);
1762         XMEMCPY(out, scratch, partial);
1763 
1764         XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1765         XMEMCPY(scratch, out, partial);
1766         xorbuf(x, scratch, AES_BLOCK_SIZE);
1767         GMULT(x, aes->H);
1768     }
1769 
1770     /* Hash in the lengths of A and C in bits */
1771     XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1772     FlattenSzInBits(&scratch[0], authInSz);
1773     FlattenSzInBits(&scratch[8], sz);
1774     xorbuf(x, scratch, AES_BLOCK_SIZE);
1775     XMEMCPY(scratch, x, AES_BLOCK_SIZE);
1776 
1777     keyPt  = (byte*)aes->key;
1778     __asm__ __volatile__ (
1779 
1780         "LD1 {v16.16b}, [%[tag]] \n"
1781         "LD1 {v17.16b}, %[h] \n"
1782         "RBIT v16.16b, v16.16b \n"
1783 
1784         "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1785         "PMULL  v18.1q, v16.1d, v17.1d \n"     /* a0 * b0 = C */
1786         "PMULL2 v19.1q, v16.2d, v17.2d \n"     /* a1 * b1 = D */
1787         "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1788         "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
1789         "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1790         "PMULL  v21.1q, v16.1d, v20.1d \n"     /* a0 * b1 = E */
1791         "PMULL2 v20.1q, v16.2d, v20.2d \n"     /* a1 * b0 = F */
1792         "LD1 {v0.2d}, [%[ctr]]             \n"
1793 
1794         "#Set a register to all 0s using EOR \n"
1795         "EOR v22.16b, v22.16b, v22.16b \n"
1796         "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
1797         "AESE v0.16b, v1.16b  \n"
1798         "AESMC v0.16b, v0.16b \n"
1799         "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
1800         "AESE v0.16b, v2.16b  \n"
1801         "AESMC v0.16b, v0.16b \n"
1802         "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
1803         "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
1804         "AESE v0.16b, v3.16b  \n"
1805         "AESMC v0.16b, v0.16b \n"
1806         "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
1807         "MOVI v23.16b, #0x87 \n"
1808         "AESE v0.16b, v4.16b  \n"
1809         "AESMC v0.16b, v0.16b \n"
1810         "USHR v23.2d, v23.2d, #56 \n"
1811         "PMULL2 v20.1q, v19.2d, v23.2d \n"
1812         "AESE v0.16b, v5.16b  \n"
1813         "AESMC v0.16b, v0.16b \n"
1814         "EXT v21.16b, v20.16b, v22.16b, #8 \n"
1815         "AESE v0.16b, v6.16b  \n"
1816         "AESMC v0.16b, v0.16b \n"
1817         "EOR v19.16b, v19.16b, v21.16b \n"
1818         "AESE v0.16b, v7.16b  \n"
1819         "AESMC v0.16b, v0.16b \n"
1820         "EXT v21.16b, v22.16b, v20.16b, #8 \n"
1821         "AESE v0.16b, v8.16b  \n"
1822         "AESMC v0.16b, v0.16b \n"
1823         "EOR v18.16b, v18.16b, v21.16b \n"
1824         "AESE v0.16b, v9.16b  \n"
1825         "AESMC v0.16b, v0.16b \n"
1826         "PMULL v20.1q, v19.1d, v23.1d  \n"
1827         "EOR v19.16b, v18.16b, v20.16b \n"
1828         "AESE v0.16b, v10.16b \n"
1829         "RBIT v19.16b, v19.16b \n"
1830         "EOR v0.16b, v0.16b, v11.16b \n"
1831         "EOR v19.16b, v19.16b, v0.16b \n"
1832         "STR q19, [%[out]] \n"
1833 
1834         :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
1835         :[tag] "0" (sPt), [Key] "1" (keyPt),
1836         [ctr] "2" (iCtr) , [h] "m" (aes->H)
1837         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
1838         "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
1839         "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24"
1840     );
1841 
1842 
1843     if (authTagSz > AES_BLOCK_SIZE) {
1844         XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
1845     }
1846     else {
1847         /* authTagSz can be smaller than AES_BLOCK_SIZE */
1848         XMEMCPY(authTag, scratch, authTagSz);
1849     }
1850     return 0;
1851 }
1852 #endif /* WOLFSSL_AES_128 */
1853 
1854 #ifdef WOLFSSL_AES_192
1855 /* internal function : see wc_AesGcmEncrypt */
Aes192GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)1856 static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
1857                    const byte* iv, word32 ivSz,
1858                    byte* authTag, word32 authTagSz,
1859                    const byte* authIn, word32 authInSz)
1860 {
1861     word32 blocks;
1862     word32 partial;
1863     byte counter[AES_BLOCK_SIZE];
1864     byte initialCounter[AES_BLOCK_SIZE];
1865     byte x[AES_BLOCK_SIZE];
1866     byte scratch[AES_BLOCK_SIZE];
1867 
1868     /* Noticed different optimization levels treated head of array different.
1869        Some cases was stack pointer plus offset others was a regester containing
1870        address. To make uniform for passing in to inline assembly code am using
1871        pointers to the head of each local array.
1872      */
1873     byte* ctr  = counter;
1874     byte* iCtr = initialCounter;
1875     byte* xPt  = x;
1876     byte* sPt  = scratch;
1877     byte* keyPt; /* pointer to handle pointer advencment */
1878 
1879     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
1880     if (ivSz == GCM_NONCE_MID_SZ) {
1881         XMEMCPY(initialCounter, iv, ivSz);
1882         initialCounter[AES_BLOCK_SIZE - 1] = 1;
1883     }
1884     else {
1885         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
1886         GMULT(initialCounter, aes->H);
1887     }
1888     XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
1889 
1890 
1891     /* Hash in the Additional Authentication Data */
1892     XMEMSET(x, 0, AES_BLOCK_SIZE);
1893     if (authInSz != 0 && authIn != NULL) {
1894         blocks = authInSz / AES_BLOCK_SIZE;
1895         partial = authInSz % AES_BLOCK_SIZE;
1896         /* do as many blocks as possible */
1897         while (blocks--) {
1898             xorbuf(x, authIn, AES_BLOCK_SIZE);
1899             GMULT(x, aes->H);
1900             authIn += AES_BLOCK_SIZE;
1901         }
1902         if (partial != 0) {
1903             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
1904             XMEMCPY(scratch, authIn, partial);
1905             xorbuf(x, scratch, AES_BLOCK_SIZE);
1906             GMULT(x, aes->H);
1907         }
1908     }
1909 
1910     /* do as many blocks as possible */
1911     blocks = sz / AES_BLOCK_SIZE;
1912     partial = sz % AES_BLOCK_SIZE;
1913     if (blocks > 0) {
1914         keyPt  = (byte*)aes->key;
1915         __asm__ __volatile__ (
1916             "MOV w11, %w[blocks] \n"
1917             "LD1 {v13.2d}, [%[ctr]] \n"
1918 
1919             "#Create vector with the value 1  \n"
1920             "MOVI v14.16b, #1                 \n"
1921             "USHR v14.2d, v14.2d, #56         \n"
1922             "EOR v22.16b, v22.16b, v22.16b    \n"
1923             "EXT v14.16b, v14.16b, v22.16b, #8\n"
1924 
1925 
1926             /***************************************************
1927                Get first out block for GHASH using AES encrypt
1928              ***************************************************/
1929             "REV64 v13.16b, v13.16b \n" /* network order */
1930             "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
1931             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1932             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1933             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1934             "REV64 v13.16b, v13.16b \n" /* revert from network order */
1935             "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
1936             "MOV v0.16b, v13.16b  \n"
1937             "AESE v0.16b, v1.16b  \n"
1938             "AESMC v0.16b, v0.16b \n"
1939             "AESE v0.16b, v2.16b  \n"
1940             "AESMC v0.16b, v0.16b \n"
1941             "AESE v0.16b, v3.16b  \n"
1942             "AESMC v0.16b, v0.16b \n"
1943             "LD1 {v16.2d}, %[inY] \n"
1944             "AESE v0.16b, v4.16b  \n"
1945             "AESMC v0.16b, v0.16b \n"
1946             "SUB w11, w11, #1     \n"
1947             "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
1948             "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n"
1949             "AESE v0.16b, v5.16b  \n"
1950             "AESMC v0.16b, v0.16b \n"
1951             "MOVI v23.16b, #0x87 \n"
1952             "AESE v0.16b, v6.16b  \n"
1953             "AESMC v0.16b, v0.16b \n"
1954             "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
1955             "AESE v0.16b, v7.16b  \n"
1956             "AESMC v0.16b, v0.16b \n"
1957             "USHR v23.2d, v23.2d, #56 \n"
1958             "AESE v0.16b, v8.16b  \n"
1959             "AESMC v0.16b, v0.16b \n"
1960             "LD1 {v12.2d}, [%[input]], #16 \n"
1961             "AESE v0.16b, v9.16b  \n"
1962             "AESMC v0.16b, v0.16b \n"
1963             "AESE v0.16b, v10.16b  \n"
1964             "AESMC v0.16b, v0.16b \n"
1965             "AESE v0.16b, v11.16b  \n"
1966             "AESMC v0.16b, v0.16b \n"
1967             "AESE v0.16b, v30.16b \n"
1968             "EOR v0.16b, v0.16b, v31.16b \n"
1969 
1970             "EOR v0.16b, v0.16b, v12.16b \n"
1971             "ST1 {v0.2d}, [%[out]], #16  \n"
1972             "MOV v15.16b, v0.16b \n"
1973 
1974             "CBZ w11, 1f \n" /* only one block jump to final GHASH */
1975             "LD1 {v12.2d}, [%[input]], #16 \n"
1976 
1977             /***************************************************
1978                Interweave GHASH and encrypt if more then 1 block
1979              ***************************************************/
1980             "2: \n"
1981             "REV64 v13.16b, v13.16b \n" /* network order */
1982             "EOR v15.16b, v17.16b, v15.16b \n"
1983             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1984             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
1985             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
1986             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
1987             "REV64 v13.16b, v13.16b \n" /* revert from network order */
1988             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
1989             "MOV v0.16b, v13.16b  \n"
1990             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
1991             "AESE v0.16b, v1.16b  \n"
1992             "AESMC v0.16b, v0.16b \n"
1993             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
1994             "AESE v0.16b, v2.16b  \n"
1995             "AESMC v0.16b, v0.16b \n"
1996             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
1997             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
1998             "AESE v0.16b, v3.16b  \n"
1999             "AESMC v0.16b, v0.16b \n"
2000             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2001             "AESE v0.16b, v4.16b  \n"
2002             "AESMC v0.16b, v0.16b \n"
2003             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2004             "SUB w11, w11, #1     \n"
2005             "AESE v0.16b, v5.16b  \n"
2006             "AESMC v0.16b, v0.16b \n"
2007             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2008             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2009             "AESE v0.16b, v6.16b  \n"
2010             "AESMC v0.16b, v0.16b \n"
2011             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2012             "AESE v0.16b, v7.16b  \n"
2013             "AESMC v0.16b, v0.16b \n"
2014             "PMULL2 v20.1q, v19.2d, v23.2d \n"
2015             "AESE v0.16b, v8.16b  \n"
2016             "AESMC v0.16b, v0.16b \n"
2017             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2018             "AESE v0.16b, v9.16b  \n"
2019             "AESMC v0.16b, v0.16b \n"
2020             "AESE v0.16b, v10.16b  \n"
2021             "AESMC v0.16b, v0.16b \n"
2022             "AESE v0.16b, v11.16b  \n"
2023             "AESMC v0.16b, v0.16b \n"
2024             "EOR v19.16b, v19.16b, v21.16b \n"
2025             "AESE v0.16b, v30.16b \n"
2026             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2027             "EOR v0.16b, v0.16b, v31.16b \n"
2028             "EOR v18.16b, v18.16b, v21.16b \n"
2029 
2030             "EOR v0.16b, v0.16b, v12.16b \n"
2031             "PMULL v20.1q, v19.1d, v23.1d  \n"
2032             "ST1 {v0.2d}, [%[out]], #16  \n"
2033             "EOR v19.16b, v18.16b, v20.16b \n"
2034             "MOV v15.16b, v0.16b \n"
2035             "RBIT v17.16b, v19.16b \n"
2036 
2037             "CBZ w11, 1f \n"
2038             "LD1 {v12.2d}, [%[input]], #16 \n"
2039             "B 2b \n"
2040 
2041             /***************************************************
2042                GHASH on last block
2043              ***************************************************/
2044             "1: \n"
2045             "EOR v15.16b, v17.16b, v15.16b \n"
2046             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
2047 
2048             "#store current AES counter value \n"
2049             "ST1 {v13.2d}, [%[ctrOut]] \n"
2050             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
2051             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
2052             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2053             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
2054             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
2055             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2056             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2057             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2058             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2059             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2060 
2061             "#Reduce product from multiplication \n"
2062             "PMULL2 v20.1q, v19.2d, v23.2d \n"
2063             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2064             "EOR v19.16b, v19.16b, v21.16b \n"
2065             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2066             "EOR v18.16b, v18.16b, v21.16b \n"
2067             "PMULL v20.1q, v19.1d, v23.1d  \n"
2068             "EOR v19.16b, v18.16b, v20.16b \n"
2069             "RBIT v17.16b, v19.16b \n"
2070             "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
2071 
2072             :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
2073             ,[xOut] "=r" (xPt),"=m" (aes->H)
2074             :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
2075              [input] "3" (in)
2076             ,[inX] "4" (xPt), [inY] "m" (aes->H)
2077             : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
2078             "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
2079             ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
2080             "v24","v25","v26","v27","v28","v29","v30","v31"
2081         );
2082     }
2083 
2084     /* take care of partial block sizes leftover */
2085     if (partial != 0) {
2086         IncrementGcmCounter(counter);
2087         wc_AesEncrypt(aes, counter, scratch);
2088         xorbuf(scratch, in, partial);
2089         XMEMCPY(out, scratch, partial);
2090 
2091         XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2092         XMEMCPY(scratch, out, partial);
2093         xorbuf(x, scratch, AES_BLOCK_SIZE);
2094         GMULT(x, aes->H);
2095     }
2096 
2097     /* Hash in the lengths of A and C in bits */
2098     XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2099     FlattenSzInBits(&scratch[0], authInSz);
2100     FlattenSzInBits(&scratch[8], sz);
2101     xorbuf(x, scratch, AES_BLOCK_SIZE);
2102     XMEMCPY(scratch, x, AES_BLOCK_SIZE);
2103 
2104     keyPt  = (byte*)aes->key;
2105     __asm__ __volatile__ (
2106 
2107         "LD1 {v16.16b}, [%[tag]] \n"
2108         "LD1 {v17.16b}, %[h] \n"
2109         "RBIT v16.16b, v16.16b \n"
2110 
2111         "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2112         "PMULL  v18.1q, v16.1d, v17.1d \n"     /* a0 * b0 = C */
2113         "PMULL2 v19.1q, v16.2d, v17.2d \n"     /* a1 * b1 = D */
2114         "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2115         "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
2116         "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2117         "LD1 {v30.2d-v31.2d}, [%[Key]], #32\n"
2118         "PMULL  v21.1q, v16.1d, v20.1d \n"     /* a0 * b1 = E */
2119         "PMULL2 v20.1q, v16.2d, v20.2d \n"     /* a1 * b0 = F */
2120         "LD1 {v0.2d}, [%[ctr]]             \n"
2121 
2122         "#Set a register to all 0s using EOR \n"
2123         "EOR v22.16b, v22.16b, v22.16b \n"
2124         "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2125         "AESE v0.16b, v1.16b  \n"
2126         "AESMC v0.16b, v0.16b \n"
2127         "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2128         "AESE v0.16b, v2.16b  \n"
2129         "AESMC v0.16b, v0.16b \n"
2130         "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2131         "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2132         "AESE v0.16b, v3.16b  \n"
2133         "AESMC v0.16b, v0.16b \n"
2134         "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2135         "MOVI v23.16b, #0x87 \n"
2136         "AESE v0.16b, v4.16b  \n"
2137         "AESMC v0.16b, v0.16b \n"
2138         "USHR v23.2d, v23.2d, #56 \n"
2139         "PMULL2 v20.1q, v19.2d, v23.2d \n"
2140         "AESE v0.16b, v5.16b  \n"
2141         "AESMC v0.16b, v0.16b \n"
2142         "EXT v21.16b, v20.16b, v22.16b, #8 \n"
2143         "AESE v0.16b, v6.16b  \n"
2144         "AESMC v0.16b, v0.16b \n"
2145         "EOR v19.16b, v19.16b, v21.16b \n"
2146         "AESE v0.16b, v7.16b  \n"
2147         "AESMC v0.16b, v0.16b \n"
2148         "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2149         "AESE v0.16b, v8.16b  \n"
2150         "AESMC v0.16b, v0.16b \n"
2151         "EOR v18.16b, v18.16b, v21.16b \n"
2152         "AESE v0.16b, v9.16b  \n"
2153         "AESMC v0.16b, v0.16b \n"
2154         "AESE v0.16b, v10.16b  \n"
2155         "AESMC v0.16b, v0.16b \n"
2156         "AESE v0.16b, v11.16b  \n"
2157         "AESMC v0.16b, v0.16b \n"
2158         "PMULL v20.1q, v19.1d, v23.1d  \n"
2159         "EOR v19.16b, v18.16b, v20.16b \n"
2160         "AESE v0.16b, v30.16b \n"
2161         "RBIT v19.16b, v19.16b \n"
2162         "EOR v0.16b, v0.16b, v31.16b \n"
2163         "EOR v19.16b, v19.16b, v0.16b \n"
2164         "STR q19, [%[out]] \n"
2165 
2166         :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
2167         :[tag] "0" (sPt), [Key] "1" (keyPt),
2168         [ctr] "2" (iCtr) , [h] "m" (aes->H)
2169         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
2170         "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
2171         "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24"
2172     );
2173 
2174 
2175     if (authTagSz > AES_BLOCK_SIZE) {
2176         XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
2177     }
2178     else {
2179         /* authTagSz can be smaller than AES_BLOCK_SIZE */
2180         XMEMCPY(authTag, scratch, authTagSz);
2181     }
2182 
2183     return 0;
2184 }
2185 #endif /* WOLFSSL_AES_192 */
2186 
2187 #ifdef WOLFSSL_AES_256
2188 /* internal function : see wc_AesGcmEncrypt */
Aes256GcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)2189 static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
2190                    const byte* iv, word32 ivSz,
2191                    byte* authTag, word32 authTagSz,
2192                    const byte* authIn, word32 authInSz)
2193 {
2194     word32 blocks;
2195     word32 partial;
2196     byte counter[AES_BLOCK_SIZE];
2197     byte initialCounter[AES_BLOCK_SIZE];
2198     byte x[AES_BLOCK_SIZE];
2199     byte scratch[AES_BLOCK_SIZE];
2200 
2201     /* Noticed different optimization levels treated head of array different.
2202        Some cases was stack pointer plus offset others was a regester containing
2203        address. To make uniform for passing in to inline assembly code am using
2204        pointers to the head of each local array.
2205      */
2206     byte* ctr  = counter;
2207     byte* iCtr = initialCounter;
2208     byte* xPt  = x;
2209     byte* sPt  = scratch;
2210     byte* keyPt; /* pointer to handle pointer advencment */
2211 
2212     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
2213     if (ivSz == GCM_NONCE_MID_SZ) {
2214         XMEMCPY(initialCounter, iv, ivSz);
2215         initialCounter[AES_BLOCK_SIZE - 1] = 1;
2216     }
2217     else {
2218         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
2219         GMULT(initialCounter, aes->H);
2220     }
2221     XMEMCPY(counter, initialCounter, AES_BLOCK_SIZE);
2222 
2223 
2224     /* Hash in the Additional Authentication Data */
2225     XMEMSET(x, 0, AES_BLOCK_SIZE);
2226     if (authInSz != 0 && authIn != NULL) {
2227         blocks = authInSz / AES_BLOCK_SIZE;
2228         partial = authInSz % AES_BLOCK_SIZE;
2229         /* do as many blocks as possible */
2230         while (blocks--) {
2231             xorbuf(x, authIn, AES_BLOCK_SIZE);
2232             GMULT(x, aes->H);
2233             authIn += AES_BLOCK_SIZE;
2234         }
2235         if (partial != 0) {
2236             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2237             XMEMCPY(scratch, authIn, partial);
2238             xorbuf(x, scratch, AES_BLOCK_SIZE);
2239             GMULT(x, aes->H);
2240         }
2241     }
2242 
2243     /* do as many blocks as possible */
2244     blocks = sz / AES_BLOCK_SIZE;
2245     partial = sz % AES_BLOCK_SIZE;
2246     if (blocks > 0) {
2247         keyPt  = (byte*)aes->key;
2248         __asm__ __volatile__ (
2249             "MOV w11, %w[blocks] \n"
2250             "LD1 {v13.2d}, [%[ctr]] \n"
2251 
2252             "#Create vector with the value 1  \n"
2253             "MOVI v14.16b, #1                 \n"
2254             "USHR v14.2d, v14.2d, #56         \n"
2255             "EOR v22.16b, v22.16b, v22.16b    \n"
2256             "EXT v14.16b, v14.16b, v22.16b, #8\n"
2257 
2258 
2259             /***************************************************
2260                Get first out block for GHASH using AES encrypt
2261              ***************************************************/
2262             "REV64 v13.16b, v13.16b \n" /* network order */
2263             "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2264             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2265             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
2266             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2267             "REV64 v13.16b, v13.16b \n" /* revert from network order */
2268             "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2269             "MOV v0.16b, v13.16b  \n"
2270             "AESE v0.16b, v1.16b  \n"
2271             "AESMC v0.16b, v0.16b \n"
2272             "AESE v0.16b, v2.16b  \n"
2273             "AESMC v0.16b, v0.16b \n"
2274             "AESE v0.16b, v3.16b  \n"
2275             "AESMC v0.16b, v0.16b \n"
2276             "LD1 {v16.2d}, %[inY] \n"
2277             "AESE v0.16b, v4.16b  \n"
2278             "AESMC v0.16b, v0.16b \n"
2279             "SUB w11, w11, #1     \n"
2280             "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2281             "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n"
2282             "AESE v0.16b, v5.16b  \n"
2283             "AESMC v0.16b, v0.16b \n"
2284             "MOVI v23.16b, #0x87 \n"
2285             "AESE v0.16b, v6.16b  \n"
2286             "AESMC v0.16b, v0.16b \n"
2287             "LD1 {v17.2d}, [%[inX]] \n" /* account for additional data */
2288             "AESE v0.16b, v7.16b  \n"
2289             "AESMC v0.16b, v0.16b \n"
2290             "USHR v23.2d, v23.2d, #56 \n"
2291             "AESE v0.16b, v8.16b  \n"
2292             "AESMC v0.16b, v0.16b \n"
2293             "LD1 {v12.2d}, [%[input]], #16 \n"
2294             "AESE v0.16b, v9.16b  \n"
2295             "AESMC v0.16b, v0.16b \n"
2296             "AESE v0.16b, v10.16b  \n"
2297             "AESMC v0.16b, v0.16b \n"
2298             "AESE v0.16b, v11.16b  \n"
2299             "AESMC v0.16b, v0.16b \n"
2300             "AESE v0.16b, v28.16b  \n"
2301             "AESMC v0.16b, v0.16b \n"
2302             "AESE v0.16b, v29.16b  \n"
2303             "AESMC v0.16b, v0.16b \n"
2304             "AESE v0.16b, v30.16b \n"
2305             "EOR v0.16b, v0.16b, v31.16b \n"
2306 
2307             "EOR v0.16b, v0.16b, v12.16b \n"
2308             "ST1 {v0.2d}, [%[out]], #16  \n"
2309             "MOV v15.16b, v0.16b \n"
2310 
2311             "CBZ w11, 1f \n" /* only one block jump to final GHASH */
2312             "LD1 {v12.2d}, [%[input]], #16 \n"
2313 
2314             /***************************************************
2315                Interweave GHASH and encrypt if more then 1 block
2316              ***************************************************/
2317             "2: \n"
2318             "REV64 v13.16b, v13.16b \n" /* network order */
2319             "EOR v15.16b, v17.16b, v15.16b \n"
2320             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2321             "ADD v13.2d, v13.2d, v14.2d \n" /* add 1 to counter */
2322             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block (c) */
2323             "EXT v13.16b, v13.16b, v13.16b, #8 \n"
2324             "REV64 v13.16b, v13.16b \n" /* revert from network order */
2325             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
2326             "MOV v0.16b, v13.16b  \n"
2327             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
2328             "AESE v0.16b, v1.16b  \n"
2329             "AESMC v0.16b, v0.16b \n"
2330             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2331             "AESE v0.16b, v2.16b  \n"
2332             "AESMC v0.16b, v0.16b \n"
2333             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
2334             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
2335             "AESE v0.16b, v3.16b  \n"
2336             "AESMC v0.16b, v0.16b \n"
2337             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2338             "AESE v0.16b, v4.16b  \n"
2339             "AESMC v0.16b, v0.16b \n"
2340             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2341             "SUB w11, w11, #1     \n"
2342             "AESE v0.16b, v5.16b  \n"
2343             "AESMC v0.16b, v0.16b \n"
2344             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2345             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2346             "AESE v0.16b, v6.16b  \n"
2347             "AESMC v0.16b, v0.16b \n"
2348             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2349             "AESE v0.16b, v7.16b  \n"
2350             "AESMC v0.16b, v0.16b \n"
2351             "PMULL2 v20.1q, v19.2d, v23.2d \n"
2352             "AESE v0.16b, v8.16b  \n"
2353             "AESMC v0.16b, v0.16b \n"
2354             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2355             "AESE v0.16b, v9.16b  \n"
2356             "AESMC v0.16b, v0.16b \n"
2357             "AESE v0.16b, v10.16b  \n"
2358             "AESMC v0.16b, v0.16b \n"
2359             "AESE v0.16b, v11.16b  \n"
2360             "AESMC v0.16b, v0.16b \n"
2361             "AESE v0.16b, v28.16b  \n"
2362             "AESMC v0.16b, v0.16b \n"
2363             "AESE v0.16b, v29.16b  \n"
2364             "AESMC v0.16b, v0.16b \n"
2365             "EOR v19.16b, v19.16b, v21.16b \n"
2366             "AESE v0.16b, v30.16b \n"
2367             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2368             "EOR v0.16b, v0.16b, v31.16b \n"
2369             "EOR v18.16b, v18.16b, v21.16b \n"
2370 
2371             "EOR v0.16b, v0.16b, v12.16b \n"
2372             "PMULL v20.1q, v19.1d, v23.1d  \n"
2373             "ST1 {v0.2d}, [%[out]], #16  \n"
2374             "EOR v19.16b, v18.16b, v20.16b \n"
2375             "MOV v15.16b, v0.16b \n"
2376             "RBIT v17.16b, v19.16b \n"
2377 
2378             "CBZ w11, 1f \n"
2379             "LD1 {v12.2d}, [%[input]], #16 \n"
2380             "B 2b \n"
2381 
2382             /***************************************************
2383                GHASH on last block
2384              ***************************************************/
2385             "1: \n"
2386             "EOR v15.16b, v17.16b, v15.16b \n"
2387             "RBIT v15.16b, v15.16b \n" /* v15 is encrypted out block */
2388 
2389             "#store current AES counter value \n"
2390             "ST1 {v13.2d}, [%[ctrOut]] \n"
2391             "PMULL  v18.1q, v15.1d, v16.1d \n"     /* a0 * b0 = C */
2392             "PMULL2 v19.1q, v15.2d, v16.2d \n"     /* a1 * b1 = D */
2393             "EXT v20.16b, v16.16b, v16.16b, #8 \n" /* b0b1 -> b1b0 */
2394             "PMULL  v21.1q, v15.1d, v20.1d \n"     /* a0 * b1 = E */
2395             "PMULL2 v20.1q, v15.2d, v20.2d \n"     /* a1 * b0 = F */
2396             "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2397             "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2398             "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2399             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2400             "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2401 
2402             "#Reduce product from multiplication \n"
2403             "PMULL2 v20.1q, v19.2d, v23.2d \n"
2404             "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* v22 is all 0's */
2405             "EOR v19.16b, v19.16b, v21.16b \n"
2406             "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2407             "EOR v18.16b, v18.16b, v21.16b \n"
2408             "PMULL v20.1q, v19.1d, v23.1d  \n"
2409             "EOR v19.16b, v18.16b, v20.16b \n"
2410             "RBIT v17.16b, v19.16b \n"
2411             "STR q17, [%[xOut]] \n" /* GHASH x value for partial blocks */
2412 
2413             :[out] "=r" (out), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (in)
2414             ,[xOut] "=r" (xPt),"=m" (aes->H)
2415             :"0" (out), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
2416              [input] "3" (in)
2417             ,[inX] "4" (xPt), [inY] "m" (aes->H)
2418             : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
2419             "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
2420             ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24"
2421         );
2422     }
2423 
2424     /* take care of partial block sizes leftover */
2425     if (partial != 0) {
2426         IncrementGcmCounter(counter);
2427         wc_AesEncrypt(aes, counter, scratch);
2428         xorbuf(scratch, in, partial);
2429         XMEMCPY(out, scratch, partial);
2430 
2431         XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2432         XMEMCPY(scratch, out, partial);
2433         xorbuf(x, scratch, AES_BLOCK_SIZE);
2434         GMULT(x, aes->H);
2435     }
2436 
2437     /* Hash in the lengths of A and C in bits */
2438     XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2439     FlattenSzInBits(&scratch[0], authInSz);
2440     FlattenSzInBits(&scratch[8], sz);
2441     xorbuf(x, scratch, AES_BLOCK_SIZE);
2442     XMEMCPY(scratch, x, AES_BLOCK_SIZE);
2443 
2444     keyPt  = (byte*)aes->key;
2445     __asm__ __volatile__ (
2446 
2447         "LD1 {v16.16b}, [%[tag]] \n"
2448         "LD1 {v17.16b}, %[h] \n"
2449         "RBIT v16.16b, v16.16b \n"
2450 
2451         "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
2452         "PMULL  v18.1q, v16.1d, v17.1d \n"     /* a0 * b0 = C */
2453         "PMULL2 v19.1q, v16.2d, v17.2d \n"     /* a1 * b1 = D */
2454         "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
2455         "EXT v20.16b, v17.16b, v17.16b, #8 \n" /* b0b1 -> b1b0 */
2456         "LD1 {v9.2d-v11.2d}, [%[Key]], #48\n"
2457         "LD1 {v28.2d-v31.2d}, [%[Key]], #64\n"
2458         "PMULL  v21.1q, v16.1d, v20.1d \n"     /* a0 * b1 = E */
2459         "PMULL2 v20.1q, v16.2d, v20.2d \n"     /* a1 * b0 = F */
2460         "LD1 {v0.2d}, [%[ctr]]             \n"
2461 
2462         "#Set a register to all 0s using EOR \n"
2463         "EOR v22.16b, v22.16b, v22.16b \n"
2464         "EOR v20.16b, v20.16b, v21.16b \n"     /* F ^ E */
2465         "AESE v0.16b, v1.16b  \n"
2466         "AESMC v0.16b, v0.16b \n"
2467         "EXT v21.16b, v22.16b, v20.16b, #8 \n" /* get (F^E)[0] */
2468         "AESE v0.16b, v2.16b  \n"
2469         "AESMC v0.16b, v0.16b \n"
2470         "EOR v18.16b, v18.16b, v21.16b \n"     /* low 128 bits in v3 */
2471         "EXT v21.16b, v20.16b, v22.16b, #8 \n" /* get (F^E)[1] */
2472         "AESE v0.16b, v3.16b  \n"
2473         "AESMC v0.16b, v0.16b \n"
2474         "EOR v19.16b, v19.16b, v21.16b \n"     /* high 128 bits in v4 */
2475         "MOVI v23.16b, #0x87 \n"
2476         "AESE v0.16b, v4.16b  \n"
2477         "AESMC v0.16b, v0.16b \n"
2478         "USHR v23.2d, v23.2d, #56 \n"
2479         "PMULL2 v20.1q, v19.2d, v23.2d \n"
2480         "AESE v0.16b, v5.16b  \n"
2481         "AESMC v0.16b, v0.16b \n"
2482         "EXT v21.16b, v20.16b, v22.16b, #8 \n"
2483         "AESE v0.16b, v6.16b  \n"
2484         "AESMC v0.16b, v0.16b \n"
2485         "EOR v19.16b, v19.16b, v21.16b \n"
2486         "AESE v0.16b, v7.16b  \n"
2487         "AESMC v0.16b, v0.16b \n"
2488         "EXT v21.16b, v22.16b, v20.16b, #8 \n"
2489         "AESE v0.16b, v8.16b  \n"
2490         "AESMC v0.16b, v0.16b \n"
2491         "EOR v18.16b, v18.16b, v21.16b \n"
2492         "AESE v0.16b, v9.16b  \n"
2493         "AESMC v0.16b, v0.16b \n"
2494         "AESE v0.16b, v10.16b  \n"
2495         "AESMC v0.16b, v0.16b \n"
2496         "AESE v0.16b, v11.16b  \n"
2497         "AESMC v0.16b, v0.16b \n"
2498         "AESE v0.16b, v28.16b  \n"
2499         "AESMC v0.16b, v0.16b \n"
2500         "AESE v0.16b, v29.16b  \n"
2501         "AESMC v0.16b, v0.16b \n"
2502         "PMULL v20.1q, v19.1d, v23.1d  \n"
2503         "EOR v19.16b, v18.16b, v20.16b \n"
2504         "AESE v0.16b, v30.16b \n"
2505         "RBIT v19.16b, v19.16b \n"
2506         "EOR v0.16b, v0.16b, v31.16b \n"
2507         "EOR v19.16b, v19.16b, v0.16b \n"
2508         "STR q19, [%[out]] \n"
2509 
2510         :[out] "=r" (sPt), "=r" (keyPt), "=r" (iCtr)
2511         :[tag] "0" (sPt), [Key] "1" (keyPt),
2512         [ctr] "2" (iCtr) , [h] "m" (aes->H)
2513         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5",
2514         "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14",
2515         "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23",
2516         "v24","v25","v26","v27","v28","v29","v30","v31"
2517     );
2518 
2519 
2520     if (authTagSz > AES_BLOCK_SIZE) {
2521         XMEMCPY(authTag, scratch, AES_BLOCK_SIZE);
2522     }
2523     else {
2524         /* authTagSz can be smaller than AES_BLOCK_SIZE */
2525         XMEMCPY(authTag, scratch, authTagSz);
2526     }
2527 
2528     return 0;
2529 }
2530 #endif /* WOLFSSL_AES_256 */
2531 
2532 
2533 /* aarch64 with PMULL and PMULL2
2534  * Encrypt and tag data using AES with GCM mode.
2535  * aes: Aes structure having already been set with set key function
2536  * out: encrypted data output buffer
2537  * in:  plain text input buffer
2538  * sz:  size of plain text and out buffer
2539  * iv:  initialization vector
2540  * ivSz:      size of iv buffer
2541  * authTag:   buffer to hold tag
2542  * authTagSz: size of tag buffer
2543  * authIn:    additional data buffer
2544  * authInSz:  size of additional data buffer
2545  *
2546  * Notes:
2547  * GHASH multiplication based from Algorithm 1 from Intel GCM white paper.
2548  * "Carry-Less Multiplication and Its Usage for Computing the GCM Mode"
2549  *
2550  * GHASH reduction Based from White Paper "Implementing GCM on ARMv8"
2551  * by Conrado P.L. Gouvea and Julio Lopez reduction on 256bit value using
2552  * Algorithm 5
2553  */
wc_AesGcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)2554 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
2555                    const byte* iv, word32 ivSz,
2556                    byte* authTag, word32 authTagSz,
2557                    const byte* authIn, word32 authInSz)
2558 {
2559     /* sanity checks */
2560     if (aes == NULL || (iv == NULL && ivSz > 0) ||
2561                        (authTag == NULL) ||
2562                        (authIn == NULL && authInSz > 0) ||
2563                        (ivSz == 0)) {
2564         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
2565         return BAD_FUNC_ARG;
2566     }
2567 
2568     if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) {
2569         WOLFSSL_MSG("GcmEncrypt authTagSz error");
2570         return BAD_FUNC_ARG;
2571     }
2572 
2573     switch (aes->rounds) {
2574 #ifdef WOLFSSL_AES_128
2575         case 10:
2576             return Aes128GcmEncrypt(aes, out, in, sz, iv, ivSz,
2577                                     authTag, authTagSz, authIn, authInSz);
2578 #endif
2579 #ifdef WOLFSSL_AES_192
2580         case 12:
2581             return Aes192GcmEncrypt(aes, out, in, sz, iv, ivSz,
2582                                     authTag, authTagSz, authIn, authInSz);
2583 #endif
2584 #ifdef WOLFSSL_AES_256
2585         case 14:
2586             return Aes256GcmEncrypt(aes, out, in, sz, iv, ivSz,
2587                                     authTag, authTagSz, authIn, authInSz);
2588 #endif
2589         default:
2590             WOLFSSL_MSG("AES-GCM invalid round number");
2591             return BAD_FUNC_ARG;
2592     }
2593 }
2594 
2595 
2596 #ifdef HAVE_AES_DECRYPT
/*
 * Check tag and decrypt data using AES with GCM mode.
 * aes: Aes structure having already been set with set key function
 * out: decrypted data output buffer
 * in:  cipher text buffer
 * sz:  size of plain text and out buffer
 * iv:  initialization vector
 * ivSz:      size of iv buffer
 * authTag:   buffer holding tag
 * authTagSz: size of tag buffer
 * authIn:    additional data buffer
 * authInSz:  size of additional data buffer
 *
 * The tag is recomputed and compared first; nothing is written to out
 * unless the comparison succeeds.
 *
 * returns 0 on success
 *         BAD_FUNC_ARG   on a failed argument check, or when there is at
 *                        least one full block and aes->rounds matches no
 *                        key size compiled in
 *         AES_GCM_AUTH_E when the recomputed tag differs from authTag
 *         BAD_STATE_E    when p or c is NULL before handling a partial block
 */
int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                   const byte* iv, word32 ivSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    word32 blocks = sz / AES_BLOCK_SIZE;  /* number of full blocks */
    word32 partial = sz % AES_BLOCK_SIZE; /* bytes left after full blocks */
    const byte* c = in; /* read position, advanced by the assembly */
    byte* p = out;      /* write position, advanced by the assembly */
    byte counter[AES_BLOCK_SIZE];
    byte initialCounter[AES_BLOCK_SIZE];
    byte *ctr ;
    byte scratch[AES_BLOCK_SIZE];

    /* pointer to the head of counter, handed to the inline assembly */
    ctr = counter ;

    /* sanity checks */
    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
        ivSz == 0) {
        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
        return BAD_FUNC_ARG;
    }

    /* Build the initial counter block: for an IV of GCM_NONCE_MID_SZ bytes it
     * is the IV followed by zeros with the last byte set to 1, any other IV
     * size is run through GHASH and GMULT instead. */
    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(initialCounter, iv, ivSz);
        initialCounter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
        GMULT(initialCounter, aes->H);
    }
    XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);

    /* Calculate the authTag again using the received auth data and the
     * cipher text. */
    {
        byte Tprime[AES_BLOCK_SIZE];
        byte EKY0[AES_BLOCK_SIZE];

        GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
        GMULT(Tprime, aes->H);
        /* ctr still holds the initial counter block at this point */
        wc_AesEncrypt(aes, ctr, EKY0);
        xorbuf(Tprime, EKY0, sizeof(Tprime));

        /* only the first authTagSz bytes are compared, the caller may be
         * using a truncated tag */
        if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
            return AES_GCM_AUTH_E;
        }
    }

    /* do as many blocks as possible */
    if (blocks > 0) {
        /* pointer needed because it is incremented when read, causing
         * an issue with call to encrypt/decrypt leftovers */
        byte*  keyPt  = (byte*)aes->key;

        /* Each case below follows the same pattern:
         *   - w11 holds the number of blocks still to process
         *   - all round keys are loaded into vector registers up front
         *   - a vector holding the value 1 is built for the counter increment
         *   - per block: increment counter, encrypt it, XOR with the cipher
         *     text block and store the result through out
         *   - the last counter value used is stored back through ctr
         * p and c are post-incremented by 16 for every block, so afterwards
         * they point at the partial block (if any).
         *
         * NOTE(review): the counter increment is an ADD on 64-bit lanes after
         * byte reversal, so a carry out of the low 32 bits would propagate
         * into counter bytes 8..11. Confirm this matches the behavior of
         * IncrementGcmCounter used for the partial block below.
         */
        switch(aes->rounds) {
#ifdef WOLFSSL_AES_128
        case 10: /* AES 128 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"

            "#Create vector with the value 1   \n"
            "MOVI v14.16b, #1                  \n"
            "USHR v14.2d, v14.2d, #56          \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
            "EOR v13.16b, v13.16b, v13.16b     \n" /* v13 = 0, used by EXT */
            "EXT v14.16b, v14.16b, v13.16b, #8 \n"

            "LD1 {v9.2d-v11.2d}, [%[Key]], #48 \n"
            "LD1 {v12.2d}, [%[ctr]]            \n" /* v12 is the counter */
            "LD1 {v13.2d}, [%[input]], #16     \n" /* v13 reused for input */

            "1: \n"
            "REV64 v12.16b, v12.16b \n" /* network order */
            "EXT v12.16b, v12.16b, v12.16b, #8 \n"
            "ADD v12.2d, v12.2d, v14.2d \n" /* add 1 to counter */
            "EXT v12.16b, v12.16b, v12.16b, #8 \n"
            "REV64 v12.16b, v12.16b \n" /* revert from network order */
            "MOV v0.16b, v12.16b  \n" /* encrypt a copy, keep v12 intact */
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n" /* last round has no AESMC */
            "EOR v0.16b, v0.16b, v11.16b \n" /* final round key */

            "EOR v0.16b, v0.16b, v13.16b \n" /* key stream XOR cipher text */
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n" /* done, do not read past the last block */
            "LD1 {v13.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v12.16b}, [%[ctrOut]] \n"

            /* outputs are tied to the matching inputs by number so that the
             * post-incremented pointers are written back to p, keyPt and c */
            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
             [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14"
            );
            break;
#endif
#ifdef WOLFSSL_AES_192
        case 12: /* AES 192 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"

            "#Create vector with the value 1   \n"
            "MOVI v16.16b, #1                  \n"
            "USHR v16.2d, v16.2d, #56          \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
            "EOR v14.16b, v14.16b, v14.16b     \n" /* v14 = 0, used by EXT */
            "EXT v16.16b, v16.16b, v14.16b, #8 \n"

            "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
            "LD1 {v13.2d}, [%[Key]], #16       \n"
            "LD1 {v14.2d}, [%[ctr]]            \n" /* v14 is the counter */
            "LD1 {v15.2d}, [%[input]], #16     \n" /* v15 is the input */

            "1: \n"
            "REV64 v14.16b, v14.16b \n" /* network order */
            "EXT v14.16b, v14.16b, v14.16b, #8 \n"
            "ADD v14.2d, v14.2d, v16.2d \n" /* add 1 to counter */
            "EXT v14.16b, v14.16b, v14.16b, #8 \n"
            "REV64 v14.16b, v14.16b \n" /* revert from network order */
            "MOV v0.16b, v14.16b  \n" /* encrypt a copy, keep v14 intact */
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v11.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v12.16b \n" /* last round has no AESMC */
            "EOR v0.16b, v0.16b, v13.16b \n" /* final round key */

            "EOR v0.16b, v0.16b, v15.16b \n" /* key stream XOR cipher text */
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n" /* done, do not read past the last block */
            "LD1 {v15.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v14.2d}, [%[ctrOut]]   \n"

            /* same operand layout as the AES 128 case */
            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
             [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
            "v16"
            );
            break;
#endif /* WOLFSSL_AES_192 */
#ifdef WOLFSSL_AES_256
        case 14: /* AES 256 BLOCK */
            __asm__ __volatile__ (
            "MOV w11, %w[blocks] \n"
            "LD1 {v1.2d-v4.2d}, [%[Key]], #64  \n"

            "#Create vector with the value 1   \n"
            "MOVI v18.16b, #1                  \n"
            "USHR v18.2d, v18.2d, #56          \n"
            "LD1 {v5.2d-v8.2d}, [%[Key]], #64  \n"
            "EOR v19.16b, v19.16b, v19.16b     \n" /* v19 = 0, used by EXT */
            "EXT v18.16b, v18.16b, v19.16b, #8 \n"

            "LD1 {v9.2d-v12.2d},  [%[Key]], #64 \n"
            "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
            "LD1 {v17.2d}, [%[ctr]]             \n" /* v17 is the counter */
            "LD1 {v16.2d}, [%[input]], #16      \n" /* v16 is the input */

            "1: \n"
            "REV64 v17.16b, v17.16b \n" /* network order */
            "EXT v17.16b, v17.16b, v17.16b, #8 \n"
            "ADD v17.2d, v17.2d, v18.2d \n" /* add 1 to counter */
            "EXT v17.16b, v17.16b, v17.16b, #8 \n"
            "REV64 v17.16b, v17.16b \n" /* revert from network order */
            "MOV v0.16b, v17.16b  \n" /* encrypt a copy, keep v17 intact */
            "AESE v0.16b, v1.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v2.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v3.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v4.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "SUB w11, w11, #1     \n"
            "AESE v0.16b, v5.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v6.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v7.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v8.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v9.16b  \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v10.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v11.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v12.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v13.16b \n"
            "AESMC v0.16b, v0.16b \n"
            "AESE v0.16b, v14.16b \n" /* last round has no AESMC */
            "EOR v0.16b, v0.16b, v15.16b \n" /* final round key */

            "EOR v0.16b, v0.16b, v16.16b \n" /* key stream XOR cipher text */
            "ST1 {v0.2d}, [%[out]], #16  \n"

            "CBZ w11, 2f \n" /* done, do not read past the last block */
            "LD1 {v16.2d}, [%[input]], #16 \n"
            "B 1b \n"

            "2: \n"
            "#store current counter value at the end \n"
            "ST1 {v17.2d}, [%[ctrOut]] \n"

            /* same operand layout as the AES 128 case */
            :[out] "=r" (p), "=r" (keyPt), [ctrOut] "=r" (ctr), "=r" (c)
            :"0" (p), [Key] "1" (keyPt), [ctr] "2" (ctr), [blocks] "r" (blocks),
             [input] "3" (c)
            : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
            "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
            "v16", "v17", "v18", "v19"
            );
            break;
#endif /* WOLFSSL_AES_256 */
        default:
            WOLFSSL_MSG("Bad AES-GCM round value");
            return BAD_FUNC_ARG;
        }
    }
    /* leftover bytes that do not fill a block: encrypt the next counter
     * value and use only the first partial bytes of the key stream */
    if (partial != 0) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);

        /* check if pointer is null after main AES-GCM blocks
         * helps static analysis */
        if (p == NULL || c == NULL) {
            return BAD_STATE_E;
        }
        xorbuf(scratch, c, partial);
        XMEMCPY(p, scratch, partial);
    }
    return 0;
}
2897 
2898 #endif /* HAVE_AES_DECRYPT */
2899 #endif /* HAVE_AESGCM */
2900 
2901 
2902 /***************************************
2903  * not 64 bit so use 32 bit mode
2904 ****************************************/
2905 #else
2906 
2907 /* AES CCM/GCM use encrypt direct but not decrypt */
2908 #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \
2909     defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
    /* Encrypt a single 16 byte block with the AArch32 AES instructions.
     * aes:      Aes structure, aes->key holds the round keys and aes->rounds
     *           the number of rounds (10, 12, anything else is handled as 14)
     * inBlock:  block to encrypt, 16 bytes are read
     * outBlock: buffer to hold the result, 16 bytes are written
     *
     * returns 0 always
     */
    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
            /*
              AESE exor's input with round key
                   shift rows of exor'ed result
                   sub bytes for shifted rows
             */

            /* local copy of the key pointer, it is advanced by the loads */
            word32* keyPt = aes->key;
            __asm__ __volatile__ (
                "VLD1.32 {q0}, [%[CtrIn]] \n"
                "VLDM %[Key]!, {q1-q4}    \n"

                /* rounds shared by every key size; q1-q4 are reloaded with
                 * the next round keys as soon as they have been used */
                "AESE.8 q0, q1\n"
                "AESMC.8 q0, q0\n"
                "AESE.8 q0, q2\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESE.8 q0, q3\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESE.8 q0, q4\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q3}, [%[Key]]!  \n"
                "AESE.8 q0, q1\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q4}, [%[Key]]!  \n"
                "AESE.8 q0, q2\n"
                "AESMC.8 q0, q0\n"
                "AESE.8 q0, q3\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESE.8 q0, q4\n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESE.8 q0, q1\n"
                "AESMC.8 q0, q0\n"
                "AESE.8 q0, q2\n"

                /* 10 rounds: skip straight to the final AddRoundKey */
                "MOV r12, %[R]    \n"
                "CMP r12, #10 \n"
                "BEQ 1f    \n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESE.8 q0, q1\n"
                "AESMC.8 q0, q0\n"
                "AESE.8 q0, q2\n"

                /* 12 rounds: done, otherwise two more rounds are run */
                "CMP r12, #12 \n"
                "BEQ 1f    \n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESE.8 q0, q1\n"
                "AESMC.8 q0, q0\n"
                "AESE.8 q0, q2\n"

                "#Final AddRoundKey then store result \n"
                "1: \n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "VEOR.32 q0, q0, q1\n"
                "VST1.32 {q0}, [%[CtrOut]]   \n"

                /* every output is tied by number to the matching input;
                 * aes->rounds is only read (MOV r12, %[R]) by the assembly */
                :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
                 "=r" (inBlock)
                :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
                 [CtrIn] "3" (inBlock)
                : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4"
            );

        return 0;
    }
2983 #endif /* AES_GCM, AES_CCM, DIRECT or COUNTER */
2984 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
2985     #ifdef HAVE_AES_DECRYPT
    /* Decrypt a single 16-byte block using the Aarch32 AES instructions.
     *
     * aes      AES object; aes->key is read sequentially as the round keys
     *          and aes->rounds (10, 12 or 14) selects how many rounds run.
     *          NOTE(review): presumably aes->key already holds the key
     *          schedule prepared for decryption - the setup code is not
     *          visible here, confirm against the key set function.
     * inBlock  16 bytes of input, loaded into q0.
     * outBlock receives the 16 byte result.
     *
     * No argument checking is done here. Always returns 0.
     */
    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
            /*
              AESD   exor's the state with the round key, then applies the
                     inverse shift rows and inverse sub bytes steps
              AESIMC applies the inverse mix columns step

              Round keys are streamed through q1-q4 with post-incremented
              loads from the key pointer while earlier rounds execute.
             */

            word32* keyPt = aes->key;
            __asm__ __volatile__ (
                /* q0 = input block, q1-q4 = first four round keys */
                "VLD1.32 {q0}, [%[CtrIn]] \n"
                "VLDM %[Key]!, {q1-q4}    \n"

                /* ten AESD steps common to every key size; the tenth is
                 * not followed by AESIMC until the round count is known */
                "AESD.8 q0, q1\n"
                "AESIMC.8 q0, q0\n"
                "AESD.8 q0, q2\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESD.8 q0, q3\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESD.8 q0, q4\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q3}, [%[Key]]!  \n"
                "AESD.8 q0, q1\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q4}, [%[Key]]!  \n"
                "AESD.8 q0, q2\n"
                "AESIMC.8 q0, q0\n"
                "AESD.8 q0, q3\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESD.8 q0, q4\n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESD.8 q0, q1\n"
                "AESIMC.8 q0, q0\n"
                "AESD.8 q0, q2\n"

                /* rounds == 10: skip straight to the final key addition,
                 * otherwise run two more AESD steps */
                "MOV r12, %[R] \n"
                "CMP r12, #10  \n"
                "BEQ 1f \n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESD.8 q0, q1\n"
                "AESIMC.8 q0, q0\n"
                "AESD.8 q0, q2\n"

                /* rounds == 12: done, otherwise two more AESD steps (14) */
                "CMP r12, #12  \n"
                "BEQ 1f \n"
                "VLD1.32 {q1}, [%[Key]]!  \n"
                "AESIMC.8 q0, q0\n"
                "VLD1.32 {q2}, [%[Key]]!  \n"
                "AESD.8 q0, q1\n"
                "AESIMC.8 q0, q0\n"
                "AESD.8 q0, q2\n"

                "#Final AddRoundKey then store result \n"
                "1: \n"
                "VLD1.32 {q1}, [%[Key]]! \n"
                "VEOR.32 q0, q0, q1\n"
                "VST1.32 {q0}, [%[CtrOut]]    \n"

                /* every operand is an output tied to its input because the
                 * key pointer is advanced by the loads; aes->rounds is only
                 * read (via r12) and is written back unchanged */
                :[CtrOut] "=r" (outBlock), "=r" (keyPt), "=r" (aes->rounds),
                 "=r" (inBlock)
                :"0" (outBlock), [Key] "1" (keyPt), [R] "2" (aes->rounds),
                 [CtrIn] "3" (inBlock)
                : "cc", "memory", "r12", "q0", "q1", "q2", "q3", "q4"
            );

        return 0;
}
3059     #endif /* HAVE_AES_DECRYPT */
3060 #endif /* DIRECT or COUNTER */
3061 
3062 /* AES-CBC */
3063 #ifdef HAVE_AES_CBC
wc_AesCbcEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)3064     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
3065     {
3066         word32 numBlocks = sz / AES_BLOCK_SIZE;
3067 
3068         if (aes == NULL || out == NULL || in == NULL) {
3069             return BAD_FUNC_ARG;
3070         }
3071 
3072         if (sz == 0) {
3073             return 0;
3074         }
3075 
3076 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
3077         if (sz % AES_BLOCK_SIZE) {
3078             return BAD_LENGTH_E;
3079         }
3080 #endif
3081 
3082         /* do as many block size ops as possible */
3083         if (numBlocks > 0) {
3084             word32* keyPt = aes->key;
3085             word32* regPt = aes->reg;
3086             /*
3087             AESE exor's input with round key
3088             shift rows of exor'ed result
3089             sub bytes for shifted rows
3090 
3091             note: grouping AESE & AESMC together as pairs reduces latency
3092             */
3093             switch(aes->rounds) {
3094 #ifdef WOLFSSL_AES_128
3095             case 10: /* AES 128 BLOCK */
3096                 __asm__ __volatile__ (
3097                 "MOV r11, %[blocks] \n"
3098                 "VLD1.32 {q1}, [%[Key]]!  \n"
3099                 "VLD1.32 {q2}, [%[Key]]!  \n"
3100                 "VLD1.32 {q3}, [%[Key]]!  \n"
3101                 "VLD1.32 {q4}, [%[Key]]!  \n"
3102                 "VLD1.32 {q5}, [%[Key]]!  \n"
3103                 "VLD1.32 {q6}, [%[Key]]!  \n"
3104                 "VLD1.32 {q7}, [%[Key]]!  \n"
3105                 "VLD1.32 {q8}, [%[Key]]!  \n"
3106                 "VLD1.32 {q9}, [%[Key]]!  \n"
3107                 "VLD1.32 {q10}, [%[Key]]! \n"
3108                 "VLD1.32 {q11}, [%[Key]]! \n"
3109                 "VLD1.32 {q0}, [%[reg]]   \n"
3110                 "VLD1.32 {q12}, [%[input]]!\n"
3111 
3112                 "1:\n"
3113                 "#CBC operations, xorbuf in with current aes->reg \n"
3114                 "VEOR.32 q0, q0, q12 \n"
3115                 "AESE.8 q0, q1 \n"
3116                 "AESMC.8 q0, q0\n"
3117                 "AESE.8 q0, q2 \n"
3118                 "AESMC.8 q0, q0\n"
3119                 "AESE.8 q0, q3 \n"
3120                 "AESMC.8 q0, q0\n"
3121                 "AESE.8 q0, q4 \n"
3122                 "AESMC.8 q0, q0\n"
3123                 "AESE.8 q0, q5 \n"
3124                 "AESMC.8 q0, q0\n"
3125                 "AESE.8 q0, q6 \n"
3126                 "AESMC.8 q0, q0\n"
3127                 "AESE.8 q0, q7 \n"
3128                 "AESMC.8 q0, q0\n"
3129                 "AESE.8 q0, q8 \n"
3130                 "AESMC.8 q0, q0\n"
3131                 "AESE.8 q0, q9 \n"
3132                 "AESMC.8 q0, q0\n"
3133                 "AESE.8 q0, q10\n"
3134                 "VEOR.32 q0, q0, q11 \n"
3135                 "SUB r11, r11, #1    \n"
3136                 "VST1.32 {q0}, [%[out]]!   \n"
3137 
3138                 "CMP r11, #0   \n"
3139                 "BEQ 2f \n"
3140                 "VLD1.32 {q12}, [%[input]]! \n"
3141                 "B 1b \n"
3142 
3143                 "2:\n"
3144                 "#store current counter value at the end \n"
3145                 "VST1.32 {q0}, [%[regOut]] \n"
3146 
3147                 :[out] "=r" (out), [regOut] "=r" (regPt)
3148                 :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
3149                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3150                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3151                 "q6", "q7", "q8", "q9", "q10", "q11", "q12"
3152                 );
3153                 break;
3154 #endif /* WOLFSSL_AES_128 */
3155 #ifdef WOLFSSL_AES_192
3156             case 12: /* AES 192 BLOCK */
3157                 __asm__ __volatile__ (
3158                 "MOV r11, %[blocks] \n"
3159                 "VLD1.32 {q1}, [%[Key]]!  \n"
3160                 "VLD1.32 {q2}, [%[Key]]!  \n"
3161                 "VLD1.32 {q3}, [%[Key]]!  \n"
3162                 "VLD1.32 {q4}, [%[Key]]!  \n"
3163                 "VLD1.32 {q5}, [%[Key]]!  \n"
3164                 "VLD1.32 {q6}, [%[Key]]!  \n"
3165                 "VLD1.32 {q7}, [%[Key]]!  \n"
3166                 "VLD1.32 {q8}, [%[Key]]!  \n"
3167                 "VLD1.32 {q9}, [%[Key]]!  \n"
3168                 "VLD1.32 {q10}, [%[Key]]! \n"
3169                 "VLD1.32 {q11}, [%[Key]]! \n"
3170                 "VLD1.32 {q0}, [%[reg]]   \n"
3171                 "VLD1.32 {q12}, [%[input]]!\n"
3172                 "VLD1.32 {q13}, [%[Key]]!  \n"
3173                 "VLD1.32 {q14}, [%[Key]]!  \n"
3174 
3175                 "1:\n"
3176                 "#CBC operations, xorbuf in with current aes->reg \n"
3177                 "VEOR.32 q0, q0, q12 \n"
3178                 "AESE.8 q0, q1 \n"
3179                 "AESMC.8 q0, q0\n"
3180                 "AESE.8 q0, q2 \n"
3181                 "AESMC.8 q0, q0\n"
3182                 "AESE.8 q0, q3 \n"
3183                 "AESMC.8 q0, q0\n"
3184                 "AESE.8 q0, q4 \n"
3185                 "AESMC.8 q0, q0\n"
3186                 "AESE.8 q0, q5 \n"
3187                 "AESMC.8 q0, q0\n"
3188                 "AESE.8 q0, q6 \n"
3189                 "AESMC.8 q0, q0\n"
3190                 "AESE.8 q0, q7 \n"
3191                 "AESMC.8 q0, q0\n"
3192                 "AESE.8 q0, q8 \n"
3193                 "AESMC.8 q0, q0\n"
3194                 "AESE.8 q0, q9 \n"
3195                 "AESMC.8 q0, q0\n"
3196                 "AESE.8 q0, q10 \n"
3197                 "AESMC.8 q0, q0\n"
3198                 "AESE.8 q0, q11 \n"
3199                 "AESMC.8 q0, q0\n"
3200                 "AESE.8 q0, q13\n"
3201                 "VEOR.32 q0, q0, q14 \n"
3202                 "SUB r11, r11, #1    \n"
3203                 "VST1.32 {q0}, [%[out]]!   \n"
3204 
3205                 "CMP r11, #0   \n"
3206                 "BEQ 2f \n"
3207                 "VLD1.32 {q12}, [%[input]]! \n"
3208                 "B 1b \n"
3209 
3210                 "2:\n"
3211                 "#store current counter qalue at the end \n"
3212                 "VST1.32 {q0}, [%[regOut]] \n"
3213 
3214                 :[out] "=r" (out), [regOut] "=r" (regPt)
3215                 :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
3216                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3217                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3218                 "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14"
3219                 );
3220                 break;
3221 #endif /* WOLFSSL_AES_192 */
3222 #ifdef WOLFSSL_AES_256
3223             case 14: /* AES 256 BLOCK */
3224                 __asm__ __volatile__ (
3225                 "MOV r11, %[blocks] \n"
3226                 "VLD1.32 {q1}, [%[Key]]!  \n"
3227                 "VLD1.32 {q2}, [%[Key]]!  \n"
3228                 "VLD1.32 {q3}, [%[Key]]!  \n"
3229                 "VLD1.32 {q4}, [%[Key]]!  \n"
3230                 "VLD1.32 {q5}, [%[Key]]!  \n"
3231                 "VLD1.32 {q6}, [%[Key]]!  \n"
3232                 "VLD1.32 {q7}, [%[Key]]!  \n"
3233                 "VLD1.32 {q8}, [%[Key]]!  \n"
3234                 "VLD1.32 {q9}, [%[Key]]!  \n"
3235                 "VLD1.32 {q10}, [%[Key]]! \n"
3236                 "VLD1.32 {q11}, [%[Key]]! \n"
3237                 "VLD1.32 {q0}, [%[reg]]   \n"
3238                 "VLD1.32 {q12}, [%[input]]!\n"
3239                 "VLD1.32 {q13}, [%[Key]]!  \n"
3240                 "VLD1.32 {q14}, [%[Key]]!  \n"
3241 
3242                 "1:\n"
3243                 "#CBC operations, xorbuf in with current aes->reg \n"
3244                 "VEOR.32 q0, q0, q12 \n"
3245                 "AESE.8 q0, q1 \n"
3246                 "AESMC.8 q0, q0\n"
3247                 "AESE.8 q0, q2 \n"
3248                 "AESMC.8 q0, q0\n"
3249                 "AESE.8 q0, q3 \n"
3250                 "AESMC.8 q0, q0\n"
3251                 "AESE.8 q0, q4 \n"
3252                 "AESMC.8 q0, q0\n"
3253                 "AESE.8 q0, q5 \n"
3254                 "AESMC.8 q0, q0\n"
3255                 "AESE.8 q0, q6 \n"
3256                 "AESMC.8 q0, q0\n"
3257                 "AESE.8 q0, q7 \n"
3258                 "AESMC.8 q0, q0\n"
3259                 "AESE.8 q0, q8 \n"
3260                 "AESMC.8 q0, q0\n"
3261                 "AESE.8 q0, q9 \n"
3262                 "AESMC.8 q0, q0\n"
3263                 "AESE.8 q0, q10 \n"
3264                 "AESMC.8 q0, q0\n"
3265                 "AESE.8 q0, q11 \n"
3266                 "AESMC.8 q0, q0\n"
3267                 "AESE.8 q0, q13 \n"
3268                 "AESMC.8 q0, q0\n"
3269                 "VLD1.32 {q15}, [%[Key]]!  \n"
3270                 "AESE.8 q0, q14 \n"
3271                 "AESMC.8 q0, q0\n"
3272                 "AESE.8 q0, q15\n"
3273                 "VLD1.32 {q15}, [%[Key]]   \n"
3274                 "VEOR.32 q0, q0, q15 \n"
3275                 "SUB r11, r11, #1    \n"
3276                 "VST1.32 {q0}, [%[out]]!   \n"
3277                 "SUB %[Key], %[Key], #16   \n"
3278 
3279                 "CMP r11, #0   \n"
3280                 "BEQ 2f \n"
3281                 "VLD1.32 {q12}, [%[input]]! \n"
3282                 "B 1b \n"
3283 
3284                 "2:\n"
3285                 "#store current counter qalue at the end \n"
3286                 "VST1.32 {q0}, [%[regOut]] \n"
3287 
3288                 :[out] "=r" (out), [regOut] "=r" (regPt), "=r" (keyPt)
3289                 :"0" (out), [Key] "2" (keyPt), [input] "r" (in),
3290                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3291                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3292                 "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
3293                 );
3294                 break;
3295 #endif /* WOLFSSL_AES_256 */
3296             default:
3297                 WOLFSSL_MSG("Bad AES-CBC round value");
3298                 return BAD_FUNC_ARG;
3299             }
3300         }
3301 
3302         return 0;
3303     }
3304 
3305     #ifdef HAVE_AES_DECRYPT
wc_AesCbcDecrypt(Aes * aes,byte * out,const byte * in,word32 sz)3306     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
3307     {
3308         word32 numBlocks = sz / AES_BLOCK_SIZE;
3309 
3310         if (aes == NULL || out == NULL || in == NULL) {
3311             return BAD_FUNC_ARG;
3312         }
3313 
3314         if (sz == 0) {
3315             return 0;
3316         }
3317 
3318         if (sz % AES_BLOCK_SIZE) {
3319 #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS
3320             return BAD_LENGTH_E;
3321 #else
3322             return BAD_FUNC_ARG;
3323 #endif
3324         }
3325 
3326         /* do as many block size ops as possible */
3327         if (numBlocks > 0) {
3328             word32* keyPt = aes->key;
3329             word32* regPt = aes->reg;
3330             switch(aes->rounds) {
3331 #ifdef WOLFSSL_AES_128
3332             case 10: /* AES 128 BLOCK */
3333                 __asm__ __volatile__ (
3334                 "MOV r11, %[blocks] \n"
3335                 "VLD1.32 {q1}, [%[Key]]!  \n"
3336                 "VLD1.32 {q2}, [%[Key]]!  \n"
3337                 "VLD1.32 {q3}, [%[Key]]!  \n"
3338                 "VLD1.32 {q4}, [%[Key]]!  \n"
3339                 "VLD1.32 {q5}, [%[Key]]!  \n"
3340                 "VLD1.32 {q6}, [%[Key]]!  \n"
3341                 "VLD1.32 {q7}, [%[Key]]!  \n"
3342                 "VLD1.32 {q8}, [%[Key]]!  \n"
3343                 "VLD1.32 {q9}, [%[Key]]!  \n"
3344                 "VLD1.32 {q10}, [%[Key]]! \n"
3345                 "VLD1.32 {q11}, [%[Key]]! \n"
3346                 "VLD1.32 {q13}, [%[reg]]  \n"
3347                 "VLD1.32 {q0}, [%[input]]!\n"
3348 
3349                 "1:\n"
3350                 "VMOV.32 q12, q0 \n"
3351                 "AESD.8 q0, q1\n"
3352                 "AESIMC.8 q0, q0\n"
3353                 "AESD.8 q0, q2\n"
3354                 "AESIMC.8 q0, q0\n"
3355                 "AESD.8 q0, q3\n"
3356                 "AESIMC.8 q0, q0\n"
3357                 "AESD.8 q0, q4\n"
3358                 "AESIMC.8 q0, q0\n"
3359                 "AESD.8 q0, q5\n"
3360                 "AESIMC.8 q0, q0\n"
3361                 "AESD.8 q0, q6\n"
3362                 "AESIMC.8 q0, q0\n"
3363                 "AESD.8 q0, q7\n"
3364                 "AESIMC.8 q0, q0\n"
3365                 "AESD.8 q0, q8\n"
3366                 "AESIMC.8 q0, q0\n"
3367                 "AESD.8 q0, q9\n"
3368                 "AESIMC.8 q0, q0\n"
3369                 "AESD.8 q0, q10\n"
3370                 "VEOR.32 q0, q0, q11\n"
3371 
3372                 "VEOR.32 q0, q0, q13\n"
3373                 "SUB r11, r11, #1            \n"
3374                 "VST1.32 {q0}, [%[out]]!  \n"
3375                 "VMOV.32 q13, q12        \n"
3376 
3377                 "CMP r11, #0 \n"
3378                 "BEQ 2f \n"
3379                 "VLD1.32 {q0}, [%[input]]!  \n"
3380                 "B 1b      \n"
3381 
3382                 "2: \n"
3383                 "#store current counter qalue at the end \n"
3384                 "VST1.32 {q13}, [%[regOut]] \n"
3385 
3386                 :[out] "=r" (out), [regOut] "=r" (regPt)
3387                 :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
3388                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3389                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3390                 "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13"
3391                 );
3392                 break;
3393 #endif /* WOLFSSL_AES_128 */
3394 #ifdef WOLFSSL_AES_192
3395             case 12: /* AES 192 BLOCK */
3396                 __asm__ __volatile__ (
3397                 "MOV r11, %[blocks] \n"
3398                 "VLD1.32 {q1}, [%[Key]]!  \n"
3399                 "VLD1.32 {q2}, [%[Key]]!  \n"
3400                 "VLD1.32 {q3}, [%[Key]]!  \n"
3401                 "VLD1.32 {q4}, [%[Key]]!  \n"
3402                 "VLD1.32 {q5}, [%[Key]]!  \n"
3403                 "VLD1.32 {q6}, [%[Key]]!  \n"
3404                 "VLD1.32 {q7}, [%[Key]]!  \n"
3405                 "VLD1.32 {q8}, [%[Key]]!  \n"
3406                 "VLD1.32 {q9}, [%[Key]]!  \n"
3407                 "VLD1.32 {q10}, [%[Key]]! \n"
3408                 "VLD1.32 {q11}, [%[Key]]! \n"
3409                 "VLD1.32 {q12}, [%[Key]]! \n"
3410                 "VLD1.32 {q13}, [%[Key]]! \n"
3411                 "VLD1.32 {q14}, [%[reg]]  \n"
3412                 "VLD1.32 {q0}, [%[input]]!\n"
3413 
3414                 "1:    \n"
3415                 "VMOV.32 q15, q0 \n"
3416                 "AESD.8 q0, q1\n"
3417                 "AESIMC.8 q0, q0\n"
3418                 "AESD.8 q0, q2\n"
3419                 "AESIMC.8 q0, q0\n"
3420                 "AESD.8 q0, q3\n"
3421                 "AESIMC.8 q0, q0\n"
3422                 "AESD.8 q0, q4\n"
3423                 "AESIMC.8 q0, q0\n"
3424                 "AESD.8 q0, q5\n"
3425                 "AESIMC.8 q0, q0\n"
3426                 "AESD.8 q0, q6\n"
3427                 "AESIMC.8 q0, q0\n"
3428                 "AESD.8 q0, q7\n"
3429                 "AESIMC.8 q0, q0\n"
3430                 "AESD.8 q0, q8\n"
3431                 "AESIMC.8 q0, q0\n"
3432                 "AESD.8 q0, q9\n"
3433                 "AESIMC.8 q0, q0\n"
3434                 "AESD.8 q0, q10\n"
3435                 "AESIMC.8 q0, q0\n"
3436                 "AESD.8 q0, q11\n"
3437                 "AESIMC.8 q0, q0\n"
3438                 "AESD.8 q0, q12\n"
3439                 "VEOR.32 q0, q0, q13\n"
3440 
3441                 "VEOR.32 q0, q0, q14\n"
3442                 "SUB r11, r11, #1        \n"
3443                 "VST1.32 {q0}, [%[out]]! \n"
3444                 "VMOV.32 q14, q15        \n"
3445 
3446                 "CMP r11, #0 \n"
3447                 "BEQ 2f \n"
3448                 "VLD1.32 {q0}, [%[input]]!  \n"
3449                 "B 1b \n"
3450 
3451                 "2:\n"
3452                 "#store current counter value at the end \n"
3453                 "VST1.32 {q15}, [%[regOut]] \n"
3454 
3455                 :[out] "=r" (out), [regOut] "=r" (regPt)
3456                 :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
3457                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3458                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3459                 "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
3460                 );
3461                 break;
3462 #endif /* WOLFSSL_AES_192 */
3463 #ifdef WOLFSSL_AES_256
3464             case 14: /* AES 256 BLOCK */
3465                 __asm__ __volatile__ (
3466                 "MOV r11, %[blocks] \n"
3467                 "VLD1.32 {q1}, [%[Key]]!  \n"
3468                 "VLD1.32 {q2}, [%[Key]]!  \n"
3469                 "VLD1.32 {q3}, [%[Key]]!  \n"
3470                 "VLD1.32 {q4}, [%[Key]]!  \n"
3471                 "VLD1.32 {q5}, [%[Key]]!  \n"
3472                 "VLD1.32 {q6}, [%[Key]]!  \n"
3473                 "VLD1.32 {q7}, [%[Key]]!  \n"
3474                 "VLD1.32 {q8}, [%[Key]]!  \n"
3475                 "VLD1.32 {q9}, [%[Key]]!  \n"
3476                 "VLD1.32 {q10}, [%[Key]]! \n"
3477                 "VLD1.32 {q11}, [%[Key]]! \n"
3478                 "VLD1.32 {q12}, [%[Key]]! \n"
3479                 "VLD1.32 {q14}, [%[reg]]  \n"
3480                 "VLD1.32 {q0}, [%[input]]!\n"
3481 
3482                 "1:\n"
3483                 "VMOV.32 q15, q0 \n"
3484                 "AESD.8 q0, q1\n"
3485                 "AESIMC.8 q0, q0\n"
3486                 "AESD.8 q0, q2\n"
3487                 "AESIMC.8 q0, q0\n"
3488                 "AESD.8 q0, q3\n"
3489                 "AESIMC.8 q0, q0\n"
3490                 "AESD.8 q0, q4\n"
3491                 "AESIMC.8 q0, q0\n"
3492                 "AESD.8 q0, q5\n"
3493                 "AESIMC.8 q0, q0\n"
3494                 "AESD.8 q0, q6\n"
3495                 "AESIMC.8 q0, q0\n"
3496                 "AESD.8 q0, q7\n"
3497                 "AESIMC.8 q0, q0\n"
3498                 "AESD.8 q0, q8\n"
3499                 "AESIMC.8 q0, q0\n"
3500                 "AESD.8 q0, q9\n"
3501                 "AESIMC.8 q0, q0\n"
3502                 "AESD.8 q0, q10\n"
3503                 "AESIMC.8 q0, q0\n"
3504                 "AESD.8 q0, q11\n"
3505                 "AESIMC.8 q0, q0\n"
3506                 "VLD1.32 {q13}, [%[Key]]!  \n"
3507                 "AESD.8 q0, q12\n"
3508                 "AESIMC.8 q0, q0\n"
3509                 "AESD.8 q0, q13\n"
3510                 "AESIMC.8 q0, q0\n"
3511                 "VLD1.32 {q13}, [%[Key]]!  \n"
3512                 "AESD.8 q0, q13\n"
3513                 "VLD1.32 {q13}, [%[Key]]  \n"
3514                 "VEOR.32 q0, q0, q13\n"
3515                 "SUB %[Key], %[Key], #32 \n"
3516 
3517                 "VEOR.32 q0, q0, q14\n"
3518                 "SUB r11, r11, #1            \n"
3519                 "VST1.32 {q0}, [%[out]]!  \n"
3520                 "VMOV.32 q14, q15        \n"
3521 
3522                 "CMP r11, #0 \n"
3523                 "BEQ 2f \n"
3524                 "VLD1.32 {q0}, [%[input]]!  \n"
3525                 "B 1b \n"
3526 
3527                 "2:\n"
3528                 "#store current counter value at the end \n"
3529                 "VST1.32 {q15}, [%[regOut]] \n"
3530 
3531                 :[out] "=r" (out), [regOut] "=r" (regPt)
3532                 :"0" (out), [Key] "r" (keyPt), [input] "r" (in),
3533                  [blocks] "r" (numBlocks), [reg] "1" (regPt)
3534                 : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3535                 "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
3536                 );
3537                 break;
3538 #endif /* WOLFSSL_AES_256 */
3539             default:
3540                 WOLFSSL_MSG("Bad AES-CBC round value");
3541                 return BAD_FUNC_ARG;
3542             }
3543         }
3544 
3545         return 0;
3546     }
3547     #endif
3548 
3549 #endif /* HAVE_AES_CBC */
3550 
3551 /* AES-CTR */
3552 #ifdef WOLFSSL_AES_COUNTER
3553 
3554         /* Increment AES counter */
IncrementAesCounter(byte * inOutCtr)3555         static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
3556         {
3557             int i;
3558 
3559             /* in network byte order so start at end and work back */
3560             for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
3561                 if (++inOutCtr[i])  /* we're done unless we overflow */
3562                     return;
3563             }
3564         }
3565 
wc_AesCtrEncrypt(Aes * aes,byte * out,const byte * in,word32 sz)3566         int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
3567         {
3568             byte* tmp;
3569             word32 numBlocks;
3570 
3571             if (aes == NULL || out == NULL || in == NULL) {
3572                 return BAD_FUNC_ARG;
3573             }
3574 
3575             tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
3576 
3577             /* consume any unused bytes left in aes->tmp */
3578             while (aes->left && sz) {
3579                *(out++) = *(in++) ^ *(tmp++);
3580                aes->left--;
3581                sz--;
3582             }
3583 
3584             /* do as many block size ops as possible */
3585             numBlocks = sz/AES_BLOCK_SIZE;
3586             if (numBlocks > 0) {
3587                 /* pointer needed because it is incremented when read, causing
3588                  * an issue with call to encrypt/decrypt leftovers */
3589                 word32*  keyPt  = aes->key;
3590                 word32*  regPt  = aes->reg;
3591                 sz           -= numBlocks * AES_BLOCK_SIZE;
3592                 switch(aes->rounds) {
3593 #ifdef WOLFSSL_AES_128
3594                 case 10: /* AES 128 BLOCK */
3595                     __asm__ __volatile__ (
3596                     "MOV r11, %[blocks] \n"
3597                     "VLDM %[Key]!, {q1-q4} \n"
3598 
3599                     "#Create vector with the value 1  \n"
3600                     "VMOV.u32 q15, #1                 \n"
3601                     "VSHR.u64 q15, q15, #32  \n"
3602                     "VLDM %[Key]!, {q5-q8} \n"
3603                     "VEOR.32 q14, q14, q14    \n"
3604                     "VLDM %[Key]!, {q9-q11} \n"
3605                     "VEXT.8 q14, q15, q14, #8\n"
3606 
3607                     "VLD1.32 {q13}, [%[reg]]\n"
3608 
3609                     /* double block */
3610                     "1:      \n"
3611                     "CMP r11, #1 \n"
3612                     "BEQ 2f    \n"
3613                     "CMP r11, #0 \n"
3614                     "BEQ 3f    \n"
3615 
3616                     "VMOV.32 q0, q13  \n"
3617                     "AESE.8 q0, q1\n"
3618                     "AESMC.8 q0, q0\n"
3619                     "VREV64.8 q13, q13 \n" /* network order */
3620                     "AESE.8 q0, q2\n"
3621                     "AESMC.8 q0, q0\n"
3622                     "VEXT.8 q13, q13, q13, #8 \n"
3623                     "SUB r11, r11, #2     \n"
3624                     "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
3625                     "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
3626                     "AESE.8 q0, q3\n"
3627                     "AESMC.8 q0, q0\n"
3628                     "VEXT.8 q15, q15, q15, #8 \n"
3629                     "VEXT.8 q13, q13, q13, #8 \n"
3630                     "AESE.8 q0, q4\n"
3631                     "AESMC.8 q0, q0\n"
3632                     "VREV64.8 q15, q15\n" /* revert from network order */
3633                     "VREV64.8 q13, q13\n" /* revert from network order */
3634                     "AESE.8 q0, q5\n"
3635                     "AESMC.8 q0, q0\n"
3636                     "AESE.8 q15, q1\n"
3637                     "AESMC.8 q15, q15\n"
3638 
3639                     "AESE.8 q0, q6\n"
3640                     "AESMC.8 q0, q0\n"
3641                     "AESE.8 q15, q2\n"
3642                     "AESMC.8 q15, q15\n"
3643 
3644                     "AESE.8 q0, q7\n"
3645                     "AESMC.8 q0, q0\n"
3646                     "AESE.8 q15, q3\n"
3647                     "AESMC.8 q15, q15\n"
3648 
3649                     "AESE.8 q0, q8\n"
3650                     "AESMC.8 q0, q0\n"
3651                     "AESE.8 q15, q4\n"
3652                     "AESMC.8 q15, q15\n"
3653 
3654                     "AESE.8 q0, q9\n"
3655                     "AESMC.8 q0, q0\n"
3656                     "AESE.8 q15, q5\n"
3657                     "AESMC.8 q15, q15\n"
3658 
3659                     "AESE.8 q0, q10\n"
3660                     "AESE.8 q15, q6\n"
3661                     "AESMC.8 q15, q15\n"
3662                     "VEOR.32 q0, q0, q11\n"
3663 
3664                     "AESE.8 q15, q7\n"
3665                     "AESMC.8 q15, q15\n"
3666                     "VLD1.32 {q12}, [%[input]]!  \n"
3667                     "AESE.8 q15, q8\n"
3668                     "AESMC.8 q15, q15\n"
3669 
3670                     "VEOR.32 q0, q0, q12\n"
3671                     "AESE.8 q15, q9\n"
3672                     "AESMC.8 q15, q15\n"
3673 
3674                     "VLD1.32 {q12}, [%[input]]!  \n"
3675                     "AESE.8 q15, q10\n"
3676                     "VST1.32 {q0}, [%[out]]!  \n"
3677                     "VEOR.32 q15, q15, q11\n"
3678                     "VEOR.32 q15, q15, q12\n"
3679                     "VST1.32 {q15}, [%[out]]!  \n"
3680 
3681                     "B 1b \n"
3682 
3683                     /* single block */
3684                     "2:      \n"
3685                     "VMOV.32 q0, q13  \n"
3686                     "AESE.8 q0, q1\n"
3687                     "AESMC.8 q0, q0\n"
3688                     "VREV64.8 q13, q13 \n" /* network order */
3689                     "AESE.8 q0, q2\n"
3690                     "AESMC.8 q0, q0\n"
3691                     "VEXT.8 q13, q13, q13, #8 \n"
3692                     "AESE.8 q0, q3\n"
3693                     "AESMC.8 q0, q0\n"
3694                     "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
3695                     "AESE.8 q0, q4\n"
3696                     "AESMC.8 q0, q0\n"
3697                     "SUB r11, r11, #1     \n"
3698                     "AESE.8 q0, q5\n"
3699                     "AESMC.8 q0, q0\n"
3700                     "VEXT.8 q13, q13, q13, #8 \n"
3701                     "AESE.8 q0, q6\n"
3702                     "AESMC.8 q0, q0\n"
3703                     "VREV64.8 q13, q13\n" /* revert from network order */
3704                     "AESE.8 q0, q7\n"
3705                     "AESMC.8 q0, q0\n"
3706                     "AESE.8 q0, q8\n"
3707                     "AESMC.8 q0, q0\n"
3708                     "AESE.8 q0, q9\n"
3709                     "AESMC.8 q0, q0\n"
3710                     "AESE.8 q0, q10\n"
3711                     "VLD1.32 {q12}, [%[input]]!  \n"
3712                     "VEOR.32 q0, q0, q11\n"
3713                     "#CTR operations, increment counter and xorbuf \n"
3714                     "VEOR.32 q0, q0, q12\n"
3715                     "VST1.32 {q0}, [%[out]]!  \n"
3716 
3717                     "3: \n"
3718                     "#store current counter qalue at the end \n"
3719                     "VST1.32 {q13}, [%[regOut]]   \n"
3720 
3721                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
3722                      "=r" (in)
3723                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
3724                      [blocks] "r" (numBlocks), [reg] "2" (regPt)
3725                     : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3726                     "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14", "q15"
3727                     );
3728                     break;
3729 #endif /* WOLFSSL_AES_128 */
3730 #ifdef WOLFSSL_AES_192
3731                 case 12: /* AES 192 BLOCK */
3732                     __asm__ __volatile__ (
3733                     "MOV r11, %[blocks] \n"
3734                     "VLDM %[Key]!, {q1-q4} \n"
3735 
3736                     "#Create vector with the value 1  \n"
3737                     "VMOV.u32 q15, #1                 \n"
3738                     "VSHR.u64 q15, q15, #32  \n"
3739                     "VLDM %[Key]!, {q5-q8} \n"
3740                     "VEOR.32 q14, q14, q14    \n"
3741                     "VEXT.8 q14, q15, q14, #8\n"
3742 
3743                     "VLDM %[Key]!, {q9-q10} \n"
3744                     "VLD1.32 {q13}, [%[reg]]\n"
3745 
3746                     /* double block */
3747                     "1:   \n"
3748                     "CMP r11, #1 \n"
3749                     "BEQ 2f \n"
3750                     "CMP r11, #0 \n"
3751                     "BEQ 3f   \n"
3752 
3753                     "VMOV.32 q0, q13\n"
3754                     "AESE.8 q0, q1\n"
3755                     "AESMC.8 q0, q0\n"
3756                     "VREV64.8 q13, q13 \n" /* network order */
3757                     "AESE.8 q0, q2\n"
3758                     "AESMC.8 q0, q0\n"
3759                     "VEXT.8 q13, q13, q13, #8 \n"
3760                     "SUB r11, r11, #2     \n"
3761                     "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
3762                     "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
3763                     "AESE.8 q0, q3\n"
3764                     "AESMC.8 q0, q0\n"
3765                     "VEXT.8 q15, q15, q15, #8 \n"
3766                     "VEXT.8 q13, q13, q13, #8 \n"
3767                     "AESE.8 q0, q4\n"
3768                     "AESMC.8 q0, q0\n"
3769                     "VREV64.8 q15, q15\n" /* revert from network order */
3770                     "VREV64.8 q13, q13\n" /* revert from network order */
3771                     "AESE.8 q0, q5\n"
3772                     "AESMC.8 q0, q0\n"
3773                     "AESE.8 q15, q1\n"
3774                     "AESMC.8 q15, q15\n"
3775 
3776                     "AESE.8 q0, q6\n"
3777                     "AESMC.8 q0, q0\n"
3778                     "AESE.8 q15, q2\n"
3779                     "AESMC.8 q15, q15\n"
3780 
3781                     "AESE.8 q0, q7\n"
3782                     "AESMC.8 q0, q0\n"
3783                     "AESE.8 q15, q3\n"
3784                     "AESMC.8 q15, q15\n"
3785 
3786                     "AESE.8 q0, q8\n"
3787                     "AESMC.8 q0, q0\n"
3788                     "AESE.8 q15, q4\n"
3789                     "AESMC.8 q15, q15\n"
3790 
3791                     "AESE.8 q0, q9\n"
3792                     "AESMC.8 q0, q0\n"
3793                     "AESE.8 q15, q5\n"
3794                     "AESMC.8 q15, q15\n"
3795 
3796                     "AESE.8 q0, q10\n"
3797                     "AESMC.8 q0, q0\n"
3798                     "VLD1.32 {q11}, [%[Key]]! \n"
3799                     "AESE.8 q15, q6\n"
3800                     "AESMC.8 q15, q15\n"
3801 
3802                     "AESE.8 q0, q11\n"
3803                     "AESMC.8 q0, q0\n"
3804                     "AESE.8 q15, q7\n"
3805                     "AESMC.8 q15, q15\n"
3806 
3807                     "AESE.8 q15, q8\n"
3808                     "AESMC.8 q15, q15\n"
3809 
3810                     "VLD1.32 {q12}, [%[Key]]! \n"
3811                     "AESE.8 q15, q9\n"
3812                     "AESMC.8 q15, q15\n"
3813                     "AESE.8 q15, q10\n"
3814                     "AESMC.8 q15, q15\n"
3815 
3816                     "AESE.8 q15, q11\n"
3817                     "AESMC.8 q15, q15\n"
3818                     "VLD1.32 {q11}, [%[Key]] \n"
3819                     "AESE.8 q0, q12\n"
3820                     "AESE.8 q15, q12\n"
3821 
3822                     "VLD1.32 {q12}, [%[input]]!  \n"
3823                     "VEOR.32 q0, q0, q11\n"
3824                     "VEOR.32 q15, q15, q11\n"
3825                     "VEOR.32 q0, q0, q12\n"
3826 
3827                     "VLD1.32 {q12}, [%[input]]!  \n"
3828                     "VST1.32 {q0}, [%[out]]!  \n"
3829                     "VEOR.32 q15, q15, q12\n"
3830                     "VST1.32 {q15}, [%[out]]!  \n"
3831                     "SUB %[Key], %[Key], #32 \n"
3832 
3833                     "B 1b \n"
3834 
3835 
3836                     /* single block */
3837                     "2:      \n"
3838                     "VLD1.32 {q11}, [%[Key]]! \n"
3839                     "VMOV.32 q0, q13  \n"
3840                     "AESE.8 q0, q1\n"
3841                     "AESMC.8 q0, q0\n"
3842                     "VREV64.8 q13, q13 \n" /* network order */
3843                     "AESE.8 q0, q2\n"
3844                     "AESMC.8 q0, q0\n"
3845                     "VEXT.8 q13, q13, q13, #8 \n"
3846                     "AESE.8 q0, q3\n"
3847                     "AESMC.8 q0, q0\n"
3848                     "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
3849                     "AESE.8 q0, q4\n"
3850                     "AESMC.8 q0, q0\n"
3851                     "SUB r11, r11, #1     \n"
3852                     "AESE.8 q0, q5\n"
3853                     "AESMC.8 q0, q0\n"
3854                     "VEXT.8 q13, q13, q13, #8 \n"
3855                     "AESE.8 q0, q6\n"
3856                     "AESMC.8 q0, q0\n"
3857                     "VREV64.8 q13, q13\n" /* revert from network order */
3858                     "AESE.8 q0, q7\n"
3859                     "AESMC.8 q0, q0\n"
3860                     "AESE.8 q0, q8\n"
3861                     "AESMC.8 q0, q0\n"
3862                     "AESE.8 q0, q9\n"
3863                     "AESMC.8 q0, q0\n"
3864                     "AESE.8 q0, q10\n"
3865                     "AESMC.8 q0, q0\n"
3866                     "VLD1.32 {q12}, [%[Key]]! \n"
3867                     "AESE.8 q0, q11\n"
3868                     "AESMC.8 q0, q0\n"
3869                     "VLD1.32 {q11}, [%[Key]] \n"
3870                     "AESE.8 q0, q12\n"
3871                     "VLD1.32 {q12}, [%[input]]! \n"
3872                     "VEOR.32 q0, q0, q11\n"
3873                     "#CTR operations, increment counter and xorbuf \n"
3874                     "VEOR.32 q0, q0, q12\n"
3875                     "VST1.32 {q0}, [%[out]]!  \n"
3876 
3877                     "3: \n"
3878                     "#store current counter qalue at the end \n"
3879                     "VST1.32 {q13}, [%[regOut]]   \n"
3880 
3881                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
3882                      "=r" (in)
3883                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
3884                      [blocks] "r" (numBlocks), [reg] "2" (regPt)
3885                     : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
3886                     "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14"
3887                     );
3888                     break;
3889 #endif /* WOLFSSL_AES_192 */
3890 #ifdef WOLFSSL_AES_256
3891                 case 14: /* AES 256 BLOCK */
3892                     __asm__ __volatile__ (
3893                     "MOV r11, %[blocks] \n"
3894                     "VLDM %[Key]!, {q1-q4} \n"
3895 
3896                     "#Create vector with the value 1  \n"
3897                     "VMOV.u32 q15, #1                 \n"
3898                     "VSHR.u64 q15, q15, #32  \n"
3899                     "VLDM %[Key]!, {q5-q8} \n"
3900                     "VEOR.32 q14, q14, q14    \n"
3901                     "VEXT.8 q14, q15, q14, #8\n"
3902 
3903                     "VLDM %[Key]!, {q9-q10} \n"
3904                     "VLD1.32 {q13}, [%[reg]]\n"
3905 
3906                     /* double block */
3907                     "1:      \n"
3908                     "CMP r11, #1 \n"
3909                     "BEQ 2f    \n"
3910                     "CMP r11, #0 \n"
3911                     "BEQ 3f    \n"
3912 
3913                     "VMOV.32 q0, q13  \n"
3914                     "AESE.8 q0, q1\n"
3915                     "AESMC.8 q0, q0\n"
3916                     "VREV64.8 q13, q13 \n" /* network order */
3917                     "AESE.8 q0, q2\n"
3918                     "AESMC.8 q0, q0\n"
3919                     "VEXT.8 q13, q13, q13, #8 \n"
3920                     "SUB r11, r11, #2     \n"
3921                     "VADD.i32 q15, q13, q14 \n" /* add 1 to counter */
3922                     "VADD.i32 q13, q15, q14 \n" /* add 1 to counter */
3923                     "AESE.8 q0, q3\n"
3924                     "AESMC.8 q0, q0\n"
3925                     "VEXT.8 q15, q15, q15, #8 \n"
3926                     "VEXT.8 q13, q13, q13, #8 \n"
3927                     "AESE.8 q0, q4\n"
3928                     "AESMC.8 q0, q0\n"
3929                     "VREV64.8 q15, q15\n" /* revert from network order */
3930                     "AESE.8 q0, q5\n"
3931                     "AESMC.8 q0, q0\n"
3932                     "VREV64.8 q13, q13\n" /* revert from network order */
3933                     "AESE.8 q15, q1\n"
3934                     "AESMC.8 q15, q15\n"
3935 
3936                     "AESE.8 q0, q6\n"
3937                     "AESMC.8 q0, q0\n"
3938                     "AESE.8 q15, q2\n"
3939                     "AESMC.8 q15, q15\n"
3940 
3941                     "AESE.8 q0, q7\n"
3942                     "AESMC.8 q0, q0\n"
3943                     "AESE.8 q15, q3\n"
3944                     "AESMC.8 q15, q15\n"
3945 
3946                     "AESE.8 q0, q8\n"
3947                     "AESMC.8 q0, q0\n"
3948                     "AESE.8 q15, q4\n"
3949                     "AESMC.8 q15, q15\n"
3950 
3951                     "AESE.8 q0, q9\n"
3952                     "AESMC.8 q0, q0\n"
3953                     "AESE.8 q15, q5\n"
3954                     "AESMC.8 q15, q15\n"
3955 
3956                     "AESE.8 q0, q10\n"
3957                     "AESMC.8 q0, q0\n"
3958                     "VLD1.32 {q11}, [%[Key]]! \n"
3959                     "AESE.8 q15, q6\n"
3960                     "AESMC.8 q15, q15\n"
3961 
3962                     "AESE.8 q0, q11\n"
3963                     "AESMC.8 q0, q0\n"
3964                     "AESE.8 q15, q7\n"
3965                     "AESMC.8 q15, q15\n"
3966 
3967                     "AESE.8 q15, q8\n"
3968                     "AESMC.8 q15, q15\n"
3969 
3970                     "AESE.8 q15, q9\n"
3971                     "AESMC.8 q15, q15\n"
3972                     "VLD1.32 {q12}, [%[Key]]!  \n"
3973                     "AESE.8 q15, q10\n"
3974                     "AESMC.8 q15, q15\n"
3975 
3976                     "AESE.8 q15, q11\n"
3977                     "AESMC.8 q15, q15\n"
3978 
3979                     "VLD1.32 {q11}, [%[Key]]! \n"
3980                     "AESE.8 q0, q12\n" /* rnd 12*/
3981                     "AESMC.8 q0, q0\n"
3982                     "AESE.8 q15, q12\n" /* rnd 12 */
3983                     "AESMC.8 q15, q15\n"
3984 
3985                     "VLD1.32 {q12}, [%[Key]]!  \n"
3986                     "AESE.8 q0, q11\n" /* rnd 13 */
3987                     "AESMC.8 q0, q0\n"
3988                     "AESE.8 q15, q11\n" /* rnd 13 */
3989                     "AESMC.8 q15, q15\n"
3990 
3991                     "VLD1.32 {q11}, [%[Key]] \n"
3992                     "AESE.8 q0, q12\n" /* rnd 14 */
3993                     "AESE.8 q15, q12\n" /* rnd 14 */
3994 
3995                     "VLD1.32 {q12}, [%[input]]!  \n"
3996                     "VEOR.32 q0, q0, q11\n" /* rnd 15 */
3997                     "VEOR.32 q15, q15, q11\n" /* rnd 15 */
3998                     "VEOR.32 q0, q0, q12\n"
3999 
4000                     "VLD1.32 {q12}, [%[input]]!  \n"
4001                     "VST1.32 {q0}, [%[out]]!  \n"
4002                     "VEOR.32 q15, q15, q12\n"
4003                     "VST1.32 {q15}, [%[out]]!  \n"
4004                     "SUB %[Key], %[Key], #64 \n"
4005 
4006                     /* single block */
4007                     "B 1b \n"
4008 
4009                     "2:      \n"
4010                     "VLD1.32 {q11}, [%[Key]]! \n"
4011                     "VMOV.32 q0, q13  \n"
4012                     "AESE.8 q0, q1\n"
4013                     "AESMC.8 q0, q0\n"
4014                     "VREV64.8 q13, q13 \n" /* network order */
4015                     "AESE.8 q0, q2\n"
4016                     "AESMC.8 q0, q0\n"
4017                     "VEXT.8 q13, q13, q13, #8 \n"
4018                     "AESE.8 q0, q3\n"
4019                     "AESMC.8 q0, q0\n"
4020                     "VADD.i32 q13, q13, q14 \n" /* add 1 to counter */
4021                     "AESE.8 q0, q4\n"
4022                     "AESMC.8 q0, q0\n"
4023                     "AESE.8 q0, q5\n"
4024                     "AESMC.8 q0, q0\n"
4025                     "VEXT.8 q13, q13, q13, #8 \n"
4026                     "AESE.8 q0, q6\n"
4027                     "AESMC.8 q0, q0\n"
4028                     "VREV64.8 q13, q13\n" /* revert from network order */
4029                     "AESE.8 q0, q7\n"
4030                     "AESMC.8 q0, q0\n"
4031                     "AESE.8 q0, q8\n"
4032                     "AESMC.8 q0, q0\n"
4033                     "AESE.8 q0, q9\n"
4034                     "AESMC.8 q0, q0\n"
4035                     "AESE.8 q0, q10\n"
4036                     "AESMC.8 q0, q0\n"
4037                     "VLD1.32 {q12}, [%[Key]]! \n"
4038                     "AESE.8 q0, q11\n"
4039                     "AESMC.8 q0, q0\n"
4040                     "VLD1.32 {q11}, [%[Key]]! \n"
4041                     "AESE.8 q0, q12\n" /* rnd 12 */
4042                     "AESMC.8 q0, q0\n"
4043                     "VLD1.32 {q12}, [%[Key]]! \n"
4044                     "AESE.8 q0, q11\n" /* rnd 13 */
4045                     "AESMC.8 q0, q0\n"
4046                     "VLD1.32 {q11}, [%[Key]] \n"
4047                     "AESE.8 q0, q12\n" /* rnd 14 */
4048                     "VLD1.32 {q12}, [%[input]]! \n"
4049                     "VEOR.32 q0, q0, q11\n" /* rnd 15 */
4050                     "#CTR operations, increment counter and xorbuf \n"
4051                     "VEOR.32 q0, q0, q12\n"
4052                     "VST1.32 {q0}, [%[out]]!  \n"
4053 
4054                     "3: \n"
4055                     "#store current counter qalue at the end \n"
4056                     "VST1.32 {q13}, [%[regOut]]   \n"
4057 
4058                     :[out] "=r" (out), "=r" (keyPt), [regOut] "=r" (regPt),
4059                      "=r" (in)
4060                     :"0" (out), [Key] "1" (keyPt), [input] "3" (in),
4061                      [blocks] "r" (numBlocks), [reg] "2" (regPt)
4062                     : "cc", "memory", "r11", "q0", "q1", "q2", "q3", "q4", "q5",
4063                     "q6", "q7", "q8", "q9", "q10","q11","q12","q13","q14"
4064                     );
4065                     break;
4066 #endif /* WOLFSSL_AES_256 */
4067                 default:
4068                     WOLFSSL_MSG("Bad AES-CTR round qalue");
4069                     return BAD_FUNC_ARG;
4070                 }
4071 
4072                 aes->left = 0;
4073             }
4074 
4075             /* handle non block size remaining */
4076             if (sz) {
4077                 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
4078                 IncrementAesCounter((byte*)aes->reg);
4079 
4080                 aes->left = AES_BLOCK_SIZE;
4081                 tmp = (byte*)aes->tmp;
4082 
4083                 while (sz--) {
4084                     *(out++) = *(in++) ^ *(tmp++);
4085                     aes->left--;
4086                 }
4087             }
4088 
4089             return 0;
4090         }
4091 
4092 #endif /* WOLFSSL_AES_COUNTER */
4093 
4094 #ifdef HAVE_AESGCM
/*
 * Multiply the 16-byte block at X by the 16-byte block at Y and write the
 * 16-byte result back to X. This is the multiply step called from GHASH.
 *
 * Uses Karatsuba algorithm. Reduction algorithm is based on "Implementing GCM
 * on ARMv8". Shifting left to account for bit reflection is based on
 * "Carry-Less Multiplication and Its Usage for Computing the GCM mode"
 *
 * X: in/out, 16 bytes. Loaded, byte reversed, multiplied, converted back and
 *    stored to the same address.
 * Y: in, 16 bytes. Loaded as-is and never written by the assembly; it is
 *    presumably already in the byte-reversed layout ("converted on set key"
 *    below) - TODO confirm against the set key code.
 *
 * Register use (q<n> is the register pair d<2n>:d<2n+1>):
 *   q0      = X after byte reversal
 *   q1      = Y
 *   q5, q6  = low and high 128 bits of the 256-bit product (d10..d13)
 *   q7 - q9 = temporaries
 *   q10     = reduced result
 */
static void GMULT(byte* X, byte* Y)
{
    __asm__ __volatile__ (
        "VLD1.32 {q0}, [%[x]] \n"

        /* In GCM format bits are big endian, switch location of bytes to
         * allow for logical shifts and carries.
         * VREV64.8 reverses the bytes inside each 64-bit half and the VSWP
         * below exchanges the two halves, reversing all 16 bytes of X.
         */
        "VREV64.8 q0, q0 \n"
        "VLD1.32 {q1}, [%[y]] \n" /* converted on set key */
        "VSWP.8 d0, d1 \n"

        /* Karatsuba: three 64x64 polynomial multiplies.
         *   q5 = d0 * d2
         *   q6 = d1 * d3
         *   q7 = (d0 ^ d1) * (d2 ^ d3)
         * q7 ^ q5 ^ q6 is the middle term; it is XORed into d11:d12 so that
         * d10..d13 (q5:q6) hold the 256-bit product.
         */
        "VMULL.p64  q5, d0, d2 \n"
        "VMULL.p64  q6, d1, d3 \n"
        "VEOR d15, d2, d3 \n"
        "VEOR d14, d0, d1 \n"
        "VMULL.p64  q7, d15, d14 \n"
        "VEOR q7, q5 \n"
        "VEOR q7, q6 \n"
        "VEOR d11, d14 \n"
        "VEOR d12, d15\n"

        /* shift to left by 1 to account for reflection */
        /* Every 64-bit lane is shifted on its own. The top bit of each lane
         * is captured with a right shift by 63 (q7 for q6, q8 for q5) and
         * XORed into the next higher lane: d12 -> d13, d11 -> d12,
         * d10 -> d11. The bit shifted out of d13 (left in d15) is not used.
         */
        "VMOV q7, q6 \n"
        "VSHL.u64 q6, q6, #1 \n"
        "VSHR.u64 q7, q7, #63 \n"
        "VEOR d13, d14 \n"
        "VMOV q8, q5 \n"
        "VSHL.u64 q5, q5, #1 \n"
        "VSHR.u64 q8, q8, #63 \n"
        "VEOR d12, d17 \n"
        "VEOR d11, d16 \n"

        /* create constant 0xc200000000000000 */
        /* 0xc2000000 is placed in both 32-bit lanes of d16, then the 64-bit
         * shift by 32 leaves it in the upper word only.
         */
        "VMOV.i32 d16, 0xc2000000 \n"
        "VSHL.u64 d16, d16, #32 \n"

        /* reduce product of multiplication */
        /* Two multiplies by the constant: d10 is folded into d11:d12, then
         * the updated d11 is folded into q6 (d12:d13). The reduced value is
         * q5 ^ q6.
         */
        "VMULL.p64 q9, d10, d16 \n"
        "VEOR d11, d18 \n"
        "VEOR d12, d19 \n"
        "VMULL.p64 q9, d11, d16 \n"
        "VEOR q6, q9 \n"
        "VEOR q10, q5, q6 \n"

        /* convert to GCM format */
        /* Same 16-byte reversal as was applied to X on the way in. */
        "VREV64.8 q10, q10 \n"
        "VSWP.8 d20, d21 \n"

        "VST1.32 {q10}, [%[xOut]] \n"

        /* xOut and yOut are tied to the x and y inputs through the "0" and
         * "1" constraints, so %[xOut] is the same register as %[x]. yOut is
         * never referenced by the template.
         */
        : [xOut] "=r" (X), [yOut] "=r" (Y)
        : [x] "0" (X), [y] "1" (Y)
        /* NOTE(review): the template only names q0, q1 and q5-q10
         * (d0-d3, d10-d21); the wider clobber list is conservative.
         */
        : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6" ,"q7", "q8",
        "q9", "q10", "q11" ,"q12", "q13", "q14", "q15"
    );
}
4157 
4158 
/* Compute the GHASH of additional authentication data and cipher text.
 *
 * The running hash starts at zero. Every 16-byte block of a, then of c, is
 * XORed into it and the result multiplied by aes->H with GMULT. A trailing
 * partial block is zero padded to AES_BLOCK_SIZE first. Finally a block
 * holding aSz and cSz (written by FlattenSzInBits) is hashed in.
 *
 * aes: Aes structure supplying the hash key H
 * a:   additional authentication data, skipped when NULL or aSz is 0
 * aSz: size of a in bytes
 * c:   cipher text, skipped when NULL or cSz is 0
 * cSz: size of c in bytes
 * s:   output buffer, receives the first sSz bytes of the hash
 * sSz: number of bytes to copy into s
 */
void GHASH(Aes* aes, const byte* a, word32 aSz,
                                const byte* c, word32 cSz, byte* s, word32 sSz)
{
    byte   digest[AES_BLOCK_SIZE];
    byte   block[AES_BLOCK_SIZE];
    byte*  hashKey = aes->H;
    word32 fullBlocks;
    word32 tail;

    XMEMSET(digest, 0, AES_BLOCK_SIZE);

    /* Absorb A, the Additional Authentication Data */
    if (a != NULL && aSz != 0) {
        tail = aSz % AES_BLOCK_SIZE;
        for (fullBlocks = aSz / AES_BLOCK_SIZE; fullBlocks > 0; fullBlocks--) {
            xorbuf(digest, a, AES_BLOCK_SIZE);
            GMULT(digest, hashKey);
            a += AES_BLOCK_SIZE;
        }
        if (tail > 0) {
            /* Zero pad the final short block before hashing it. */
            XMEMSET(block, 0, AES_BLOCK_SIZE);
            XMEMCPY(block, a, tail);
            xorbuf(digest, block, AES_BLOCK_SIZE);
            GMULT(digest, hashKey);
        }
    }

    /* Absorb C, the Ciphertext */
    if (c != NULL && cSz != 0) {
        tail = cSz % AES_BLOCK_SIZE;
        for (fullBlocks = cSz / AES_BLOCK_SIZE; fullBlocks > 0; fullBlocks--) {
            xorbuf(digest, c, AES_BLOCK_SIZE);
            GMULT(digest, hashKey);
            c += AES_BLOCK_SIZE;
        }
        if (tail > 0) {
            /* Zero pad the final short block before hashing it. */
            XMEMSET(block, 0, AES_BLOCK_SIZE);
            XMEMCPY(block, c, tail);
            xorbuf(digest, block, AES_BLOCK_SIZE);
            GMULT(digest, hashKey);
        }
    }

    /* Absorb the length block: aSz in the first 8 bytes, cSz in the last 8 */
    FlattenSzInBits(block, aSz);
    FlattenSzInBits(block + 8, cSz);
    xorbuf(digest, block, AES_BLOCK_SIZE);
    GMULT(digest, hashKey);

    /* Hand back as many bytes of the hash as the caller asked for. */
    XMEMCPY(s, digest, sSz);
}
4212 
4213 
4214 /* Aarch32
4215  * Encrypt and tag data using AES with GCM mode.
4216  * aes: Aes structure having already been set with set key function
4217  * out: encrypted data output buffer
4218  * in:  plain text input buffer
4219  * sz:  size of plain text and out buffer
4220  * iv:  initialization vector
4221  * ivSz:      size of iv buffer
4222  * authTag:   buffer to hold tag
4223  * authTagSz: size of tag buffer
4224  * authIn:    additional data buffer
4225  * authInSz:  size of additional data buffer
4226  */
wc_AesGcmEncrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)4227 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
4228                    const byte* iv, word32 ivSz,
4229                    byte* authTag, word32 authTagSz,
4230                    const byte* authIn, word32 authInSz)
4231 {
4232     word32 blocks = sz / AES_BLOCK_SIZE;
4233     word32 partial = sz % AES_BLOCK_SIZE;
4234     const byte* p = in;
4235     byte* c = out;
4236     byte counter[AES_BLOCK_SIZE];
4237     byte initialCounter[AES_BLOCK_SIZE];
4238     byte *ctr ;
4239     byte scratch[AES_BLOCK_SIZE];
4240     ctr = counter ;
4241 
4242     /* sanity checks */
4243     if (aes == NULL || (iv == NULL && ivSz > 0) ||
4244                        (authTag == NULL) ||
4245                        (authIn == NULL && authInSz > 0) ||
4246                        (ivSz == 0)) {
4247         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
4248         return BAD_FUNC_ARG;
4249     }
4250 
4251     if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || authTagSz > AES_BLOCK_SIZE) {
4252         WOLFSSL_MSG("GcmEncrypt authTagSz error");
4253         return BAD_FUNC_ARG;
4254     }
4255 
4256     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
4257     if (ivSz == GCM_NONCE_MID_SZ) {
4258         XMEMCPY(initialCounter, iv, ivSz);
4259         initialCounter[AES_BLOCK_SIZE - 1] = 1;
4260     }
4261     else {
4262         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
4263     }
4264     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
4265 
4266     while (blocks--) {
4267         IncrementGcmCounter(ctr);
4268         wc_AesEncrypt(aes, ctr, scratch);
4269         xorbuf(scratch, p, AES_BLOCK_SIZE);
4270         XMEMCPY(c, scratch, AES_BLOCK_SIZE);
4271         p += AES_BLOCK_SIZE;
4272         c += AES_BLOCK_SIZE;
4273     }
4274 
4275     if (partial != 0) {
4276         IncrementGcmCounter(ctr);
4277         wc_AesEncrypt(aes, ctr, scratch);
4278         xorbuf(scratch, p, partial);
4279         XMEMCPY(c, scratch, partial);
4280 
4281     }
4282 
4283     GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
4284     wc_AesEncrypt(aes, initialCounter, scratch);
4285     if (authTagSz > AES_BLOCK_SIZE) {
4286         xorbuf(authTag, scratch, AES_BLOCK_SIZE);
4287     }
4288     else {
4289         xorbuf(authTag, scratch, authTagSz);
4290     }
4291 
4292     return 0;
4293 }
4294 
4295 
4296 #ifdef HAVE_AES_DECRYPT
4297 /*
4298  * Check tag and decrypt data using AES with GCM mode.
4299  * aes: Aes structure having already been set with set key function
4300  * out: decrypted data output buffer
4301  * in:  cipher text buffer
4302  * sz:  size of plain text and out buffer
4303  * iv:  initialization vector
4304  * ivSz:      size of iv buffer
4305  * authTag:   buffer holding tag
4306  * authTagSz: size of tag buffer
4307  * authIn:    additional data buffer
4308  * authInSz:  size of additional data buffer
4309  */
wc_AesGcmDecrypt(Aes * aes,byte * out,const byte * in,word32 sz,const byte * iv,word32 ivSz,const byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)4310 int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
4311                    const byte* iv, word32 ivSz,
4312                    const byte* authTag, word32 authTagSz,
4313                    const byte* authIn, word32 authInSz)
4314 {
4315     word32 blocks = sz / AES_BLOCK_SIZE;
4316     word32 partial = sz % AES_BLOCK_SIZE;
4317     const byte* c = in;
4318     byte* p = out;
4319     byte counter[AES_BLOCK_SIZE];
4320     byte initialCounter[AES_BLOCK_SIZE];
4321     byte *ctr ;
4322     byte scratch[AES_BLOCK_SIZE];
4323     ctr = counter ;
4324 
4325     /* sanity checks */
4326     if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
4327         authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
4328         ivSz == 0) {
4329         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
4330         return BAD_FUNC_ARG;
4331     }
4332 
4333     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
4334     if (ivSz == GCM_NONCE_MID_SZ) {
4335         XMEMCPY(initialCounter, iv, ivSz);
4336         initialCounter[AES_BLOCK_SIZE - 1] = 1;
4337     }
4338     else {
4339         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
4340     }
4341     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
4342 
4343     /* Calculate the authTag again using the received auth data and the
4344      * cipher text. */
4345     {
4346         byte Tprime[AES_BLOCK_SIZE];
4347         byte EKY0[AES_BLOCK_SIZE];
4348 
4349         GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
4350         wc_AesEncrypt(aes, ctr, EKY0);
4351         xorbuf(Tprime, EKY0, sizeof(Tprime));
4352 
4353         if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
4354             return AES_GCM_AUTH_E;
4355         }
4356     }
4357 
4358     while (blocks--) {
4359         IncrementGcmCounter(ctr);
4360         wc_AesEncrypt(aes, ctr, scratch);
4361         xorbuf(scratch, c, AES_BLOCK_SIZE);
4362         XMEMCPY(p, scratch, AES_BLOCK_SIZE);
4363         p += AES_BLOCK_SIZE;
4364         c += AES_BLOCK_SIZE;
4365     }
4366     if (partial != 0) {
4367         IncrementGcmCounter(ctr);
4368         wc_AesEncrypt(aes, ctr, scratch);
4369 
4370         /* check if pointer is null after main AES-GCM blocks
4371          * helps static analysis */
4372         if (p == NULL || c == NULL) {
4373             return BAD_STATE_E;
4374         }
4375         xorbuf(scratch, c, partial);
4376         XMEMCPY(p, scratch, partial);
4377     }
4378     return 0;
4379 }
4380 #endif /* HAVE_AES_DECRYPT */
4381 #endif /* HAVE_AESGCM */
4382 
4383 #endif /* aarch64 */
4384 
4385 #ifdef HAVE_AESGCM
4386 #ifdef WOLFSSL_AESGCM_STREAM
4387     /* Access initialization counter data. */
4388     #define AES_INITCTR(aes)        ((aes)->streamData + 0 * AES_BLOCK_SIZE)
4389     /* Access counter data. */
4390     #define AES_COUNTER(aes)        ((aes)->streamData + 1 * AES_BLOCK_SIZE)
4391     /* Access tag data. */
4392     #define AES_TAG(aes)            ((aes)->streamData + 2 * AES_BLOCK_SIZE)
4393     /* Access last GHASH block. */
4394     #define AES_LASTGBLOCK(aes)     ((aes)->streamData + 3 * AES_BLOCK_SIZE)
4395     /* Access last encrypted block. */
4396     #define AES_LASTBLOCK(aes)      ((aes)->streamData + 4 * AES_BLOCK_SIZE)
4397 
/* GHASH one block of data.
 *
 * XOR block into tag and GMULT with H.
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (for example "ctx + 1") can be passed, not just an identifier.
 * aes is evaluated more than once, so it must not have side effects.
 *
 * @param [in, out] aes    AES GCM object.
 * @param [in]      block  Block of AAD or cipher text.
 */
#define GHASH_ONE_BLOCK(aes, block)                         \
    do {                                                    \
        xorbuf(AES_TAG(aes), (block), AES_BLOCK_SIZE);      \
        GMULT(AES_TAG(aes), (aes)->H);                      \
    }                                                       \
    while (0)
4411 
/* Hash in the lengths of the AAD and cipher text in bits.
 *
 * Default implementation.
 *
 * Builds a 16-byte block with FlattenSzInBits (AAD size in the first 8 bytes,
 * cipher text size in the last 8) and passes it to GHASH_ONE_BLOCK.
 *
 * The argument is parenthesized in the expansion so that any pointer
 * expression can be passed. It is evaluated more than once, so it must not
 * have side effects.
 *
 * @param [in, out] aes  AES GCM object.
 */
#define GHASH_LEN_BLOCK(aes)                        \
    do {                                            \
        byte scratch[AES_BLOCK_SIZE];               \
        FlattenSzInBits(&scratch[0], (aes)->aSz);   \
        FlattenSzInBits(&scratch[8], (aes)->cSz);   \
        GHASH_ONE_BLOCK((aes), scratch);            \
    }                                               \
    while (0)
4426 
IncCtr(byte * ctr,word32 ctrSz)4427 static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
4428 {
4429     int i;
4430     for (i = ctrSz-1; i >= 0; i--) {
4431         if (++ctr[i])
4432             break;
4433     }
4434 }
4435 
4436 /* Initialize a GHASH for streaming operations.
4437  *
4438  * @param [in, out] aes  AES GCM object.
4439  */
GHASH_INIT(Aes * aes)4440 static void GHASH_INIT(Aes* aes) {
4441     /* Set tag to all zeros as initial value. */
4442     XMEMSET(AES_TAG(aes), 0, AES_BLOCK_SIZE);
4443     /* Reset counts of AAD and cipher text. */
4444     aes->aOver = 0;
4445     aes->cOver = 0;
4446 }
4447 
4448 /* Update the GHASH with AAD and/or cipher text.
4449  *
4450  * @param [in,out] aes   AES GCM object.
4451  * @param [in]     a     Additional authentication data buffer.
4452  * @param [in]     aSz   Size of data in AAD buffer.
4453  * @param [in]     c     Cipher text buffer.
4454  * @param [in]     cSz   Size of data in cipher text buffer.
4455  */
GHASH_UPDATE(Aes * aes,const byte * a,word32 aSz,const byte * c,word32 cSz)4456 static void GHASH_UPDATE(Aes* aes, const byte* a, word32 aSz, const byte* c,
4457     word32 cSz)
4458 {
4459     word32 blocks;
4460     word32 partial;
4461 
4462     /* Hash in A, the Additional Authentication Data */
4463     if (aSz != 0 && a != NULL) {
4464         /* Update count of AAD we have hashed. */
4465         aes->aSz += aSz;
4466         /* Check if we have unprocessed data. */
4467         if (aes->aOver > 0) {
4468             /* Calculate amount we can use - fill up the block. */
4469             byte sz = AES_BLOCK_SIZE - aes->aOver;
4470             if (sz > aSz) {
4471                 sz = aSz;
4472             }
4473             /* Copy extra into last GHASH block array and update count. */
4474             XMEMCPY(AES_LASTGBLOCK(aes) + aes->aOver, a, sz);
4475             aes->aOver += sz;
4476             if (aes->aOver == AES_BLOCK_SIZE) {
4477                 /* We have filled up the block and can process. */
4478                 GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
4479                 /* Reset count. */
4480                 aes->aOver = 0;
4481             }
4482             /* Used up some data. */
4483             aSz -= sz;
4484             a += sz;
4485         }
4486 
4487         /* Calculate number of blocks of AAD and the leftover. */
4488         blocks = aSz / AES_BLOCK_SIZE;
4489         partial = aSz % AES_BLOCK_SIZE;
4490         /* GHASH full blocks now. */
4491         while (blocks--) {
4492             GHASH_ONE_BLOCK(aes, a);
4493             a += AES_BLOCK_SIZE;
4494         }
4495         if (partial != 0) {
4496             /* Cache the partial block. */
4497             XMEMCPY(AES_LASTGBLOCK(aes), a, partial);
4498             aes->aOver = (byte)partial;
4499         }
4500     }
4501     if (aes->aOver > 0 && cSz > 0 && c != NULL) {
4502         /* No more AAD coming and we have a partial block. */
4503         /* Fill the rest of the block with zeros. */
4504         byte sz = AES_BLOCK_SIZE - aes->aOver;
4505         XMEMSET(AES_LASTGBLOCK(aes) + aes->aOver, 0, sz);
4506         /* GHASH last AAD block. */
4507         GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
4508         /* Clear partial count for next time through. */
4509         aes->aOver = 0;
4510     }
4511 
4512     /* Hash in C, the Ciphertext */
4513     if (cSz != 0 && c != NULL) {
4514         /* Update count of cipher text we have hashed. */
4515         aes->cSz += cSz;
4516         if (aes->cOver > 0) {
4517             /* Calculate amount we can use - fill up the block. */
4518             byte sz = AES_BLOCK_SIZE - aes->cOver;
4519             if (sz > cSz) {
4520                 sz = cSz;
4521             }
4522             XMEMCPY(AES_LASTGBLOCK(aes) + aes->cOver, c, sz);
4523             /* Update count of unsed encrypted counter. */
4524             aes->cOver += sz;
4525             if (aes->cOver == AES_BLOCK_SIZE) {
4526                 /* We have filled up the block and can process. */
4527                 GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
4528                 /* Reset count. */
4529                 aes->cOver = 0;
4530             }
4531             /* Used up some data. */
4532             cSz -= sz;
4533             c += sz;
4534         }
4535 
4536         /* Calculate number of blocks of cipher text and the leftover. */
4537         blocks = cSz / AES_BLOCK_SIZE;
4538         partial = cSz % AES_BLOCK_SIZE;
4539         /* GHASH full blocks now. */
4540         while (blocks--) {
4541             GHASH_ONE_BLOCK(aes, c);
4542             c += AES_BLOCK_SIZE;
4543         }
4544         if (partial != 0) {
4545             /* Cache the partial block. */
4546             XMEMCPY(AES_LASTGBLOCK(aes), c, partial);
4547             aes->cOver = (byte)partial;
4548         }
4549     }
4550 }
4551 
4552 /* Finalize the GHASH calculation.
4553  *
4554  * Complete hashing cipher text and hash the AAD and cipher text lengths.
4555  *
4556  * @param [in, out] aes  AES GCM object.
4557  * @param [out]     s    Authentication tag.
4558  * @param [in]      sSz  Size of authentication tag required.
4559  */
GHASH_FINAL(Aes * aes,byte * s,word32 sSz)4560 static void GHASH_FINAL(Aes* aes, byte* s, word32 sSz)
4561 {
4562     /* AAD block incomplete when > 0 */
4563     byte over = aes->aOver;
4564 
4565     if (aes->cOver > 0) {
4566         /* Cipher text block incomplete. */
4567         over = aes->cOver;
4568     }
4569     if (over > 0) {
4570         /* Zeroize the unused part of the block. */
4571         XMEMSET(AES_LASTGBLOCK(aes) + over, 0, AES_BLOCK_SIZE - over);
4572         /* Hash the last block of cipher text. */
4573         GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes));
4574     }
4575     /* Hash in the lengths of AAD and cipher text in bits */
4576     GHASH_LEN_BLOCK(aes);
4577     /* Copy the result into s. */
4578     XMEMCPY(s, AES_TAG(aes), sSz);
4579 }
4580 
4581 /* Initialize the AES GCM cipher with an IV. C implementation.
4582  *
4583  * @param [in, out] aes   AES object.
4584  * @param [in]      iv    IV/nonce buffer.
4585  * @param [in]      ivSz  Length of IV/nonce data.
4586  */
AesGcmInit_C(Aes * aes,const byte * iv,word32 ivSz)4587 static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz)
4588 {
4589     ALIGN32 byte counter[AES_BLOCK_SIZE];
4590 
4591     if (ivSz == GCM_NONCE_MID_SZ) {
4592         /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */
4593         XMEMCPY(counter, iv, ivSz);
4594         XMEMSET(counter + GCM_NONCE_MID_SZ, 0,
4595                                          AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1);
4596         counter[AES_BLOCK_SIZE - 1] = 1;
4597     }
4598     else {
4599         /* Counter is GHASH of IV. */
4600     #ifdef OPENSSL_EXTRA
4601         word32 aadTemp = aes->aadLen;
4602         aes->aadLen = 0;
4603     #endif
4604         GHASH(aes, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE);
4605         GMULT(counter, aes->H);
4606     #ifdef OPENSSL_EXTRA
4607         aes->aadLen = aadTemp;
4608     #endif
4609     }
4610 
4611     /* Copy in the counter for use with cipher. */
4612     XMEMCPY(AES_COUNTER(aes), counter, AES_BLOCK_SIZE);
4613     /* Encrypt initial counter into a buffer for GCM. */
4614     wc_AesEncrypt(aes, counter, AES_INITCTR(aes));
4615     /* Reset state fields. */
4616     aes->over = 0;
4617     aes->aSz = 0;
4618     aes->cSz = 0;
4619     /* Initialization for GHASH. */
4620     GHASH_INIT(aes);
4621 }
4622 
4623 /* Update the AES GCM cipher with data. C implementation.
4624  *
4625  * Only enciphers data.
4626  *
4627  * @param [in, out] aes  AES object.
4628  * @param [in]      out  Cipher text or plaintext buffer.
4629  * @param [in]      in   Plaintext or cipher text buffer.
4630  * @param [in]      sz   Length of data.
4631  */
AesGcmCryptUpdate_C(Aes * aes,byte * out,const byte * in,word32 sz)4632 static void AesGcmCryptUpdate_C(Aes* aes, byte* out, const byte* in, word32 sz)
4633 {
4634     word32 blocks;
4635     word32 partial;
4636 
4637     /* Check if previous encrypted block was not used up. */
4638     if (aes->over > 0) {
4639         byte pSz = AES_BLOCK_SIZE - aes->over;
4640         if (pSz > sz) pSz = sz;
4641 
4642         /* Use some/all of last encrypted block. */
4643         xorbufout(out, AES_LASTBLOCK(aes) + aes->over, in, pSz);
4644         aes->over = (aes->over + pSz) & (AES_BLOCK_SIZE - 1);
4645 
4646         /* Some data used. */
4647         sz  -= pSz;
4648         in  += pSz;
4649         out += pSz;
4650     }
4651 
4652     /* Calculate the number of blocks needing to be encrypted and any leftover.
4653      */
4654     blocks  = sz / AES_BLOCK_SIZE;
4655     partial = sz & (AES_BLOCK_SIZE - 1);
4656 
4657     /* Encrypt block by block. */
4658     while (blocks--) {
4659         ALIGN32 byte scratch[AES_BLOCK_SIZE];
4660         IncrementGcmCounter(AES_COUNTER(aes));
4661         /* Encrypt counter into a buffer. */
4662         wc_AesEncrypt(aes, AES_COUNTER(aes), scratch);
4663         /* XOR plain text into encrypted counter into cipher text buffer. */
4664         xorbufout(out, scratch, in, AES_BLOCK_SIZE);
4665         /* Data complete. */
4666         in  += AES_BLOCK_SIZE;
4667         out += AES_BLOCK_SIZE;
4668     }
4669 
4670     if (partial != 0) {
4671         /* Generate an extra block and use up as much as needed. */
4672         IncrementGcmCounter(AES_COUNTER(aes));
4673         /* Encrypt counter into cache. */
4674         wc_AesEncrypt(aes, AES_COUNTER(aes), AES_LASTBLOCK(aes));
4675         /* XOR plain text into encrypted counter into cipher text buffer. */
4676         xorbufout(out, AES_LASTBLOCK(aes), in, partial);
4677         /* Keep amount of encrypted block used. */
4678         aes->over = partial;
4679     }
4680 }
4681 
4682 /* Calculates authentication tag for AES GCM. C implementation.
4683  *
4684  * @param [in, out] aes        AES object.
4685  * @param [out]     authTag    Buffer to store authentication tag in.
4686  * @param [in]      authTagSz  Length of tag to create.
4687  */
AesGcmFinal_C(Aes * aes,byte * authTag,word32 authTagSz)4688 static void AesGcmFinal_C(Aes* aes, byte* authTag, word32 authTagSz)
4689 {
4690     /* Calculate authentication tag. */
4691     GHASH_FINAL(aes, authTag, authTagSz);
4692     /* XOR in as much of encrypted counter as is required. */
4693     xorbuf(authTag, AES_INITCTR(aes), authTagSz);
4694 #ifdef OPENSSL_EXTRA
4695     /* store AAD size for next call */
4696     aes->aadLen = aes->aSz;
4697 #endif
4698     /* Zeroize last block to protect sensitive data. */
4699     ForceZero(AES_LASTBLOCK(aes), AES_BLOCK_SIZE);
4700 }
4701 
4702 /* Initialize an AES GCM cipher for encryption or decryption.
4703  *
4704  * Must call wc_AesInit() before calling this function.
4705  *
4706  * @param [in, out] aes   AES object.
4707  * @param [in]      key   Buffer holding key.
4708  * @param [in]      len   Length of key in bytes.
4709  * @param [in]      iv    Buffer holding IV/nonce.
4710  * @param [in]      ivSz  Length of IV/nonce in bytes.
4711  * @return  0 on success.
4712  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4713  *          is NULL, or the IV is NULL and no previous IV has been set.
4714  * @return  MEMORY_E when dynamic memory allocation fails. (WOLFSSL_SMALL_STACK)
4715  */
wc_AesGcmInit(Aes * aes,const byte * key,word32 len,const byte * iv,word32 ivSz)4716 int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv,
4717     word32 ivSz)
4718 {
4719     int ret = 0;
4720 
4721     /* Check validity of parameters. */
4722     if ((aes == NULL) || ((len > 0) && (key == NULL)) ||
4723             ((ivSz == 0) && (iv != NULL)) || (ivSz > AES_BLOCK_SIZE) ||
4724             ((ivSz > 0) && (iv == NULL))) {
4725         ret = BAD_FUNC_ARG;
4726     }
4727 
4728 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_AESNI)
4729     if ((ret == 0) && (aes->streamData == NULL)) {
4730         /* Allocate buffers for streaming. */
4731         aes->streamData = (byte*)XMALLOC(5 * AES_BLOCK_SIZE, aes->heap,
4732                                                               DYNAMIC_TYPE_AES);
4733         if (aes->streamData == NULL) {
4734             ret = MEMORY_E;
4735         }
4736     }
4737 #endif
4738 
4739     /* Set the key if passed in. */
4740     if ((ret == 0) && (key != NULL)) {
4741         ret = wc_AesGcmSetKey(aes, key, len);
4742     }
4743 
4744     if (ret == 0) {
4745         /* Setup with IV if needed. */
4746         if (iv != NULL) {
4747             /* Cache the IV in AES GCM object. */
4748             XMEMCPY((byte*)aes->reg, iv, ivSz);
4749             aes->nonceSz = ivSz;
4750         }
4751         else if (aes->nonceSz != 0) {
4752             /* Copy out the cached copy. */
4753             iv = (byte*)aes->reg;
4754             ivSz = aes->nonceSz;
4755         }
4756 
4757         if (iv != NULL) {
4758             /* Initialize with the IV. */
4759             AesGcmInit_C(aes, iv, ivSz);
4760 
4761             aes->nonceSet = 1;
4762         }
4763     }
4764 
4765     return ret;
4766 }
4767 
4768 /* Initialize an AES GCM cipher for encryption.
4769  *
4770  * Must call wc_AesInit() before calling this function.
4771  *
4772  * @param [in, out] aes   AES object.
4773  * @param [in]      key   Buffer holding key.
4774  * @param [in]      len   Length of key in bytes.
4775  * @param [in]      iv    Buffer holding IV/nonce.
4776  * @param [in]      ivSz  Length of IV/nonce in bytes.
4777  * @return  0 on success.
4778  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4779  *          is NULL, or the IV is NULL and no previous IV has been set.
4780  */
wc_AesGcmEncryptInit(Aes * aes,const byte * key,word32 len,const byte * iv,word32 ivSz)4781 int wc_AesGcmEncryptInit(Aes* aes, const byte* key, word32 len, const byte* iv,
4782     word32 ivSz)
4783 {
4784     return wc_AesGcmInit(aes, key, len, iv, ivSz);
4785 }
4786 
4787 /* Initialize an AES GCM cipher for encryption or decryption. Get IV.
4788  *
4789  * Must call wc_AesInit() before calling this function.
4790  *
4791  * @param [in, out] aes   AES object.
4792  * @param [in]      key   Buffer holding key.
4793  * @param [in]      len   Length of key in bytes.
4794  * @param [in]      iv    Buffer holding IV/nonce.
4795  * @param [in]      ivSz  Length of IV/nonce in bytes.
4796  * @return  0 on success.
4797  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4798  *          is NULL, or the IV is NULL and no previous IV has been set.
4799  */
wc_AesGcmEncryptInit_ex(Aes * aes,const byte * key,word32 len,byte * ivOut,word32 ivOutSz)4800 int wc_AesGcmEncryptInit_ex(Aes* aes, const byte* key, word32 len, byte* ivOut,
4801     word32 ivOutSz)
4802 {
4803     XMEMCPY(ivOut, aes->reg, ivOutSz);
4804     return wc_AesGcmInit(aes, key, len, NULL, 0);
4805 }
4806 
4807 /* Update the AES GCM for encryption with data and/or authentication data.
4808  *
4809  * All the AAD must be passed to update before the plaintext.
4810  * Last part of AAD can be passed with first part of plaintext.
4811  *
4812  * Must set key and IV before calling this function.
4813  * Must call wc_AesGcmInit() before calling this function.
4814  *
4815  * @param [in, out] aes       AES object.
4816  * @param [out]     out       Buffer to hold cipher text.
4817  * @param [in]      in        Buffer holding plaintext.
4818  * @param [in]      sz        Length of plaintext in bytes.
4819  * @param [in]      authIn    Buffer holding authentication data.
4820  * @param [in]      authInSz  Length of authentication data in bytes.
4821  * @return  0 on success.
4822  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4823  *          is NULL.
4824  */
wc_AesGcmEncryptUpdate(Aes * aes,byte * out,const byte * in,word32 sz,const byte * authIn,word32 authInSz)4825 int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
4826     const byte* authIn, word32 authInSz)
4827 {
4828     int ret = 0;
4829 
4830     /* Check validity of parameters. */
4831     if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) || ((sz > 0) &&
4832             ((out == NULL) || (in == NULL)))) {
4833         ret = BAD_FUNC_ARG;
4834     }
4835 
4836     /* Check key has been set. */
4837     if ((ret == 0) && (!aes->gcmKeySet)) {
4838         ret = MISSING_KEY;
4839     }
4840     /* Check IV has been set. */
4841     if ((ret == 0) && (!aes->nonceSet)) {
4842         ret = MISSING_IV;
4843     }
4844 
4845     if ((ret == 0) && aes->ctrSet && (aes->aSz == 0) && (aes->cSz == 0)) {
4846         aes->invokeCtr[0]++;
4847         if (aes->invokeCtr[0] == 0) {
4848             aes->invokeCtr[1]++;
4849             if (aes->invokeCtr[1] == 0)
4850                 ret = AES_GCM_OVERFLOW_E;
4851         }
4852     }
4853 
4854     if (ret == 0) {
4855         /* Encrypt the plaintext. */
4856         AesGcmCryptUpdate_C(aes, out, in, sz);
4857         /* Update the authenication tag with any authentication data and the
4858          * new cipher text. */
4859         GHASH_UPDATE(aes, authIn, authInSz, out, sz);
4860     }
4861 
4862     return ret;
4863 }
4864 
4865 /* Finalize the AES GCM for encryption and return the authentication tag.
4866  *
4867  * Must set key and IV before calling this function.
4868  * Must call wc_AesGcmInit() before calling this function.
4869  *
4870  * @param [in, out] aes        AES object.
4871  * @param [out]     authTag    Buffer to hold authentication tag.
4872  * @param [in]      authTagSz  Length of authentication tag in bytes.
4873  * @return  0 on success.
4874  */
wc_AesGcmEncryptFinal(Aes * aes,byte * authTag,word32 authTagSz)4875 int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz)
4876 {
4877     int ret = 0;
4878 
4879     /* Check validity of parameters. */
4880     if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) ||
4881             (authTagSz == 0)) {
4882         ret = BAD_FUNC_ARG;
4883     }
4884 
4885     /* Check key has been set. */
4886     if ((ret == 0) && (!aes->gcmKeySet)) {
4887         ret = MISSING_KEY;
4888     }
4889     /* Check IV has been set. */
4890     if ((ret == 0) && (!aes->nonceSet)) {
4891         ret = MISSING_IV;
4892     }
4893 
4894     if (ret == 0) {
4895         /* Calculate authentication tag. */
4896         AesGcmFinal_C(aes, authTag, authTagSz);
4897     }
4898 
4899     if ((ret == 0) && aes->ctrSet) {
4900         IncCtr((byte*)aes->reg, aes->nonceSz);
4901     }
4902 
4903     return ret;
4904 }
4905 
4906 #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
4907 /* Initialize an AES GCM cipher for decryption.
4908  *
4909  * Must call wc_AesInit() before calling this function.
4910  *
4911  * @param [in, out] aes   AES object.
4912  * @param [in]      key   Buffer holding key.
4913  * @param [in]      len   Length of key in bytes.
4914  * @param [in]      iv    Buffer holding IV/nonce.
4915  * @param [in]      ivSz  Length of IV/nonce in bytes.
4916  * @return  0 on success.
4917  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4918  *          is NULL, or the IV is NULL and no previous IV has been set.
4919  */
wc_AesGcmDecryptInit(Aes * aes,const byte * key,word32 len,const byte * iv,word32 ivSz)4920 int wc_AesGcmDecryptInit(Aes* aes, const byte* key, word32 len, const byte* iv,
4921     word32 ivSz)
4922 {
4923     return wc_AesGcmInit(aes, key, len, iv, ivSz);
4924 }
4925 
4926 /* Update the AES GCM for decryption with data and/or authentication data.
4927  *
4928  * All the AAD must be passed to update before the cipher text.
4929  * Last part of AAD can be passed with first part of cipher text.
4930  *
4931  * Must set key and IV before calling this function.
4932  * Must call wc_AesGcmInit() before calling this function.
4933  *
4934  * @param [in, out] aes       AES object.
4935  * @param [out]     out       Buffer to hold plaintext.
4936  * @param [in]      in        Buffer holding cipher text.
4937  * @param [in]      sz        Length of cipher text in bytes.
4938  * @param [in]      authIn    Buffer holding authentication data.
4939  * @param [in]      authInSz  Length of authentication data in bytes.
4940  * @return  0 on success.
4941  * @return  BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer
4942  *          is NULL.
4943  */
wc_AesGcmDecryptUpdate(Aes * aes,byte * out,const byte * in,word32 sz,const byte * authIn,word32 authInSz)4944 int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz,
4945     const byte* authIn, word32 authInSz)
4946 {
4947     int ret = 0;
4948 
4949     /* Check validity of parameters. */
4950     if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) || ((sz > 0) &&
4951             ((out == NULL) || (in == NULL)))) {
4952         ret = BAD_FUNC_ARG;
4953     }
4954 
4955     /* Check key has been set. */
4956     if ((ret == 0) && (!aes->gcmKeySet)) {
4957         ret = MISSING_KEY;
4958     }
4959     /* Check IV has been set. */
4960     if ((ret == 0) && (!aes->nonceSet)) {
4961         ret = MISSING_IV;
4962     }
4963 
4964     if (ret == 0) {
4965         /* Decrypt with AAD and/or cipher text. */
4966         /* Update the authenication tag with any authentication data and
4967          * cipher text. */
4968         GHASH_UPDATE(aes, authIn, authInSz, in, sz);
4969         /* Decrypt the cipher text. */
4970         AesGcmCryptUpdate_C(aes, out, in, sz);
4971     }
4972 
4973     return ret;
4974 }
4975 
4976 /* Finalize the AES GCM for decryption and check the authentication tag.
4977  *
4978  * Must set key and IV before calling this function.
4979  * Must call wc_AesGcmInit() before calling this function.
4980  *
4981  * @param [in, out] aes        AES object.
4982  * @param [in]      authTag    Buffer holding authentication tag.
4983  * @param [in]      authTagSz  Length of authentication tag in bytes.
4984  * @return  0 on success.
4985  */
wc_AesGcmDecryptFinal(Aes * aes,const byte * authTag,word32 authTagSz)4986 int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
4987 {
4988     int ret = 0;
4989 
4990     /* Check validity of parameters. */
4991     if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) ||
4992             (authTagSz == 0)) {
4993         ret = BAD_FUNC_ARG;
4994     }
4995 
4996     /* Check key has been set. */
4997     if ((ret == 0) && (!aes->gcmKeySet)) {
4998         ret = MISSING_KEY;
4999     }
5000     /* Check IV has been set. */
5001     if ((ret == 0) && (!aes->nonceSet)) {
5002         ret = MISSING_IV;
5003     }
5004 
5005     if (ret == 0) {
5006         /* Calculate authentication tag and compare with one passed in.. */
5007         ALIGN32 byte calcTag[AES_BLOCK_SIZE];
5008         /* Calculate authentication tag. */
5009         AesGcmFinal_C(aes, calcTag, authTagSz);
5010         /* Check calculated tag matches the one passed in. */
5011         if (ConstantCompare(authTag, calcTag, authTagSz) != 0) {
5012             ret = AES_GCM_AUTH_E;
5013         }
5014     }
5015 
5016     return ret;
5017 }
5018 #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
5019 #endif /* WOLFSSL_AESGCM_STREAM */
5020 #endif /* HAVE_AESGCM */
5021 
5022 
5023 #ifdef HAVE_AESCCM
5024 /* Software version of AES-CCM from wolfcrypt/src/aes.c
5025  * Gets some speed up from hardware acceleration of wc_AesEncrypt */
5026 
/* Fold data into the running CBC-MAC value held in out.
 *
 * Each block of input is XORed into out and out is then encrypted in place.
 * A trailing partial block is XORed over the leading bytes only, which is the
 * same as padding it with zeros.
 *
 * aes   AES object used for the block encryptions.
 * in    Data to fold in.
 * inSz  Length of data in bytes.
 * out   AES_BLOCK_SIZE byte running value, updated in place.
 */
static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    word32 wholeBlocks = inSz / AES_BLOCK_SIZE;
    word32 leftover    = inSz % AES_BLOCK_SIZE;

    /* whole blocks first */
    while (wholeBlocks-- > 0) {
        xorbuf(out, in, AES_BLOCK_SIZE);
        wc_AesEncrypt(aes, out, out);
        in += AES_BLOCK_SIZE;
    }

    /* then whatever is left */
    if (leftover > 0) {
        xorbuf(out, in, leftover);
        wc_AesEncrypt(aes, out, out);
    }
}
5044 
5045 
/* Fold the authentication data into the running CBC-MAC value held in out.
 *
 * The first block is made of the encoded length of the authentication data
 * followed by as much of the data as fits. Remaining data goes through
 * roll_x().
 *
 * aes   AES object used for the block encryptions.
 * in    Authentication data.
 * inSz  Length of authentication data in bytes.
 * out   AES_BLOCK_SIZE byte running value, updated in place.
 */
static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    word32 hdrSz;  /* bytes used by the encoded length */
    word32 room;   /* bytes of the first block left for data */
    word32 take;   /* bytes of data going into the first block */

    if (inSz <= 0xFEFF) {
        /* short form: two byte big-endian length */
        hdrSz = 2;
        out[0] ^= (byte)((inSz >> 8) & 0xFF);
        out[1] ^= (byte)(inSz & 0xFF);
    }
    else if (inSz <= 0xFFFFFFFF) {
        /* long form: 0xFF 0xFE marker then four byte big-endian length */
        hdrSz = 6;
        out[0] ^= 0xFF;
        out[1] ^= 0xFE;
        out[2] ^= (byte)((inSz >> 24) & 0xFF);
        out[3] ^= (byte)((inSz >> 16) & 0xFF);
        out[4] ^= (byte)((inSz >>  8) & 0xFF);
        out[5] ^= (byte)(inSz & 0xFF);
    }
    /* Note, the protocol handles auth data up to 2^64, but we are
     * using 32-bit sizes right now, so the bigger data isn't handled
     * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */
    else {
        return;
    }

    /* Fill the rest of the first block. When there is not enough data the
     * untouched bytes act as zero padding. */
    room = AES_BLOCK_SIZE - hdrSz;
    take = (inSz >= room) ? room : inSz;
    xorbuf(out + hdrSz, in, take);
    in   += take;
    inSz -= take;

    wc_AesEncrypt(aes, out, out);

    /* Anything beyond the first block. */
    if (inSz > 0) {
        roll_x(aes, in, inSz, out);
    }
}
5089 
5090 
AesCcmCtrInc(byte * B,word32 lenSz)5091 static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
5092 {
5093     word32 i;
5094 
5095     for (i = 0; i < lenSz; i++) {
5096         if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return;
5097     }
5098 }
5099 
5100 
5101 /* return 0 on success */
wc_AesCcmEncrypt(Aes * aes,byte * out,const byte * in,word32 inSz,const byte * nonce,word32 nonceSz,byte * authTag,word32 authTagSz,const byte * authIn,word32 authInSz)5102 int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
5103                    const byte* nonce, word32 nonceSz,
5104                    byte* authTag, word32 authTagSz,
5105                    const byte* authIn, word32 authInSz)
5106 {
5107     byte A[AES_BLOCK_SIZE];
5108     byte B[AES_BLOCK_SIZE];
5109     byte lenSz;
5110     word32 i;
5111     byte mask     = 0xFF;
5112     word32 wordSz = (word32)sizeof(word32);
5113 
5114     /* sanity check on arguments */
5115     if (aes == NULL || out == NULL || in == NULL || nonce == NULL
5116             || authTag == NULL || nonceSz < 7 || nonceSz > 13)
5117         return BAD_FUNC_ARG;
5118 
5119     if (wc_AesCcmCheckTagSize(authTagSz) != 0) {
5120         return BAD_FUNC_ARG;
5121     }
5122 
5123     XMEMCPY(B+1, nonce, nonceSz);
5124     lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
5125     B[0] = (authInSz > 0 ? 64 : 0)
5126          + (8 * (((byte)authTagSz - 2) / 2))
5127          + (lenSz - 1);
5128     for (i = 0; i < lenSz; i++) {
5129         if (mask && i >= wordSz)
5130             mask = 0x00;
5131         B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
5132     }
5133 
5134     wc_AesEncrypt(aes, B, A);
5135 
5136     if (authInSz > 0)
5137         roll_auth(aes, authIn, authInSz, A);
5138     if (inSz > 0)
5139         roll_x(aes, in, inSz, A);
5140     XMEMCPY(authTag, A, authTagSz);
5141 
5142     B[0] = lenSz - 1;
5143     for (i = 0; i < lenSz; i++)
5144         B[AES_BLOCK_SIZE - 1 - i] = 0;
5145     wc_AesEncrypt(aes, B, A);
5146     xorbuf(authTag, A, authTagSz);
5147 
5148     B[15] = 1;
5149     while (inSz >= AES_BLOCK_SIZE) {
5150         wc_AesEncrypt(aes, B, A);
5151         xorbuf(A, in, AES_BLOCK_SIZE);
5152         XMEMCPY(out, A, AES_BLOCK_SIZE);
5153 
5154         AesCcmCtrInc(B, lenSz);
5155         inSz -= AES_BLOCK_SIZE;
5156         in += AES_BLOCK_SIZE;
5157         out += AES_BLOCK_SIZE;
5158     }
5159     if (inSz > 0) {
5160         wc_AesEncrypt(aes, B, A);
5161         xorbuf(A, in, inSz);
5162         XMEMCPY(out, A, inSz);
5163     }
5164 
5165     ForceZero(A, AES_BLOCK_SIZE);
5166     ForceZero(B, AES_BLOCK_SIZE);
5167 
5168     return 0;
5169 }
5170 
5171 #ifdef HAVE_AES_DECRYPT
/* Decrypt data and check the authentication tag using AES-CCM.
 *
 * The data is decrypted first as the tag is calculated over the plaintext.
 * When the tag does not match, the output buffer is zeroed.
 *
 * aes        AES object used for the block encryptions.
 * out        Buffer to hold plaintext. May be NULL when inSz is 0.
 * in         Buffer holding cipher text. May be NULL when inSz is 0.
 * inSz       Length of cipher text in bytes.
 * nonce      Buffer holding nonce.
 * nonceSz    Length of nonce in bytes: 7 to 13 inclusive.
 * authTag    Buffer holding the authentication tag to check.
 * authTagSz  Length of tag in bytes - checked with wc_AesCcmCheckTagSize().
 * authIn     Buffer holding authentication data. May be NULL when authInSz
 *            is 0.
 * authInSz   Length of authentication data in bytes.
 *
 * return 0 on success
 * return BAD_FUNC_ARG when aes, nonce or authTag is NULL, a length is
 *        non-zero but its buffer is NULL, or the nonce or tag size is invalid.
 * return AES_CCM_AUTH_E when the calculated tag does not match authTag.
 */
int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                   const byte* nonce, word32 nonceSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    byte A[AES_BLOCK_SIZE];
    byte B[AES_BLOCK_SIZE];
    byte* o;
    byte lenSz;
    word32 i, oSz;
    int result = 0;
    byte mask     = 0xFF;
    word32 wordSz = (word32)sizeof(word32);

    /* sanity check on arguments
     * - data buffers are only required when there is data to process
     * - authIn is dereferenced by roll_auth() whenever authInSz is non-zero */
    if (aes == NULL || nonce == NULL || authTag == NULL
            || nonceSz < 7 || nonceSz > 13
            || (inSz > 0 && (in == NULL || out == NULL))
            || (authInSz > 0 && authIn == NULL))
        return BAD_FUNC_ARG;

    if (wc_AesCcmCheckTagSize(authTagSz) != 0) {
        return BAD_FUNC_ARG;
    }

    o = out;
    oSz = inSz;
    XMEMCPY(B+1, nonce, nonceSz);
    lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;

    /* Counter block starting at counter value 1 decrypts the data. */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    B[15] = 1;

    while (oSz >= AES_BLOCK_SIZE) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, AES_BLOCK_SIZE);
        XMEMCPY(o, A, AES_BLOCK_SIZE);

        AesCcmCtrInc(B, lenSz);
        oSz -= AES_BLOCK_SIZE;
        in += AES_BLOCK_SIZE;
        o += AES_BLOCK_SIZE;
    }
    if (oSz > 0) {
        /* trailing partial block - nothing to do when the data was a whole
         * number of blocks */
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, oSz);
        XMEMCPY(o, A, oSz);
    }

    /* First MAC block: flags, nonce, then message length in the remaining
     * lenSz bytes (big-endian). Every counter byte of B is overwritten. */
    B[0] = (authInSz > 0 ? 64 : 0)
         + (8 * (((byte)authTagSz - 2) / 2))
         + (lenSz - 1);
    for (i = 0; i < lenSz; i++) {
        /* inSz only has wordSz bytes - higher length bytes are zero and the
         * shift amount is forced to zero so it never exceeds the type. */
        if (mask && i >= wordSz)
            mask = 0x00;
        B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
    }

    wc_AesEncrypt(aes, B, A);

    /* MAC over authentication data then the recovered plaintext. */
    if (authInSz > 0)
        roll_auth(aes, authIn, authInSz, A);
    if (inSz > 0)
        roll_x(aes, out, inSz, A);

    /* Counter block with counter value 0 masks the tag. */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, B);
    xorbuf(A, B, authTagSz);

    if (ConstantCompare(A, authTag, authTagSz) != 0) {
        /* If the authTag check fails, don't keep the decrypted data.
         * Unfortunately, you need the decrypted data to calculate the
         * check value. */
        if (inSz > 0) {
            XMEMSET(out, 0, inSz);
        }
        result = AES_CCM_AUTH_E;
    }

    /* Calculated tag and key stream must not be left on the stack. */
    ForceZero(A, AES_BLOCK_SIZE);
    ForceZero(B, AES_BLOCK_SIZE);

    return result;
}
5264 #endif /* HAVE_AES_DECRYPT */
5265 #endif /* HAVE_AESCCM */
5266 
5267 
5268 
5269 #ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */
/* Set the AES key for GCM and derive the hash key H.
 *
 * H is the encryption of an all-zero block with the new key. It is stored in
 * aes->H after being rearranged in place by the architecture specific code
 * below.
 *
 * @param [in, out] aes  AES object.
 * @param [in]      key  Buffer holding key.
 * @param [in]      len  Length of key in bytes: 16, 24 or 32.
 * @return  0 on success.
 * @return  BAD_FUNC_ARG when len is not 16, 24 or 32.
 * @return  Error code from wc_AesSetKey() when setting the key fails.
 */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
    int  ret;
    byte zeroBlock[AES_BLOCK_SIZE];

    if ((len != 16) && (len != 24) && (len != 32)) {
        return BAD_FUNC_ARG;
    }

    /* The zero block is both the IV for the key setup and the input to the
     * encryption that produces H. */
    XMEMSET(zeroBlock, 0, AES_BLOCK_SIZE);
    ret = wc_AesSetKey(aes, key, len, zeroBlock, AES_ENCRYPTION);
    if (ret != 0) {
        return ret;
    }

#ifdef WOLFSSL_AESGCM_STREAM
    /* Streaming API checks this flag before processing data. */
    aes->gcmKeySet = 1;
#endif

    wc_AesEncrypt(aes, zeroBlock, aes->H);
#if defined(__aarch64__)
    {
        /* RBIT reverses the bit order within each of the 16 bytes of H.
         * NOTE(review): presumably the form the GHASH code in this file
         * expects - confirm against GMULT. */
        word32* pt = (word32*)aes->H;
        __asm__ volatile (
            "LD1 {v0.16b}, [%[h]] \n"
            "RBIT v0.16b, v0.16b \n"
            "ST1 {v0.16b}, [%[out]] \n"
            : [out] "=r" (pt)
            : [h] "0" (pt)
            : "cc", "memory", "v0"
        );
    }
#else
    {
        /* VREV64.8 reverses the bytes within each 64-bit half and VSWP
         * swaps the halves, reversing the byte order of all of H.
         * NOTE(review): presumably the form the GHASH code in this file
         * expects - confirm against GMULT. */
        word32* pt = (word32*)aes->H;
        __asm__ volatile (
            "VLD1.32 {q0}, [%[h]] \n"
            "VREV64.8 q0, q0 \n"
            "VSWP.8 d0, d1 \n"
            "VST1.32 {q0}, [%[out]] \n"
            : [out] "=r" (pt)
            : [h] "0" (pt)
            : "cc", "memory", "q0"
        );
    }
#endif

    return ret;
}
5317 
5318 #endif /* HAVE_AESGCM */
5319 
5320 /* AES-DIRECT */
5321 #if defined(WOLFSSL_AES_DIRECT)
5322         /* Allow direct access to one block encrypt */
wc_AesEncryptDirect(Aes * aes,byte * out,const byte * in)5323         void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
5324         {
5325             if (aes == NULL || out == NULL || in == NULL) {
5326                 WOLFSSL_MSG("Invalid input to wc_AesEncryptDirect");
5327                 return;
5328             }
5329             wc_AesEncrypt(aes, in, out);
5330         }
5331     #ifdef HAVE_AES_DECRYPT
5332         /* Allow direct access to one block decrypt */
wc_AesDecryptDirect(Aes * aes,byte * out,const byte * in)5333         void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
5334         {
5335             if (aes == NULL || out == NULL || in == NULL) {
5336                 WOLFSSL_MSG("Invalid input to wc_AesDecryptDirect");
5337                 return;
5338             }
5339             wc_AesDecrypt(aes, in, out);
5340         }
5341     #endif /* HAVE_AES_DECRYPT */
5342 #endif /* WOLFSSL_AES_DIRECT */
5343 #endif /* !NO_AES && WOLFSSL_ARMASM */
5344