/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"


// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
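// For example, 37 & kSizeTWithoutLower4Bits == 32; the mask rounds a byte
// count down to a whole number of 16-byte blocks.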


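// GCM_MUL and GHASH default to the portable (nohw) implementations. When
// GCM_FUNCREF is defined they are redefined further down to dispatch through
// the function pointers selected by |CRYPTO_ghash_init|.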
#define GCM_MUL(ctx, Xi) gcm_gmult_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#define GHASH(ctx, in, len) \
  gcm_ghash_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
// GHASH_CHUNK is a "stride parameter" intended to mitigate cache-thrashing
// effects. The idea is to hash data while it is still in the L1 cache after
// the encryption pass.
#define GHASH_CHUNK (3 * 1024)

#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
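// gcm_reduce_1bit is the single-bit shift-and-reduce step from the GHASH
// specification: shift |V| right by one bit and, if a one bit was shifted
// out, fold it back in by XORing the reduction constant 0xE1 || 0^120. In
// GCM's bit-reflected representation this corresponds to multiplying |V| by
// x in GF(2^128); it is used below to build the SSSE3 multiplication table.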
static inline void gcm_reduce_1bit(u128 *V) {
  if (sizeof(crypto_word_t) == 8) {
    uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->hi & 1));
    V->hi = (V->lo << 63) | (V->hi >> 1);
    V->lo = (V->lo >> 1) ^ T;
  } else {
    uint32_t T = 0xe1000000U & (0 - (uint32_t)(V->hi & 1));
    V->hi = (V->lo << 63) | (V->hi >> 1);
    V->lo = (V->lo >> 1) ^ ((uint64_t)T << 32);
  }
}

void gcm_init_ssse3(u128 Htable[16], const uint64_t H[2]) {
  Htable[0].hi = 0;
  Htable[0].lo = 0;
  u128 V;
  V.hi = H[1];
  V.lo = H[0];

  Htable[8] = V;
  gcm_reduce_1bit(&V);
  Htable[4] = V;
  gcm_reduce_1bit(&V);
  Htable[2] = V;
  gcm_reduce_1bit(&V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

  // Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i]
  // contains the i'th byte of j*H for all j.
  uint8_t *Hbytes = (uint8_t *)Htable;
  for (int i = 0; i < 16; i++) {
    for (int j = 0; j < i; j++) {
      uint8_t tmp = Hbytes[16*i + j];
      Hbytes[16*i + j] = Hbytes[16*j + i];
      Hbytes[16*j + i] = tmp;
    }
  }
}
#endif  // GHASH_ASM_X86_64 || GHASH_ASM_X86

#ifdef GCM_FUNCREF
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#undef GHASH
#define GHASH(ctx, in, len) \
  (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
#endif  // GCM_FUNCREF

void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t gcm_key[16]) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  // H is stored in host byte order
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (gcm_pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (gcm_neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_nohw(out_table, H.u);
  *out_mult = gcm_gmult_nohw;
  *out_hash = gcm_ghash_nohw;
}

void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
                            block128_f block, int block_is_hwaes) {
  OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
  gcm_key->block = block;

  uint8_t ghash_key[16];
  OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
  (*block)(ghash_key, ghash_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
                    gcm_key->Htable, &is_avx, ghash_key);

  gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
}
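
// A minimal usage sketch for encryption (illustrative only; setting up
// |aes_key|, |block|, the IV, AAD, and plaintext buffers is the caller's
// concern, and the 12-byte IV length is just an example):
//
//   GCM128_CONTEXT ctx;
//   CRYPTO_gcm128_init_key(&ctx.gcm_key, &aes_key, block,
//                          /*block_is_hwaes=*/0);
//   CRYPTO_gcm128_setiv(&ctx, &aes_key, iv, 12);
//   CRYPTO_gcm128_aad(&ctx, aad, aad_len);
//   CRYPTO_gcm128_encrypt(&ctx, &aes_key, plaintext, ciphertext, len);
//   uint8_t tag[16];
//   CRYPTO_gcm128_tag(&ctx, tag, sizeof(tag));
//
// Decryption is symmetric, except that the received tag is checked with
// |CRYPTO_gcm128_finish|.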

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
                         const uint8_t *iv, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0;  // AAD length
  ctx->len.u[1] = 0;  // message length
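  // ares tracks how many bytes of a partial AAD block have been absorbed into
  // |Xi|; mres tracks how many bytes of the current key stream block |EKi|
  // have already been consumed.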
  ctx->ares = 0;
  ctx->mres = 0;

  uint32_t ctr;
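  // Per GCM, a 96-bit IV is used directly as the upper bits of the initial
  // counter block, with the 32-bit counter set to one. Any other IV length is
  // first compressed with GHASH, including a final block that encodes the IV
  // length in bits.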
  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

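  // GCM limits the additional data to 2^64 - 1 bits; the check below caps the
  // running AAD byte count at 2^61 and rejects 64-bit overflow.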
  uint64_t alen = ctx->len.u[0] + len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  unsigned n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  // Process a whole number of blocks.
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }

  // Process the remainder.
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

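  // GCM limits the plaintext to 2^39 - 256 bits, i.e. 2^36 - 32 bytes.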
  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
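  // GHASH is computed over the ciphertext, so each chunk is encrypted first
  // and then hashed while it is still in cache.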
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
        CRYPTO_store_word_le(out + i,
                             CRYPTO_load_word_le(in + i) ^
                                 ctx->EKi.t[i / sizeof(crypto_word_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
        CRYPTO_store_word_le(out + i,
                             CRYPTO_load_word_le(in + i) ^
                                 ctx->EKi.t[i / sizeof(crypto_word_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
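  // GHASH is computed over the ciphertext, so each chunk is hashed before it
  // is decrypted.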
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
        CRYPTO_store_word_le(out + i,
                             CRYPTO_load_word_le(in + i) ^
                                 ctx->EKi.t[i / sizeof(crypto_word_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
        CRYPTO_store_word_le(out + i,
                             CRYPTO_load_word_le(in + i) ^
                                 ctx->EKi.t[i / sizeof(crypto_word_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  // Check |len| to work around a C language bug. See https://crbug.com/1019588.
  if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
    // |aesni_gcm_encrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += len_blocks;
    len -= len_blocks;
    GHASH(ctx, out, len_blocks);
    out += len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  // Check |len| to work around a C language bug. See https://crbug.com/1019588.
  if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
    // |aesni_gcm_decrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    GHASH(ctx, in, len_blocks);
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += len_blocks;
    in += len_blocks;
    len -= len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

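  // Finish the GHASH computation: fold in any buffered partial block, then
  // the 64-bit AAD and message lengths (in bits), and finally XOR with
  // E_K(Y0), which was saved in |EK0| by CRYPTO_gcm128_setiv.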
  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3);
  ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3);
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) &&  // check FXSR bit
         (ia32cap[1] & (1 << 1));     // check PCLMULQDQ bit
#else
  return 0;
#endif
}
#endif