/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"


// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;


#define GCM_MUL(ctx, Xi) gcm_gmult_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#define GHASH(ctx, in, len) \
  gcm_ghash_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
// GHASH_CHUNK is a "stride parameter" intended to mitigate cache-thrashing
// effects: the idea is to hash data while it is still in the L1 cache after
// the encryption pass.
#define GHASH_CHUNK (3 * 1024)

#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
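// gcm_reduce_1bit shifts |V| right by one bit and, when a bit falls off the
// end, folds the GCM reduction polynomial back in via the 0xe1... constant.
// In GHASH's bit-reversed representation of GF(2^128) this is multiplication
// by x.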
static inline void gcm_reduce_1bit(u128 *V) {
  if (sizeof(size_t) == 8) {
    uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->hi & 1));
    V->hi = (V->lo << 63) | (V->hi >> 1);
    V->lo = (V->lo >> 1) ^ T;
  } else {
    uint32_t T = 0xe1000000U & (0 - (uint32_t)(V->hi & 1));
    V->hi = (V->lo << 63) | (V->hi >> 1);
    V->lo = (V->lo >> 1) ^ ((uint64_t)T << 32);
  }
}

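// gcm_init_ssse3 expands the hash key |H| into |Htable| for the
// constant-time SSSE3 GHASH implementation. Before the transposition at the
// end, Htable[j] holds j*H (in GHASH's reflected representation) for each
// 4-bit value j; the transposition rearranges the table into the byte layout
// the assembly code expects.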
void gcm_init_ssse3(u128 Htable[16], const uint64_t H[2]) {
  Htable[0].hi = 0;
  Htable[0].lo = 0;
  u128 V;
  V.hi = H[1];
  V.lo = H[0];

  Htable[8] = V;
  gcm_reduce_1bit(&V);
  Htable[4] = V;
  gcm_reduce_1bit(&V);
  Htable[2] = V;
  gcm_reduce_1bit(&V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

  // Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i]
  // contains the i'th byte of j*H for all j.
  uint8_t *Hbytes = (uint8_t *)Htable;
  for (int i = 0; i < 16; i++) {
    for (int j = 0; j < i; j++) {
      uint8_t tmp = Hbytes[16*i + j];
      Hbytes[16*i + j] = Hbytes[16*j + i];
      Hbytes[16*j + i] = tmp;
    }
  }
}
#endif  // GHASH_ASM_X86_64 || GHASH_ASM_X86

#ifdef GCM_FUNCREF
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#undef GHASH
#define GHASH(ctx, in, len) \
  (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
#endif  // GCM_FUNCREF

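// CRYPTO_ghash_init writes the GHASH key derived from the 16-byte |gcm_key|
// into |out_key| and |out_table| and selects the fastest gmult/ghash
// implementation available on this CPU. |*out_is_avx| is set to one when the
// AVX assembly is chosen, which callers use to enable the fused AES-NI GCM
// path.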
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t gcm_key[16]) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  // H is stored in host byte order
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
  if (gcm_ssse3_capable()) {
    gcm_init_ssse3(out_table, H.u);
    *out_mult = gcm_gmult_ssse3;
    *out_hash = gcm_ghash_ssse3;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (gcm_pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (gcm_neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_nohw(out_table, H.u);
  *out_mult = gcm_gmult_nohw;
  *out_hash = gcm_ghash_nohw;
}

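// CRYPTO_gcm128_init_key initialises |gcm_key| for use with |aes_key|. The
// GHASH key is the encryption of the all-zero block, per the GCM
// specification. The fused AES-NI/AVX bulk path is only enabled when both
// the AVX GHASH code and a hardware AES block function are available.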
void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
                            block128_f block, int block_is_hwaes) {
  OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
  gcm_key->block = block;

  uint8_t ghash_key[16];
  OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
  (*block)(ghash_key, ghash_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
                    gcm_key->Htable, &is_avx, ghash_key);

  gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
}

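// CRYPTO_gcm128_setiv resets the per-message state and derives the initial
// counter block from |iv|. A 12-byte IV is used directly with the 32-bit
// counter set to one; any other length is absorbed through GHASH together
// with the IV's bit length, as GCM specifies. EK0, the encryption of the
// initial counter block, is saved for the final tag computation.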
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
                         const uint8_t *iv, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0;  // AAD length
  ctx->len.u[1] = 0;  // message length
  ctx->ares = 0;
  ctx->mres = 0;

  uint32_t ctr;
  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

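// CRYPTO_gcm128_aad absorbs |len| bytes of additional authenticated data
// into the GHASH state. It must be called before any data is encrypted or
// decrypted, and it fails if the total AAD would exceed 2^61 bytes. It
// returns one on success and zero otherwise.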
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  uint64_t alen = ctx->len.u[0] + len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  unsigned n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  // Process a whole number of blocks.
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }

  // Process the remainder.
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

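// CRYPTO_gcm128_encrypt encrypts |len| bytes from |in| to |out| in CTR mode,
// one block at a time via |block|, and folds the resulting ciphertext into
// the GHASH state. The total message length across calls is limited to
// 2^36 - 32 bytes, per the GCM specification. It returns one on success and
// zero if the length limit is exceeded.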
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

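// CRYPTO_gcm128_decrypt is the decryption counterpart of
// CRYPTO_gcm128_encrypt: the ciphertext is folded into the GHASH state
// before it is decrypted in CTR mode, and the same 2^36 - 32 byte message
// limit is enforced.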
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

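// CRYPTO_gcm128_encrypt_ctr32 behaves like CRYPTO_gcm128_encrypt but
// processes whole blocks with the |stream| counter-mode function (and, when
// available, the fused AES-NI/AVX GCM assembly), falling back to the block
// function only for a final partial block. |stream| increments only the low
// 32 bits of the counter block, matching GCM's counter format.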
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  // Check |len| to work around a C language bug. See https://crbug.com/1019588.
  if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
    // |aesni_gcm_encrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += len_blocks;
    len -= len_blocks;
    GHASH(ctx, out, len_blocks);
    out += len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

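// CRYPTO_gcm128_decrypt_ctr32 is the decryption counterpart of
// CRYPTO_gcm128_encrypt_ctr32: the ciphertext is hashed before it is
// decrypted with |stream|.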
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->gcm_key.ghash;
#endif

  uint64_t mlen = ctx->len.u[1] + len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  unsigned n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  // Check |len| to work around a C language bug. See https://crbug.com/1019588.
  if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
    // |aesni_gcm_decrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    size_t j = len_blocks / 16;

    GHASH(ctx, in, len_blocks);
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += len_blocks;
    in += len_blocks;
    len -= len_blocks;
  }
  if (len) {
    (*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

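// CRYPTO_gcm128_finish completes the GHASH computation by folding in the AAD
// and message bit lengths, XORs the result with EK0 to form the tag, and, if
// |tag| is non-NULL, compares it against the caller's expected |len|-byte tag
// in constant time. It returns one if the tags match and zero otherwise.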
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
#ifdef GCM_FUNCREF
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
      ctx->gcm_key.gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3);
  ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3);
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

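// CRYPTO_gcm128_tag computes the authentication tag for the current message
// and writes min(|len|, 16) bytes of it to |tag|.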
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
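// crypto_gcm_clmul_enabled returns one if the CPU supports the carry-less
// multiplication (PCLMULQDQ) instructions used by the CLMUL GHASH
// implementation, and zero otherwise.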
int crypto_gcm_clmul_enabled(void) {
#if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) &&  // check FXSR bit
         (ia32cap[1] & (1 << 1));     // check PCLMULQDQ bit
#else
  return 0;
#endif
}
#endif