1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

/* Disable assert() unless this module is being debugged explicitly. */
#ifndef MODES_DEBUG
# ifndef NDEBUG
# define NDEBUG
# endif
#endif
#include <assert.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef GETU32
# define GETU32(p) BSWAP4(*(const u32 *)(p))
# undef PUTU32
# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
#endif

/*
 * Place a 16-bit value in the top 16 bits of a size_t; works for both
 * 32- and 64-bit size_t (used to build the rem_* reduction tables).
 */
#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * One-bit right shift of the 128-bit value V with reduction by the GCM
 * polynomial (the 0xe1... constant encodes x^128 + x^7 + x^2 + x + 1
 * in the bit-reflected representation GHASH uses). The sizeof(size_t)
 * test is a compile-time constant selecting a 32- or 64-bit friendly
 * form of the same computation.
 */
#define REDUCE1BIT(V) do { \
    if (sizeof(size_t)==8) { \
        u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
        V.lo = (V.hi<<63)|(V.lo>>1); \
        V.hi = (V.hi>>1 )^T; \
    } \
    else { \
        u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
        V.lo = (V.hi<<63)|(V.lo>>1); \
        V.hi = (V.hi>>1 )^((u64)T<<32); \
    } \
} while(0)
84
85 /*-
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc would immediately incur working-set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
#if TABLE_BITS==8

/*
 * Build the 8-bit (256-entry) GHASH table: Htable[i] = i*H in
 * GF(2^128), where the byte index i is interpreted as a polynomial.
 * Power-of-two entries are derived from H by repeated REDUCE1BIT
 * (division by x); all other entries follow by XOR, since
 * multiplication in GF(2^128) is linear over GF(2).
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    /* Htable[128] = H; each halving of the index shifts V right once. */
    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    /* Fill non-power-of-two slots: (i+j)*H = i*H ^ j*H for j < i. */
    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}
144
/*
 * GHASH multiplication via the 256-entry table: Xi = Xi * H.
 * Xi is consumed one byte at a time from the last byte to the first;
 * each step shifts the 128-bit accumulator right by 8 bits and folds
 * the spilled byte back in through the rem_8bit reduction table.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    /* Run-time little-endian probe (folded to a constant by compilers). */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    /*
     * rem_8bit[i] = reduction of the polynomial i*x^128 modulo the GCM
     * polynomial, pre-positioned in the top 16 bits (see PACK).
     */
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        /* All 16 bytes of Xi consumed? */
        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        /* Shift accumulator right 8 bits; reduce the spilled byte. */
        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    /* Store the result back into Xi in big-endian byte order. */
    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

/* Single-block GHASH step used by the generic code paths below. */
# define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
264
#elif TABLE_BITS==4

/*
 * Build the 4-bit (16-entry) GHASH table: Htable[i] = i*H in
 * GF(2^128) for nibble index i. Power-of-two entries are derived from
 * H by REDUCE1BIT (division by x); the rest follow by XOR, since
 * multiplication is linear over GF(2). The small-footprint variant
 * computes the same table with loops instead of unrolled assignments.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    /* Htable[8]=H, Htable[4]=H/x, Htable[2]=H/x^2, Htable[1]=H/x^3. */
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    /* Remaining entries by linearity: (i+j)*H = i*H ^ j*H for j < i. */
    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    /* Unrolled version of the loops above. */
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union {
            long one;
            char little;
        } is_endian = {
            1
        };

        /* Little endian: swap hi/lo dwords; big endian: swap the two
         * 32-bit halves within each dword. */
        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
        } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
342
# ifndef GHASH_ASM
/*
 * rem_4bit[i] = reduction of the polynomial i*x^128 modulo the GCM
 * polynomial, pre-positioned in the top 16 bits of a size_t (see PACK).
 */
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
350
/*
 * GHASH multiplication via the 16-entry table: Xi = Xi * H.
 * Xi is consumed one nibble at a time, from the last byte to the
 * first; the loop body is unrolled to handle two nibbles (one byte)
 * per iteration. Bits shifted out of the 128-bit accumulator are
 * folded back in through rem_4bit.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    /* Run-time little-endian probe (folded to a constant by compilers). */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    /* Split the last byte of Xi into its two nibbles. */
    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        /* Shift accumulator right 4 bits; reduce the spilled nibble. */
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        /* Next byte of Xi. */
        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    /* Store the result back into Xi in big-endian byte order. */
    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
422
# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 *
 * Computes Xi = (Xi ^ inp[0..15]) * H for each 16-byte block of inp.
 * len must be a non-zero multiple of 16.
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    /* Run-time little-endian probe (folded to a constant by compilers). */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    /*
     * NOTE: the "# if 1" branch below is the live code; the "# else"
     * branch is an experimental variant kept for reference. Both share
     * the do/while loop whose closing brace follows "# endif".
     */
# if 1
    do {
        /* Fold the next input block into Xi on the fly, starting with
         * the last byte, and multiply by H nibble by nibble. */
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            /* Shift right 4 bits; reduce the spilled nibble. */
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
# else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        /* Process bytes 15..1 with 8-bit shifts, byte 0 with a 4-bit
         * shift, folding the input block into Xi as we go. */
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
# endif

        /* Write Z back to Xi in big-endian order, then advance to the
         * next input block. */
        if (is_endian.little) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
# endif
# else
/* 4-bit GHASH primitives provided in assembler (GHASH_ASM). */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

/* Single-block GHASH step used by the generic code paths below. */
# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* Multi-block GHASH; len must be a multiple of 16. */
# define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" intended to mitigate cache
 * trashing effects. In other words the idea is to hash data while it's
 * still in L1 cache after the encryption pass...
 */
# define GHASH_CHUNK       (3*1024)
# endif
620
621 #else /* TABLE_BITS */
622
623 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
624 {
625 u128 V, Z = { 0, 0 };
626 long X;
627 int i, j;
628 const long *xi = (const long *)Xi;
629 const union {
630 long one;
631 char little;
632 } is_endian = {
633 1
634 };
635
636 V.hi = H[0]; /* H is in host byte order, no byte swapping */
637 V.lo = H[1];
638
639 for (j = 0; j < 16 / sizeof(long); ++j) {
640 if (is_endian.little) {
641 if (sizeof(long) == 8) {
642 # ifdef BSWAP8
643 X = (long)(BSWAP8(xi[j]));
644 # else
645 const u8 *p = (const u8 *)(xi + j);
646 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
647 # endif
648 } else {
649 const u8 *p = (const u8 *)(xi + j);
650 X = (long)GETU32(p);
651 }
652 } else
653 X = xi[j];
654
655 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
656 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
657 Z.hi ^= V.hi & M;
658 Z.lo ^= V.lo & M;
659
660 REDUCE1BIT(V);
661 }
662 }
663
664 if (is_endian.little) {
665 # ifdef BSWAP8
666 Xi[0] = BSWAP8(Z.hi);
667 Xi[1] = BSWAP8(Z.lo);
668 # else
669 u8 *p = (u8 *)Xi;
670 u32 v;
671 v = (u32)(Z.hi >> 32);
672 PUTU32(p, v);
673 v = (u32)(Z.hi);
674 PUTU32(p + 4, v);
675 v = (u32)(Z.lo >> 32);
676 PUTU32(p + 8, v);
677 v = (u32)(Z.lo);
678 PUTU32(p + 12, v);
679 # endif
680 } else {
681 Xi[0] = Z.hi;
682 Xi[1] = Z.lo;
683 }
684 }
685
686 # define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
687
688 #endif
689
/*
 * Platform-specific assembler back-ends. When one is available,
 * GCM_FUNCREF_4BIT makes GCM_MUL/GHASH dispatch through function
 * pointers selected at run time in CRYPTO_gcm128_init.
 */
#if TABLE_BITS==4 && defined(GHASH_ASM)
# if !defined(I386_ONLY) && \
    (defined(__i386) || defined(__i386__) || \
     defined(__x86_64) || defined(__x86_64__) || \
     defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define GHASH_ASM_X86_OR_64
# define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

/* PCLMULQDQ (carry-less multiply) implementations. */
void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define GHASH_ASM_X86
/* 32-bit x86 fallbacks: MMX and plain-integer variants. */
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
# endif
# elif defined(__arm__) || defined(__arm)
# include "arm_arch.h"
# if __ARM_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
/* NEON implementations for ARMv7 and later. */
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif
# endif
#endif

/*
 * Redirect GCM_MUL/GHASH through the function pointers held in the
 * context. Call sites load them into locals named gcm_gmult_p and
 * gcm_ghash_p (see the #ifdef GCM_FUNCREF_4BIT blocks below).
 */
#ifdef GCM_FUNCREF_4BIT
# undef GCM_MUL
# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
# undef GHASH
# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
734
/*
 * Initialise a GCM context: record the block cipher and key, derive
 * the hash subkey H = E(key, 0^128), convert it to host byte order,
 * and precompute the GHASH table (or select a run-time assembler
 * implementation where available).
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    /* Run-time little-endian probe (folded to a constant by compilers). */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    /* H = E(key, 0^128); ctx->H.c was zeroed by the memset above. */
    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if TABLE_BITS==8
    /* 8-bit table is effectively reserved for testing (see above). */
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
        OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        gcm_init_clmul(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
# endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# if defined(GHASH_ASM_X86) /* x86 only */
# if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
# else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
# endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
# else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
# endif
# elif defined(GHASH_ASM_ARM)
    /* NEON is a run-time decision on ARM. */
    if (OPENSSL_armcap_P & ARMV7_NEON) {
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
808
/*
 * Set the IV/nonce and reset all per-message state. A 12-byte IV is
 * used directly as Yi = IV || 0^31 || 1 (the recommended fast path);
 * any other length is GHASHed together with its 64-bit length, per
 * the GCM specification. Must be called once per message, after
 * CRYPTO_gcm128_init and before aad/encrypt/decrypt.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    /* Run-time little-endian probe (folded to a constant by compilers). */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    /* Reset per-message state: counter block, hash, lengths, residues. */
    ctx->Yi.u[0] = 0;
    ctx->Yi.u[1] = 0;
    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;
    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        /* Fast path: Yi = IV || 0^31 || 1. */
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        /* General path: Yi = GHASH(IV || pad || [len(IV)]_64). */
        size_t i;
        u64 len0 = len;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx, Yi);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx, Yi);
        }
        /* Fold in the IV bit length (big-endian, low 64 bits of Yi). */
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1] ^= BSWAP8(len0);
#else
            ctx->Yi.c[8] ^= (u8)(len0 >> 56);
            ctx->Yi.c[9] ^= (u8)(len0 >> 48);
            ctx->Yi.c[10] ^= (u8)(len0 >> 40);
            ctx->Yi.c[11] ^= (u8)(len0 >> 32);
            ctx->Yi.c[12] ^= (u8)(len0 >> 24);
            ctx->Yi.c[13] ^= (u8)(len0 >> 16);
            ctx->Yi.c[14] ^= (u8)(len0 >> 8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        } else
            ctx->Yi.u[1] ^= len0;

        GCM_MUL(ctx, Yi);

        /* Extract the 32-bit counter from the last word of Yi. */
        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Yi.d[3]);
#else
            ctr = GETU32(ctx->Yi.c + 12);
#endif
        else
            ctr = ctx->Yi.d[3];
    }

    /* EK0 = E(key, Yi) is kept for the final tag computation. */
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
892
/*
 * Feed additional authenticated data (AAD) into the GHASH. Must be
 * called after setiv and before any encrypt/decrypt for the same
 * message. May be called multiple times; a partial-block residue is
 * carried in ctx->ares. Returns 0 on success, -1 if the AAD length
 * limit (2^61 bytes) would be exceeded, -2 if encryption/decryption
 * has already started (ctx->len.u[1] != 0).
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    /* Enforce the AAD length limit and detect 64-bit wrap-around. */
    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    /* Complete a partial block left over from a previous call. */
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    /* Hash all whole blocks in one streamed call. */
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx, Xi);
        aad += 16;
        len -= 16;
    }
#endif
    /* Buffer a trailing partial block in Xi; position kept in ares. */
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
953
954 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
955 const unsigned char *in, unsigned char *out,
956 size_t len)
957 {
958 const union {
959 long one;
960 char little;
961 } is_endian = {
962 1
963 };
964 unsigned int n, ctr;
965 size_t i;
966 u64 mlen = ctx->len.u[1];
967 block128_f block = ctx->block;
968 void *key = ctx->key;
969 #ifdef GCM_FUNCREF_4BIT
970 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
971 # ifdef GHASH
972 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
973 const u8 *inp, size_t len) = ctx->ghash;
974 # endif
975 #endif
976
977 #if 0
978 n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
979 #endif
980 mlen += len;
981 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
982 return -1;
983 ctx->len.u[1] = mlen;
984
985 if (ctx->ares) {
986 /* First call to encrypt finalizes GHASH(AAD) */
987 GCM_MUL(ctx, Xi);
988 ctx->ares = 0;
989 }
990
991 if (is_endian.little)
992 #ifdef BSWAP4
993 ctr = BSWAP4(ctx->Yi.d[3]);
994 #else
995 ctr = GETU32(ctx->Yi.c + 12);
996 #endif
997 else
998 ctr = ctx->Yi.d[3];
999
1000 n = ctx->mres;
1001 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1002 if (16 % sizeof(size_t) == 0) { /* always true actually */
1003 do {
1004 if (n) {
1005 while (n && len) {
1006 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1007 --len;
1008 n = (n + 1) % 16;
1009 }
1010 if (n == 0)
1011 GCM_MUL(ctx, Xi);
1012 else {
1013 ctx->mres = n;
1014 return 0;
1015 }
1016 }
1017 # if defined(STRICT_ALIGNMENT)
1018 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1019 break;
1020 # endif
1021 # if defined(GHASH) && defined(GHASH_CHUNK)
1022 while (len >= GHASH_CHUNK) {
1023 size_t j = GHASH_CHUNK;
1024
1025 while (j) {
1026 size_t *out_t = (size_t *)out;
1027 const size_t *in_t = (const size_t *)in;
1028
1029 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1030 ++ctr;
1031 if (is_endian.little)
1032 # ifdef BSWAP4
1033 ctx->Yi.d[3] = BSWAP4(ctr);
1034 # else
1035 PUTU32(ctx->Yi.c + 12, ctr);
1036 # endif
1037 else
1038 ctx->Yi.d[3] = ctr;
1039 for (i = 0; i < 16 / sizeof(size_t); ++i)
1040 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1041 out += 16;
1042 in += 16;
1043 j -= 16;
1044 }
1045 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1046 len -= GHASH_CHUNK;
1047 }
1048 if ((i = (len & (size_t)-16))) {
1049 size_t j = i;
1050
1051 while (len >= 16) {
1052 size_t *out_t = (size_t *)out;
1053 const size_t *in_t = (const size_t *)in;
1054
1055 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1056 ++ctr;
1057 if (is_endian.little)
1058 # ifdef BSWAP4
1059 ctx->Yi.d[3] = BSWAP4(ctr);
1060 # else
1061 PUTU32(ctx->Yi.c + 12, ctr);
1062 # endif
1063 else
1064 ctx->Yi.d[3] = ctr;
1065 for (i = 0; i < 16 / sizeof(size_t); ++i)
1066 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1067 out += 16;
1068 in += 16;
1069 len -= 16;
1070 }
1071 GHASH(ctx, out - j, j);
1072 }
1073 # else
1074 while (len >= 16) {
1075 size_t *out_t = (size_t *)out;
1076 const size_t *in_t = (const size_t *)in;
1077
1078 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1079 ++ctr;
1080 if (is_endian.little)
1081 # ifdef BSWAP4
1082 ctx->Yi.d[3] = BSWAP4(ctr);
1083 # else
1084 PUTU32(ctx->Yi.c + 12, ctr);
1085 # endif
1086 else
1087 ctx->Yi.d[3] = ctr;
1088 for (i = 0; i < 16 / sizeof(size_t); ++i)
1089 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1090 GCM_MUL(ctx, Xi);
1091 out += 16;
1092 in += 16;
1093 len -= 16;
1094 }
1095 # endif
1096 if (len) {
1097 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1098 ++ctr;
1099 if (is_endian.little)
1100 # ifdef BSWAP4
1101 ctx->Yi.d[3] = BSWAP4(ctr);
1102 # else
1103 PUTU32(ctx->Yi.c + 12, ctr);
1104 # endif
1105 else
1106 ctx->Yi.d[3] = ctr;
1107 while (len--) {
1108 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1109 ++n;
1110 }
1111 }
1112
1113 ctx->mres = n;
1114 return 0;
1115 } while (0);
1116 }
1117 #endif
1118 for (i = 0; i < len; ++i) {
1119 if (n == 0) {
1120 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1121 ++ctr;
1122 if (is_endian.little)
1123 #ifdef BSWAP4
1124 ctx->Yi.d[3] = BSWAP4(ctr);
1125 #else
1126 PUTU32(ctx->Yi.c + 12, ctr);
1127 #endif
1128 else
1129 ctx->Yi.d[3] = ctr;
1130 }
1131 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1132 n = (n + 1) % 16;
1133 if (n == 0)
1134 GCM_MUL(ctx, Xi);
1135 }
1136
1137 ctx->mres = n;
1138 return 0;
1139 }
1140
1141 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1142 const unsigned char *in, unsigned char *out,
1143 size_t len)
1144 {
1145 const union {
1146 long one;
1147 char little;
1148 } is_endian = {
1149 1
1150 };
1151 unsigned int n, ctr;
1152 size_t i;
1153 u64 mlen = ctx->len.u[1];
1154 block128_f block = ctx->block;
1155 void *key = ctx->key;
1156 #ifdef GCM_FUNCREF_4BIT
1157 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1158 # ifdef GHASH
1159 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1160 const u8 *inp, size_t len) = ctx->ghash;
1161 # endif
1162 #endif
1163
1164 mlen += len;
1165 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1166 return -1;
1167 ctx->len.u[1] = mlen;
1168
1169 if (ctx->ares) {
1170 /* First call to decrypt finalizes GHASH(AAD) */
1171 GCM_MUL(ctx, Xi);
1172 ctx->ares = 0;
1173 }
1174
1175 if (is_endian.little)
1176 #ifdef BSWAP4
1177 ctr = BSWAP4(ctx->Yi.d[3]);
1178 #else
1179 ctr = GETU32(ctx->Yi.c + 12);
1180 #endif
1181 else
1182 ctr = ctx->Yi.d[3];
1183
1184 n = ctx->mres;
1185 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1186 if (16 % sizeof(size_t) == 0) { /* always true actually */
1187 do {
1188 if (n) {
1189 while (n && len) {
1190 u8 c = *(in++);
1191 *(out++) = c ^ ctx->EKi.c[n];
1192 ctx->Xi.c[n] ^= c;
1193 --len;
1194 n = (n + 1) % 16;
1195 }
1196 if (n == 0)
1197 GCM_MUL(ctx, Xi);
1198 else {
1199 ctx->mres = n;
1200 return 0;
1201 }
1202 }
1203 # if defined(STRICT_ALIGNMENT)
1204 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1205 break;
1206 # endif
1207 # if defined(GHASH) && defined(GHASH_CHUNK)
1208 while (len >= GHASH_CHUNK) {
1209 size_t j = GHASH_CHUNK;
1210
1211 GHASH(ctx, in, GHASH_CHUNK);
1212 while (j) {
1213 size_t *out_t = (size_t *)out;
1214 const size_t *in_t = (const size_t *)in;
1215
1216 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1217 ++ctr;
1218 if (is_endian.little)
1219 # ifdef BSWAP4
1220 ctx->Yi.d[3] = BSWAP4(ctr);
1221 # else
1222 PUTU32(ctx->Yi.c + 12, ctr);
1223 # endif
1224 else
1225 ctx->Yi.d[3] = ctr;
1226 for (i = 0; i < 16 / sizeof(size_t); ++i)
1227 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1228 out += 16;
1229 in += 16;
1230 j -= 16;
1231 }
1232 len -= GHASH_CHUNK;
1233 }
1234 if ((i = (len & (size_t)-16))) {
1235 GHASH(ctx, in, i);
1236 while (len >= 16) {
1237 size_t *out_t = (size_t *)out;
1238 const size_t *in_t = (const size_t *)in;
1239
1240 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1241 ++ctr;
1242 if (is_endian.little)
1243 # ifdef BSWAP4
1244 ctx->Yi.d[3] = BSWAP4(ctr);
1245 # else
1246 PUTU32(ctx->Yi.c + 12, ctr);
1247 # endif
1248 else
1249 ctx->Yi.d[3] = ctr;
1250 for (i = 0; i < 16 / sizeof(size_t); ++i)
1251 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1252 out += 16;
1253 in += 16;
1254 len -= 16;
1255 }
1256 }
1257 # else
1258 while (len >= 16) {
1259 size_t *out_t = (size_t *)out;
1260 const size_t *in_t = (const size_t *)in;
1261
1262 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1263 ++ctr;
1264 if (is_endian.little)
1265 # ifdef BSWAP4
1266 ctx->Yi.d[3] = BSWAP4(ctr);
1267 # else
1268 PUTU32(ctx->Yi.c + 12, ctr);
1269 # endif
1270 else
1271 ctx->Yi.d[3] = ctr;
1272 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1273 size_t c = in[i];
1274 out[i] = c ^ ctx->EKi.t[i];
1275 ctx->Xi.t[i] ^= c;
1276 }
1277 GCM_MUL(ctx, Xi);
1278 out += 16;
1279 in += 16;
1280 len -= 16;
1281 }
1282 # endif
1283 if (len) {
1284 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1285 ++ctr;
1286 if (is_endian.little)
1287 # ifdef BSWAP4
1288 ctx->Yi.d[3] = BSWAP4(ctr);
1289 # else
1290 PUTU32(ctx->Yi.c + 12, ctr);
1291 # endif
1292 else
1293 ctx->Yi.d[3] = ctr;
1294 while (len--) {
1295 u8 c = in[n];
1296 ctx->Xi.c[n] ^= c;
1297 out[n] = c ^ ctx->EKi.c[n];
1298 ++n;
1299 }
1300 }
1301
1302 ctx->mres = n;
1303 return 0;
1304 } while (0);
1305 }
1306 #endif
1307 for (i = 0; i < len; ++i) {
1308 u8 c;
1309 if (n == 0) {
1310 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1311 ++ctr;
1312 if (is_endian.little)
1313 #ifdef BSWAP4
1314 ctx->Yi.d[3] = BSWAP4(ctr);
1315 #else
1316 PUTU32(ctx->Yi.c + 12, ctr);
1317 #endif
1318 else
1319 ctx->Yi.d[3] = ctr;
1320 }
1321 c = in[i];
1322 out[i] = c ^ ctx->EKi.c[n];
1323 ctx->Xi.c[n] ^= c;
1324 n = (n + 1) % 16;
1325 if (n == 0)
1326 GCM_MUL(ctx, Xi);
1327 }
1328
1329 ctx->mres = n;
1330 return 0;
1331 }
1332
/*-
 * Encrypt |len| bytes from |in| into |out| using the caller-supplied
 * vectorized counter-mode routine |stream| (processes whole blocks with
 * a 32-bit big-endian counter), then folds the produced ciphertext into
 * the GHASH state.  Partial-block residue is carried in ctx->mres.
 *
 * Returns 0 on success, -1 if the accumulated message length would
 * exceed GCM's 2^36-32 byte limit.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    /* Run-time endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* Enforce the 2^36-32 byte ceiling; second clause catches u64 wrap. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* Load the 32-bit big-endian counter from the last word of Yi. */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    /* Drain any partial keystream block left from the previous call. */
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Bulk path: encrypt a chunk via |stream|, then hash the output. */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        /* |stream| leaves Yi unspecified; rewrite the counter word. */
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    /* Remaining whole 16-byte blocks (i = len rounded down to 16). */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
#else
        /* No GHASH helper: fold each ciphertext block in and multiply. */
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx, Xi);
            out += 16;
        }
#endif
    }
    /* Trailing partial block: single-block keystream, residue in mres. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1454
/*-
 * Decrypt |len| bytes from |in| into |out| using the caller-supplied
 * vectorized counter-mode routine |stream|.  Ciphertext (the input) is
 * folded into the GHASH state *before* decryption.  Partial-block
 * residue is carried in ctx->mres.
 *
 * Returns 0 on success, -1 if the accumulated message length would
 * exceed GCM's 2^36-32 byte limit.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    /* Run-time endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* Enforce the 2^36-32 byte ceiling; second clause catches u64 wrap. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    /* Load the 32-bit big-endian counter from the last word of Yi. */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    /* Drain any partial keystream block left from the previous call. */
    if (n) {
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Bulk path: hash the ciphertext chunk, then decrypt via |stream|. */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        /* |stream| leaves Yi unspecified; rewrite the counter word. */
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    /* Remaining whole 16-byte blocks (i = len rounded down to 16). */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

#if defined(GHASH)
        GHASH(ctx, in, i);
#else
        /* No GHASH helper: fold blocks in one at a time, then rewind. */
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx, Xi);
            in += 16;
        }
        j = i / 16;
        in -= i;
#endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    /* Trailing partial block: single-block keystream, residue in mres. */
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1583
/*-
 * Finalize the GCM computation: fold the AAD/ciphertext bit lengths
 * into GHASH, XOR with the encrypted initial counter block (EK0) to
 * form the tag in ctx->Xi, and compare it against |tag| (|len| bytes)
 * in constant time.
 *
 * Returns 0 if the tags match, non-zero on mismatch, -1 if |tag| is
 * NULL or |len| exceeds the 16-byte tag size.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    /* Run-time endianness probe: .little is non-zero on little-endian. */
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    /* Lengths are hashed in bits, hence the << 3. */
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    /* Flush any pending partial AAD or message block into GHASH. */
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx, Xi);

    /* Convert the bit lengths to big-endian for the final GHASH block. */
    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        /* No 64-bit byte swap: stage via ctx->len and swap with GETU32. */
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

    /* Final GHASH block: len(A) || len(C). */
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx, Xi);

    /* Tag = GHASH result XOR E(K, Y0). */
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    /* Constant-time comparison to avoid leaking tag bytes via timing. */
    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
1629
1630 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1631 {
1632 CRYPTO_gcm128_finish(ctx, NULL, 0);
1633 memcpy(tag, ctx->Xi.c,
1634 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1635 }
1636
1637 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1638 {
1639 GCM128_CONTEXT *ret;
1640
1641 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1642 CRYPTO_gcm128_init(ret, key, block);
1643
1644 return ret;
1645 }
1646
1647 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1648 {
1649 if (ctx) {
1650 OPENSSL_cleanse(ctx, sizeof(*ctx));
1651 OPENSSL_free(ctx);
1652 }
1653 }
1654
1655 #if defined(SELFTEST)
1656 # include <stdio.h>
1657 # include <openssl/aes.h>
1658
1659 /* Test Case 1 */
1660 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1661 static const u8 T1[] = {
1662 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1663 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1664 };
1665
1666 /* Test Case 2 */
1667 # define K2 K1
1668 # define A2 A1
1669 # define IV2 IV1
1670 static const u8 P2[16];
1671 static const u8 C2[] = {
1672 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1673 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1674 };
1675
1676 static const u8 T2[] = {
1677 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1678 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1679 };
1680
1681 /* Test Case 3 */
1682 # define A3 A2
1683 static const u8 K3[] = {
1684 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1685 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1686 };
1687
1688 static const u8 P3[] = {
1689 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1690 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1691 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1692 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1693 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1694 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1695 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1696 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1697 };
1698
1699 static const u8 IV3[] = {
1700 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1701 0xde, 0xca, 0xf8, 0x88
1702 };
1703
1704 static const u8 C3[] = {
1705 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1706 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1707 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1708 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1709 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1710 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1711 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1712 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1713 };
1714
1715 static const u8 T3[] = {
1716 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1717 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1718 };
1719
1720 /* Test Case 4 */
1721 # define K4 K3
1722 # define IV4 IV3
1723 static const u8 P4[] = {
1724 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1725 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1726 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1727 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1728 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1729 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1730 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1731 0xba, 0x63, 0x7b, 0x39
1732 };
1733
1734 static const u8 A4[] = {
1735 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1736 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1737 0xab, 0xad, 0xda, 0xd2
1738 };
1739
1740 static const u8 C4[] = {
1741 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1742 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1743 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1744 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1745 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1746 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1747 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1748 0x3d, 0x58, 0xe0, 0x91
1749 };
1750
1751 static const u8 T4[] = {
1752 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1753 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1754 };
1755
1756 /* Test Case 5 */
1757 # define K5 K4
1758 # define P5 P4
1759 # define A5 A4
1760 static const u8 IV5[] = {
1761 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1762 };
1763
1764 static const u8 C5[] = {
1765 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1766 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1767 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1768 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1769 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1770 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1771 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1772 0xc2, 0x3f, 0x45, 0x98
1773 };
1774
1775 static const u8 T5[] = {
1776 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1777 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1778 };
1779
1780 /* Test Case 6 */
1781 # define K6 K5
1782 # define P6 P5
1783 # define A6 A5
1784 static const u8 IV6[] = {
1785 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1786 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1787 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1788 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1789 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1790 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1791 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1792 0xa6, 0x37, 0xb3, 0x9b
1793 };
1794
1795 static const u8 C6[] = {
1796 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1797 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1798 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1799 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1800 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1801 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1802 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1803 0x4c, 0x34, 0xae, 0xe5
1804 };
1805
1806 static const u8 T6[] = {
1807 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1808 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1809 };
1810
1811 /* Test Case 7 */
1812 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1813 static const u8 T7[] = {
1814 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1815 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1816 };
1817
1818 /* Test Case 8 */
1819 # define K8 K7
1820 # define IV8 IV7
1821 # define A8 A7
1822 static const u8 P8[16];
1823 static const u8 C8[] = {
1824 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1825 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1826 };
1827
1828 static const u8 T8[] = {
1829 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1830 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
1831 };
1832
1833 /* Test Case 9 */
1834 # define A9 A8
1835 static const u8 K9[] = {
1836 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1837 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1838 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1839 };
1840
1841 static const u8 P9[] = {
1842 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1843 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1844 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1845 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1846 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1847 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1848 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1849 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1850 };
1851
1852 static const u8 IV9[] = {
1853 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1854 0xde, 0xca, 0xf8, 0x88
1855 };
1856
1857 static const u8 C9[] = {
1858 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1859 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1860 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1861 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1862 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1863 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1864 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1865 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1866 };
1867
1868 static const u8 T9[] = {
1869 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1870 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
1871 };
1872
1873 /* Test Case 10 */
1874 # define K10 K9
1875 # define IV10 IV9
1876 static const u8 P10[] = {
1877 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1878 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1879 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1880 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1881 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1882 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1883 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1884 0xba, 0x63, 0x7b, 0x39
1885 };
1886
1887 static const u8 A10[] = {
1888 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1889 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1890 0xab, 0xad, 0xda, 0xd2
1891 };
1892
1893 static const u8 C10[] = {
1894 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1895 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1896 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1897 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1898 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1899 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1900 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1901 0xcc, 0xda, 0x27, 0x10
1902 };
1903
1904 static const u8 T10[] = {
1905 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1906 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
1907 };
1908
1909 /* Test Case 11 */
1910 # define K11 K10
1911 # define P11 P10
1912 # define A11 A10
1913 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1914
1915 static const u8 C11[] = {
1916 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1917 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1918 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1919 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1920 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1921 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1922 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1923 0xa0, 0xf0, 0x62, 0xf7
1924 };
1925
1926 static const u8 T11[] = {
1927 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
1928 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
1929 };
1930
1931 /* Test Case 12 */
1932 # define K12 K11
1933 # define P12 P11
1934 # define A12 A11
1935 static const u8 IV12[] = {
1936 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1937 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1938 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1939 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1940 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1941 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1942 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1943 0xa6, 0x37, 0xb3, 0x9b
1944 };
1945
1946 static const u8 C12[] = {
1947 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
1948 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
1949 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
1950 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
1951 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
1952 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
1953 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
1954 0xe9, 0xb7, 0x37, 0x3b
1955 };
1956
1957 static const u8 T12[] = {
1958 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
1959 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
1960 };
1961
1962 /* Test Case 13 */
1963 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
1964 static const u8 T13[] = {
1965 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
1966 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
1967 };
1968
1969 /* Test Case 14 */
1970 # define K14 K13
1971 # define A14 A13
1972 static const u8 P14[16], IV14[12];
1973 static const u8 C14[] = {
1974 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
1975 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
1976 };
1977
1978 static const u8 T14[] = {
1979 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
1980 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
1981 };
1982
1983 /* Test Case 15 */
1984 # define A15 A14
1985 static const u8 K15[] = {
1986 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1987 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1988 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1989 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1990 };
1991
1992 static const u8 P15[] = {
1993 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1994 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1995 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1996 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1997 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1998 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1999 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2000 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2001 };
2002
2003 static const u8 IV15[] = {
2004 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2005 0xde, 0xca, 0xf8, 0x88
2006 };
2007
2008 static const u8 C15[] = {
2009 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2010 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2011 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2012 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2013 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2014 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2015 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2016 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2017 };
2018
2019 static const u8 T15[] = {
2020 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2021 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
2022 };
2023
2024 /* Test Case 16 */
2025 # define K16 K15
2026 # define IV16 IV15
2027 static const u8 P16[] = {
2028 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2029 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2030 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2031 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2032 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2033 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2034 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2035 0xba, 0x63, 0x7b, 0x39
2036 };
2037
2038 static const u8 A16[] = {
2039 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2040 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2041 0xab, 0xad, 0xda, 0xd2
2042 };
2043
2044 static const u8 C16[] = {
2045 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2046 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2047 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2048 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2049 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2050 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2051 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2052 0xbc, 0xc9, 0xf6, 0x62
2053 };
2054
2055 static const u8 T16[] = {
2056 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2057 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
2058 };
2059
2060 /* Test Case 17 */
2061 # define K17 K16
2062 # define P17 P16
2063 # define A17 A16
2064 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2065
2066 static const u8 C17[] = {
2067 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2068 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2069 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2070 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2071 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2072 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2073 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2074 0xf4, 0x7c, 0x9b, 0x1f
2075 };
2076
2077 static const u8 T17[] = {
2078 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2079 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
2080 };
2081
2082 /* Test Case 18 */
2083 # define K18 K17
2084 # define P18 P17
2085 # define A18 A17
2086 static const u8 IV18[] = {
2087 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2088 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2089 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2090 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2091 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2092 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2093 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2094 0xa6, 0x37, 0xb3, 0x9b
2095 };
2096
2097 static const u8 C18[] = {
2098 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2099 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2100 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2101 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2102 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2103 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2104 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2105 0x44, 0xae, 0x7e, 0x3f
2106 };
2107
2108 static const u8 T18[] = {
2109 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2110 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
2111 };
2112
2113 /* Test Case 19 */
2114 # define K19 K1
2115 # define P19 P1
2116 # define IV19 IV1
2117 # define C19 C1
2118 static const u8 A19[] = {
2119 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2120 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2121 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2122 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2123 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2124 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2125 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2126 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2127 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2128 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2129 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2130 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2131 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2132 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2133 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2134 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2135 };
2136
2137 static const u8 T19[] = {
2138 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2139 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
2140 };
2141
2142 /* Test Case 20 */
2143 # define K20 K1
2144 # define A20 A1
2145 /* this results in 0xff in counter LSB */
2146 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2147
2148 static const u8 P20[288];
/* Expected 288-byte ciphertext for test case 20 (matches sizeof(P20)). */
static const u8 C20[] = {
    0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
    0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
    0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
    0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
    0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
    0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
    0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
    0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
    0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
    0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
    0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
    0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
    0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
    0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
    0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
    0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
    0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
    0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
    0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
    0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
    0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
    0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
    0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
    0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
    0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
    0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
    0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
    0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
    0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
    0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
    0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
    0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
    0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
    0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
    0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
    0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
};
2187
/* Expected 16-byte authentication tag for test case 20. */
static const u8 T20[] = {
    0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
    0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
};
2192
/*
 * TEST_CASE(n): run GCM test vector #n in both directions.
 *
 * Encrypts P<n> and compares the output against C<n>, then decrypts
 * C<n> and compares against P<n>; each direction also passes tag T<n>
 * to CRYPTO_gcm128_finish(), and any non-zero finish() result or
 * buffer mismatch bumps 'ret' and prints a diagnostic.
 *
 * Uses token pasting, so K<n>, IV<n>, A<n>, P<n>, C<n> and T<n> must
 * all be in scope, as must 'ctx', 'key' and 'ret' from the caller.
 *
 * NOTE(review): "if (A##n)" / "if (P##n)" / "if (C##n)" test an
 * array's address, which is always non-null for the vectors above, so
 * these guards are effectively always true.
 */
# define TEST_CASE(n) do { \
    u8 out[sizeof(P##n)]; \
    AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
    CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
    /* encrypt direction */ \
    CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
    memset(out,0,sizeof(out)); \
    if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
    if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
    if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
        (C##n && memcmp(out,C##n,sizeof(out)))) \
        ret++, printf ("encrypt test#%d failed.\n",n); \
    /* decrypt direction */ \
    CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
    memset(out,0,sizeof(out)); \
    if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
    if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
    if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
        (P##n && memcmp(out,P##n,sizeof(out)))) \
        ret++, printf ("decrypt test#%d failed.\n",n); \
    } while(0)
2212
2213 int main()
2214 {
2215 GCM128_CONTEXT ctx;
2216 AES_KEY key;
2217 int ret = 0;
2218
2219 TEST_CASE(1);
2220 TEST_CASE(2);
2221 TEST_CASE(3);
2222 TEST_CASE(4);
2223 TEST_CASE(5);
2224 TEST_CASE(6);
2225 TEST_CASE(7);
2226 TEST_CASE(8);
2227 TEST_CASE(9);
2228 TEST_CASE(10);
2229 TEST_CASE(11);
2230 TEST_CASE(12);
2231 TEST_CASE(13);
2232 TEST_CASE(14);
2233 TEST_CASE(15);
2234 TEST_CASE(16);
2235 TEST_CASE(17);
2236 TEST_CASE(18);
2237 TEST_CASE(19);
2238 TEST_CASE(20);
2239
2240 # ifdef OPENSSL_CPUID_OBJ
2241 {
2242 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2243 union {
2244 u64 u;
2245 u8 c[1024];
2246 } buf;
2247 int i;
2248
2249 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2250 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2251 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2252
2253 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2254 start = OPENSSL_rdtsc();
2255 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2256 gcm_t = OPENSSL_rdtsc() - start;
2257
2258 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2259 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2260 (block128_f) AES_encrypt);
2261 start = OPENSSL_rdtsc();
2262 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2263 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2264 (block128_f) AES_encrypt);
2265 ctr_t = OPENSSL_rdtsc() - start;
2266
2267 printf("%.2f-%.2f=%.2f\n",
2268 gcm_t / (double)sizeof(buf),
2269 ctr_t / (double)sizeof(buf),
2270 (gcm_t - ctr_t) / (double)sizeof(buf));
2271 # ifdef GHASH
2272 {
2273 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2274 const u8 *inp, size_t len) = ctx.ghash;
2275
2276 GHASH((&ctx), buf.c, sizeof(buf));
2277 start = OPENSSL_rdtsc();
2278 for (i = 0; i < 100; ++i)
2279 GHASH((&ctx), buf.c, sizeof(buf));
2280 gcm_t = OPENSSL_rdtsc() - start;
2281 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2282 }
2283 # endif
2284 }
2285 # endif
2286
2287 return ret;
2288 }
2289 #endif
2290