1 /* 2 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the OpenSSL license (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include <openssl/crypto.h> 11 #include "modes_lcl.h" 12 #include <string.h> 13 14 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) 15 /* redefine, because alignment is ensured */ 16 # undef GETU32 17 # define GETU32(p) BSWAP4(*(const u32 *)(p)) 18 # undef PUTU32 19 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 20 #endif 21 22 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 23 #define REDUCE1BIT(V) do { \ 24 if (sizeof(size_t)==8) { \ 25 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ 26 V.lo = (V.hi<<63)|(V.lo>>1); \ 27 V.hi = (V.hi>>1 )^T; \ 28 } \ 29 else { \ 30 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ 31 V.lo = (V.hi<<63)|(V.lo>>1); \ 32 V.hi = (V.hi>>1 )^((u64)T<<32); \ 33 } \ 34 } while(0) 35 36 /*- 37 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 38 * never be set to 8. 8 is effectively reserved for testing purposes. 39 * TABLE_BITS>1 are lookup-table-driven implementations referred to as 40 * "Shoup's" in GCM specification. In other words OpenSSL does not cover 41 * whole spectrum of possible table driven implementations. Why? In 42 * non-"Shoup's" case memory access pattern is segmented in such manner, 43 * that it's trivial to see that cache timing information can reveal 44 * fair portion of intermediate hash value. Given that ciphertext is 45 * always available to attacker, it's possible for him to attempt to 46 * deduce secret parameter H and if successful, tamper with messages 47 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's 48 * not as trivial, but there is no reason to believe that it's resistant 49 * to cache-timing attack. And the thing about "8-bit" implementation is 50 * that it consumes 16 (sixteen) times more memory, 4KB per individual 51 * key + 1KB shared. Well, on pros side it should be twice as fast as 52 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version 53 * was observed to run ~75% faster, closer to 100% for commercial 54 * compilers... Yet "4-bit" procedure is preferred, because it's 55 * believed to provide better security-performance balance and adequate 56 * all-round performance. "All-round" refers to things like: 57 * 58 * - shorter setup time effectively improves overall timing for 59 * handling short messages; 60 * - larger table allocation can become unbearable because of VM 61 * subsystem penalties (for example on Windows large enough free 62 * results in VM working set trimming, meaning that consequent 63 * malloc would immediately incur working set expansion); 64 * - larger table has larger cache footprint, which can affect 65 * performance of other code paths (not necessarily even from same 66 * thread in Hyper-Threading world); 67 * 68 * Value of 1 is not appropriate for performance reasons. 69 */ 70 #if TABLE_BITS==8 71 72 static void gcm_init_8bit(u128 Htable[256], u64 H[2]) 73 { 74 int i, j; 75 u128 V; 76 77 Htable[0].hi = 0; 78 Htable[0].lo = 0; 79 V.hi = H[0]; 80 V.lo = H[1]; 81 82 for (Htable[128] = V, i = 64; i > 0; i >>= 1) { 83 REDUCE1BIT(V); 84 Htable[i] = V; 85 } 86 87 for (i = 2; i < 256; i <<= 1) { 88 u128 *Hi = Htable + i, H0 = *Hi; 89 for (j = 1; j < i; ++j) { 90 Hi[j].hi = H0.hi ^ Htable[j].hi; 91 Hi[j].lo = H0.lo ^ Htable[j].lo; 92 } 93 } 94 } 95 96 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) 97 { 98 u128 Z = { 0, 0 }; 99 const u8 *xi = (const u8 *)Xi + 15; 100 size_t rem, n = *xi; 101 const union { 102 long one; 103 char little; 104 } is_endian = { 1 }; 105 static const size_t rem_8bit[256] = { 106 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), 107 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), 108 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), 109 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), 110 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), 111 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), 112 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), 113 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), 114 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), 115 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), 116 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), 117 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), 118 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), 119 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), 120 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), 121 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), 122 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), 123 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), 124 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), 125 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), 126 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), 127 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), 128 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), 129 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), 130 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), 131 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), 132 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), 133 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), 134 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), 135 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), 136 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), 137 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), 138 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), 139 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), 140 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), 141 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), 142 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), 143 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), 144 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), 145 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), 146 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), 147 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), 148 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), 149 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), 150 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), 151 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), 152 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), 153 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), 154 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), 155 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), 156 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), 157 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), 158 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), 159 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), 160 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), 161 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), 162 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), 163 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), 164 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), 165 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), 166 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), 167 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), 168 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), 169 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) 170 }; 171 172 while (1) { 173 Z.hi ^= Htable[n].hi; 174 Z.lo ^= Htable[n].lo; 175 176 if ((u8 *)Xi == xi) 177 break; 178 179 n = *(--xi); 180 181 rem = (size_t)Z.lo & 0xff; 182 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 183 Z.hi = (Z.hi >> 8); 184 if (sizeof(size_t) == 8) 185 Z.hi ^= rem_8bit[rem]; 186 else 187 Z.hi ^= (u64)rem_8bit[rem] << 32; 188 } 189 190 if (is_endian.little) { 191 # ifdef BSWAP8 192 Xi[0] = BSWAP8(Z.hi); 193 Xi[1] = BSWAP8(Z.lo); 194 # else 195 u8 *p = (u8 *)Xi; 196 u32 v; 197 v = (u32)(Z.hi >> 32); 198 PUTU32(p, v); 199 v = (u32)(Z.hi); 200 PUTU32(p + 4, v); 201 v = (u32)(Z.lo >> 32); 202 PUTU32(p + 8, v); 203 v = (u32)(Z.lo); 204 PUTU32(p + 12, v); 205 # endif 206 } else { 207 Xi[0] = Z.hi; 208 Xi[1] = Z.lo; 209 } 210 } 211 212 # define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) 213 214 #elif TABLE_BITS==4 215 216 static void gcm_init_4bit(u128 Htable[16], u64 H[2]) 217 { 218 u128 V; 219 # if defined(OPENSSL_SMALL_FOOTPRINT) 220 int i; 221 # endif 222 223 Htable[0].hi = 0; 224 Htable[0].lo = 0; 225 V.hi = H[0]; 226 V.lo = H[1]; 227 228 # if defined(OPENSSL_SMALL_FOOTPRINT) 229 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 230 REDUCE1BIT(V); 231 Htable[i] = V; 232 } 233 234 for (i = 2; i < 16; i <<= 1) { 235 u128 *Hi = Htable + i; 236 int j; 237 for (V = *Hi, j = 1; j < i; ++j) { 238 Hi[j].hi = V.hi ^ Htable[j].hi; 239 Hi[j].lo = V.lo ^ Htable[j].lo; 240 } 241 } 242 # else 243 Htable[8] = V; 244 REDUCE1BIT(V); 245 Htable[4] = V; 246 REDUCE1BIT(V); 247 Htable[2] = V; 248 REDUCE1BIT(V); 249 Htable[1] = V; 250 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; 251 V = Htable[4]; 252 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; 253 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; 254 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; 255 V = Htable[8]; 256 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; 257 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; 258 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; 259 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; 260 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; 261 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; 262 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; 263 # endif 264 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 265 /* 266 * ARM assembler expects specific dword order in Htable. 267 */ 268 { 269 int j; 270 const union { 271 long one; 272 char little; 273 } is_endian = { 1 }; 274 275 if (is_endian.little) 276 for (j = 0; j < 16; ++j) { 277 V = Htable[j]; 278 Htable[j].hi = V.lo; 279 Htable[j].lo = V.hi; 280 } else 281 for (j = 0; j < 16; ++j) { 282 V = Htable[j]; 283 Htable[j].hi = V.lo << 32 | V.lo >> 32; 284 Htable[j].lo = V.hi << 32 | V.hi >> 32; 285 } 286 } 287 # endif 288 } 289 290 # ifndef GHASH_ASM 291 static const size_t rem_4bit[16] = { 292 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 293 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 294 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 295 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) 296 }; 297 298 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 299 { 300 u128 Z; 301 int cnt = 15; 302 size_t rem, nlo, nhi; 303 const union { 304 long one; 305 char little; 306 } is_endian = { 1 }; 307 308 nlo = ((const u8 *)Xi)[15]; 309 nhi = nlo >> 4; 310 nlo &= 0xf; 311 312 Z.hi = Htable[nlo].hi; 313 Z.lo = Htable[nlo].lo; 314 315 while (1) { 316 rem = (size_t)Z.lo & 0xf; 317 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 318 Z.hi = (Z.hi >> 4); 319 if (sizeof(size_t) == 8) 320 Z.hi ^= rem_4bit[rem]; 321 else 322 Z.hi ^= (u64)rem_4bit[rem] << 32; 323 324 Z.hi ^= Htable[nhi].hi; 325 Z.lo ^= Htable[nhi].lo; 326 327 if (--cnt < 0) 328 break; 329 330 nlo = ((const u8 *)Xi)[cnt]; 331 nhi = nlo >> 4; 332 nlo &= 0xf; 333 334 rem = (size_t)Z.lo & 0xf; 335 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 336 Z.hi = (Z.hi >> 4); 337 if (sizeof(size_t) == 8) 338 Z.hi ^= rem_4bit[rem]; 339 else 340 Z.hi ^= (u64)rem_4bit[rem] << 32; 341 342 Z.hi ^= Htable[nlo].hi; 343 Z.lo ^= Htable[nlo].lo; 344 } 345 346 if (is_endian.little) { 347 # ifdef BSWAP8 348 Xi[0] = BSWAP8(Z.hi); 349 Xi[1] = BSWAP8(Z.lo); 350 # else 351 u8 *p = (u8 *)Xi; 352 u32 v; 353 v = (u32)(Z.hi >> 32); 354 PUTU32(p, v); 355 v = (u32)(Z.hi); 356 PUTU32(p + 4, v); 357 v = (u32)(Z.lo >> 32); 358 PUTU32(p + 8, v); 359 v = (u32)(Z.lo); 360 PUTU32(p + 12, v); 361 # endif 362 } else { 363 Xi[0] = Z.hi; 364 Xi[1] = Z.lo; 365 } 366 } 367 368 # if !defined(OPENSSL_SMALL_FOOTPRINT) 369 /* 370 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 371 * details... Compiler-generated code doesn't seem to give any 372 * performance improvement, at least not on x86[_64]. It's here 373 * mostly as reference and a placeholder for possible future 374 * non-trivial optimization[s]... 375 */ 376 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 377 const u8 *inp, size_t len) 378 { 379 u128 Z; 380 int cnt; 381 size_t rem, nlo, nhi; 382 const union { 383 long one; 384 char little; 385 } is_endian = { 1 }; 386 387 # if 1 388 do { 389 cnt = 15; 390 nlo = ((const u8 *)Xi)[15]; 391 nlo ^= inp[15]; 392 nhi = nlo >> 4; 393 nlo &= 0xf; 394 395 Z.hi = Htable[nlo].hi; 396 Z.lo = Htable[nlo].lo; 397 398 while (1) { 399 rem = (size_t)Z.lo & 0xf; 400 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 401 Z.hi = (Z.hi >> 4); 402 if (sizeof(size_t) == 8) 403 Z.hi ^= rem_4bit[rem]; 404 else 405 Z.hi ^= (u64)rem_4bit[rem] << 32; 406 407 Z.hi ^= Htable[nhi].hi; 408 Z.lo ^= Htable[nhi].lo; 409 410 if (--cnt < 0) 411 break; 412 413 nlo = ((const u8 *)Xi)[cnt]; 414 nlo ^= inp[cnt]; 415 nhi = nlo >> 4; 416 nlo &= 0xf; 417 418 rem = (size_t)Z.lo & 0xf; 419 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 420 Z.hi = (Z.hi >> 4); 421 if (sizeof(size_t) == 8) 422 Z.hi ^= rem_4bit[rem]; 423 else 424 Z.hi ^= (u64)rem_4bit[rem] << 32; 425 426 Z.hi ^= Htable[nlo].hi; 427 Z.lo ^= Htable[nlo].lo; 428 } 429 # else 430 /* 431 * Extra 256+16 bytes per-key plus 512 bytes shared tables 432 * [should] give ~50% improvement... One could have PACK()-ed 433 * the rem_8bit even here, but the priority is to minimize 434 * cache footprint... 435 */ 436 u128 Hshr4[16]; /* Htable shifted right by 4 bits */ 437 u8 Hshl4[16]; /* Htable shifted left by 4 bits */ 438 static const unsigned short rem_8bit[256] = { 439 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, 440 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, 441 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, 442 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, 443 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, 444 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, 445 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, 446 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, 447 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, 448 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, 449 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, 450 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, 451 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, 452 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, 453 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, 454 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, 455 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, 456 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, 457 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, 458 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, 459 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, 460 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, 461 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, 462 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, 463 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, 464 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, 465 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, 466 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, 467 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, 468 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, 469 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, 470 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE 471 }; 472 /* 473 * This pre-processing phase slows down procedure by approximately 474 * same time as it makes each loop spin faster. In other words 475 * single block performance is approximately same as straightforward 476 * "4-bit" implementation, and then it goes only faster... 477 */ 478 for (cnt = 0; cnt < 16; ++cnt) { 479 Z.hi = Htable[cnt].hi; 480 Z.lo = Htable[cnt].lo; 481 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); 482 Hshr4[cnt].hi = (Z.hi >> 4); 483 Hshl4[cnt] = (u8)(Z.lo << 4); 484 } 485 486 do { 487 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { 488 nlo = ((const u8 *)Xi)[cnt]; 489 nlo ^= inp[cnt]; 490 nhi = nlo >> 4; 491 nlo &= 0xf; 492 493 Z.hi ^= Htable[nlo].hi; 494 Z.lo ^= Htable[nlo].lo; 495 496 rem = (size_t)Z.lo & 0xff; 497 498 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 499 Z.hi = (Z.hi >> 8); 500 501 Z.hi ^= Hshr4[nhi].hi; 502 Z.lo ^= Hshr4[nhi].lo; 503 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; 504 } 505 506 nlo = ((const u8 *)Xi)[0]; 507 nlo ^= inp[0]; 508 nhi = nlo >> 4; 509 nlo &= 0xf; 510 511 Z.hi ^= Htable[nlo].hi; 512 Z.lo ^= Htable[nlo].lo; 513 514 rem = (size_t)Z.lo & 0xf; 515 516 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 517 Z.hi = (Z.hi >> 4); 518 519 Z.hi ^= Htable[nhi].hi; 520 Z.lo ^= Htable[nhi].lo; 521 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; 522 # endif 523 524 if (is_endian.little) { 525 # ifdef BSWAP8 526 Xi[0] = BSWAP8(Z.hi); 527 Xi[1] = BSWAP8(Z.lo); 528 # else 529 u8 *p = (u8 *)Xi; 530 u32 v; 531 v = (u32)(Z.hi >> 32); 532 PUTU32(p, v); 533 v = (u32)(Z.hi); 534 PUTU32(p + 4, v); 535 v = (u32)(Z.lo >> 32); 536 PUTU32(p + 8, v); 537 v = (u32)(Z.lo); 538 PUTU32(p + 12, v); 539 # endif 540 } else { 541 Xi[0] = Z.hi; 542 Xi[1] = Z.lo; 543 } 544 } while (inp += 16, len -= 16); 545 } 546 # endif 547 # else 548 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); 549 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, 550 size_t len); 551 # endif 552 553 # define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 554 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) 555 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 556 /* 557 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing 558 * effect. In other words idea is to hash data while it's still in L1 cache 559 * after encryption pass... 560 */ 561 # define GHASH_CHUNK (3*1024) 562 # endif 563 564 #else /* TABLE_BITS */ 565 566 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 567 { 568 u128 V, Z = { 0, 0 }; 569 long X; 570 int i, j; 571 const long *xi = (const long *)Xi; 572 const union { 573 long one; 574 char little; 575 } is_endian = { 1 }; 576 577 V.hi = H[0]; /* H is in host byte order, no byte swapping */ 578 V.lo = H[1]; 579 580 for (j = 0; j < 16 / sizeof(long); ++j) { 581 if (is_endian.little) { 582 if (sizeof(long) == 8) { 583 # ifdef BSWAP8 584 X = (long)(BSWAP8(xi[j])); 585 # else 586 const u8 *p = (const u8 *)(xi + j); 587 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); 588 # endif 589 } else { 590 const u8 *p = (const u8 *)(xi + j); 591 X = (long)GETU32(p); 592 } 593 } else 594 X = xi[j]; 595 596 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { 597 u64 M = (u64)(X >> (8 * sizeof(long) - 1)); 598 Z.hi ^= V.hi & M; 599 Z.lo ^= V.lo & M; 600 601 REDUCE1BIT(V); 602 } 603 } 604 605 if (is_endian.little) { 606 # ifdef BSWAP8 607 Xi[0] = BSWAP8(Z.hi); 608 Xi[1] = BSWAP8(Z.lo); 609 # else 610 u8 *p = (u8 *)Xi; 611 u32 v; 612 v = (u32)(Z.hi >> 32); 613 PUTU32(p, v); 614 v = (u32)(Z.hi); 615 PUTU32(p + 4, v); 616 v = (u32)(Z.lo >> 32); 617 PUTU32(p + 8, v); 618 v = (u32)(Z.lo); 619 PUTU32(p + 12, v); 620 # endif 621 } else { 622 Xi[0] = Z.hi; 623 Xi[1] = Z.lo; 624 } 625 } 626 627 # define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) 628 629 #endif 630 631 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) 632 # if !defined(I386_ONLY) && \ 633 (defined(__i386) || defined(__i386__) || \ 634 defined(__x86_64) || defined(__x86_64__) || \ 635 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 636 # define GHASH_ASM_X86_OR_64 637 # define GCM_FUNCREF_4BIT 638 extern unsigned int OPENSSL_ia32cap_P[]; 639 640 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 641 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 642 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 643 size_t len); 644 645 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 646 # define gcm_init_avx gcm_init_clmul 647 # define gcm_gmult_avx gcm_gmult_clmul 648 # define gcm_ghash_avx gcm_ghash_clmul 649 # else 650 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); 651 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); 652 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 653 size_t len); 654 # endif 655 656 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 657 # define GHASH_ASM_X86 658 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 659 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 660 size_t len); 661 662 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 663 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 664 size_t len); 665 # endif 666 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) 667 # include "arm_arch.h" 668 # if __ARM_MAX_ARCH__>=7 669 # define GHASH_ASM_ARM 670 # define GCM_FUNCREF_4BIT 671 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) 672 # if defined(__arm__) || defined(__arm) 673 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) 674 # endif 675 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); 676 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); 677 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, 678 size_t len); 679 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); 680 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); 681 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 682 size_t len); 683 # endif 684 # elif defined(__sparc__) || defined(__sparc) 685 # include "sparc_arch.h" 686 # define GHASH_ASM_SPARC 687 # define GCM_FUNCREF_4BIT 688 extern unsigned int OPENSSL_sparcv9cap_P[]; 689 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); 690 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); 691 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, 692 size_t len); 693 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) 694 # include "ppc_arch.h" 695 # define GHASH_ASM_PPC 696 # define GCM_FUNCREF_4BIT 697 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); 698 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); 699 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 700 size_t len); 701 # endif 702 #endif 703 704 #ifdef GCM_FUNCREF_4BIT 705 # undef GCM_MUL 706 # define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 707 # ifdef GHASH 708 # undef GHASH 709 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 710 # endif 711 #endif 712 713 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) 714 { 715 const union { 716 long one; 717 char little; 718 } is_endian = { 1 }; 719 720 memset(ctx, 0, sizeof(*ctx)); 721 ctx->block = block; 722 ctx->key = key; 723 724 (*block) (ctx->H.c, ctx->H.c, key); 725 726 if (is_endian.little) { 727 /* H is stored in host byte order */ 728 #ifdef BSWAP8 729 ctx->H.u[0] = BSWAP8(ctx->H.u[0]); 730 ctx->H.u[1] = BSWAP8(ctx->H.u[1]); 731 #else 732 u8 *p = ctx->H.c; 733 u64 hi, lo; 734 hi = (u64)GETU32(p) << 32 | GETU32(p + 4); 735 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 736 ctx->H.u[0] = hi; 737 ctx->H.u[1] = lo; 738 #endif 739 } 740 #if TABLE_BITS==8 741 gcm_init_8bit(ctx->Htable, ctx->H.u); 742 #elif TABLE_BITS==4 743 # if defined(GHASH) 744 # define CTX__GHASH(f) (ctx->ghash = (f)) 745 # else 746 # define CTX__GHASH(f) (ctx->ghash = NULL) 747 # endif 748 # if defined(GHASH_ASM_X86_OR_64) 749 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) 750 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ 751 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ 752 gcm_init_avx(ctx->Htable, ctx->H.u); 753 ctx->gmult = gcm_gmult_avx; 754 CTX__GHASH(gcm_ghash_avx); 755 } else { 756 gcm_init_clmul(ctx->Htable, ctx->H.u); 757 ctx->gmult = gcm_gmult_clmul; 758 CTX__GHASH(gcm_ghash_clmul); 759 } 760 return; 761 } 762 # endif 763 gcm_init_4bit(ctx->Htable, ctx->H.u); 764 # if defined(GHASH_ASM_X86) /* x86 only */ 765 # if defined(OPENSSL_IA32_SSE2) 766 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ 767 # else 768 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ 769 # endif 770 ctx->gmult = gcm_gmult_4bit_mmx; 771 CTX__GHASH(gcm_ghash_4bit_mmx); 772 } else { 773 ctx->gmult = gcm_gmult_4bit_x86; 774 CTX__GHASH(gcm_ghash_4bit_x86); 775 } 776 # else 777 ctx->gmult = gcm_gmult_4bit; 778 CTX__GHASH(gcm_ghash_4bit); 779 # endif 780 # elif defined(GHASH_ASM_ARM) 781 # ifdef PMULL_CAPABLE 782 if (PMULL_CAPABLE) { 783 gcm_init_v8(ctx->Htable, ctx->H.u); 784 ctx->gmult = gcm_gmult_v8; 785 CTX__GHASH(gcm_ghash_v8); 786 } else 787 # endif 788 # ifdef NEON_CAPABLE 789 if (NEON_CAPABLE) { 790 gcm_init_neon(ctx->Htable, ctx->H.u); 791 ctx->gmult = gcm_gmult_neon; 792 CTX__GHASH(gcm_ghash_neon); 793 } else 794 # endif 795 { 796 gcm_init_4bit(ctx->Htable, ctx->H.u); 797 ctx->gmult = gcm_gmult_4bit; 798 CTX__GHASH(gcm_ghash_4bit); 799 } 800 # elif defined(GHASH_ASM_SPARC) 801 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { 802 gcm_init_vis3(ctx->Htable, ctx->H.u); 803 ctx->gmult = gcm_gmult_vis3; 804 CTX__GHASH(gcm_ghash_vis3); 805 } else { 806 gcm_init_4bit(ctx->Htable, ctx->H.u); 807 ctx->gmult = gcm_gmult_4bit; 808 CTX__GHASH(gcm_ghash_4bit); 809 } 810 # elif defined(GHASH_ASM_PPC) 811 if (OPENSSL_ppccap_P & PPC_CRYPTO207) { 812 gcm_init_p8(ctx->Htable, ctx->H.u); 813 ctx->gmult = gcm_gmult_p8; 814 CTX__GHASH(gcm_ghash_p8); 815 } else { 816 gcm_init_4bit(ctx->Htable, ctx->H.u); 817 ctx->gmult = gcm_gmult_4bit; 818 CTX__GHASH(gcm_ghash_4bit); 819 } 820 # else 821 gcm_init_4bit(ctx->Htable, ctx->H.u); 822 # endif 823 # undef CTX__GHASH 824 #endif 825 } 826 827 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, 828 size_t len) 829 { 830 const union { 831 long one; 832 char little; 833 } is_endian = { 1 }; 834 unsigned int ctr; 835 #ifdef GCM_FUNCREF_4BIT 836 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 837 #endif 838 839 ctx->len.u[0] = 0; /* AAD length */ 840 ctx->len.u[1] = 0; /* message length */ 841 ctx->ares = 0; 842 ctx->mres = 0; 843 844 if (len == 12) { 845 memcpy(ctx->Yi.c, iv, 12); 846 ctx->Yi.c[12] = 0; 847 ctx->Yi.c[13] = 0; 848 ctx->Yi.c[14] = 0; 849 ctx->Yi.c[15] = 1; 850 ctr = 1; 851 } else { 852 size_t i; 853 u64 len0 = len; 854 855 /* Borrow ctx->Xi to calculate initial Yi */ 856 ctx->Xi.u[0] = 0; 857 ctx->Xi.u[1] = 0; 858 859 while (len >= 16) { 860 for (i = 0; i < 16; ++i) 861 ctx->Xi.c[i] ^= iv[i]; 862 GCM_MUL(ctx); 863 iv += 16; 864 len -= 16; 865 } 866 if (len) { 867 for (i = 0; i < len; ++i) 868 ctx->Xi.c[i] ^= iv[i]; 869 GCM_MUL(ctx); 870 } 871 len0 <<= 3; 872 if (is_endian.little) { 873 #ifdef BSWAP8 874 ctx->Xi.u[1] ^= BSWAP8(len0); 875 #else 876 ctx->Xi.c[8] ^= (u8)(len0 >> 56); 877 ctx->Xi.c[9] ^= (u8)(len0 >> 48); 878 ctx->Xi.c[10] ^= (u8)(len0 >> 40); 879 ctx->Xi.c[11] ^= (u8)(len0 >> 32); 880 ctx->Xi.c[12] ^= (u8)(len0 >> 24); 881 ctx->Xi.c[13] ^= (u8)(len0 >> 16); 882 ctx->Xi.c[14] ^= (u8)(len0 >> 8); 883 ctx->Xi.c[15] ^= (u8)(len0); 884 #endif 885 } else { 886 ctx->Xi.u[1] ^= len0; 887 } 888 889 GCM_MUL(ctx); 890 891 if (is_endian.little) 892 #ifdef BSWAP4 893 ctr = BSWAP4(ctx->Xi.d[3]); 894 #else 895 ctr = GETU32(ctx->Xi.c + 12); 896 #endif 897 else 898 ctr = ctx->Xi.d[3]; 899 900 /* Copy borrowed Xi to Yi */ 901 ctx->Yi.u[0] = ctx->Xi.u[0]; 902 ctx->Yi.u[1] = ctx->Xi.u[1]; 903 } 904 905 ctx->Xi.u[0] = 0; 906 ctx->Xi.u[1] = 0; 907 908 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); 909 ++ctr; 910 if (is_endian.little) 911 #ifdef BSWAP4 912 ctx->Yi.d[3] = BSWAP4(ctr); 913 #else 914 PUTU32(ctx->Yi.c + 12, ctr); 915 #endif 916 else 917 ctx->Yi.d[3] = ctr; 918 } 919 920 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, 921 size_t len) 922 { 923 size_t i; 924 unsigned int n; 925 u64 alen = ctx->len.u[0]; 926 #ifdef GCM_FUNCREF_4BIT 927 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 928 # ifdef GHASH 929 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 930 const u8 *inp, size_t len) = ctx->ghash; 931 # endif 932 #endif 933 934 if (ctx->len.u[1]) 935 return -2; 936 937 alen += len; 938 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len)) 939 return -1; 940 ctx->len.u[0] = alen; 941 942 n = ctx->ares; 943 if (n) { 944 while (n && len) { 945 ctx->Xi.c[n] ^= *(aad++); 946 --len; 947 n = (n + 1) % 16; 948 } 949 if (n == 0) 950 GCM_MUL(ctx); 951 else { 952 ctx->ares = n; 953 return 0; 954 } 955 } 956 #ifdef GHASH 957 if ((i = (len & (size_t)-16))) { 958 GHASH(ctx, aad, i); 959 aad += i; 960 len -= i; 961 } 962 #else 963 while (len >= 16) { 964 for (i = 0; i < 16; ++i) 965 ctx->Xi.c[i] ^= aad[i]; 966 GCM_MUL(ctx); 967 aad += 16; 968 len -= 16; 969 } 970 #endif 971 if (len) { 972 n = (unsigned int)len; 973 for (i = 0; i < len; ++i) 974 ctx->Xi.c[i] ^= aad[i]; 975 } 976 977 ctx->ares = n; 978 return 0; 979 } 980 981 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, 982 const unsigned char *in, unsigned char *out, 983 size_t len) 984 { 985 const union { 986 long one; 987 char little; 988 } is_endian = { 1 }; 989 unsigned int n, ctr, mres; 990 size_t i; 991 u64 mlen = ctx->len.u[1]; 992 block128_f block = ctx->block; 993 void *key = ctx->key; 994 #ifdef GCM_FUNCREF_4BIT 995 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 996 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 997 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 998 const u8 *inp, size_t len) = ctx->ghash; 999 # endif 1000 #endif 1001 1002 mlen += len; 1003 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1004 return -1; 1005 ctx->len.u[1] = mlen; 1006 1007 mres = ctx->mres; 1008 1009 if (ctx->ares) { 1010 /* First call to encrypt finalizes GHASH(AAD) */ 1011 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1012 if (len == 0) { 1013 GCM_MUL(ctx); 1014 ctx->ares = 0; 1015 return 0; 1016 } 1017 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1018 ctx->Xi.u[0] = 0; 1019 ctx->Xi.u[1] = 0; 1020 mres = sizeof(ctx->Xi); 1021 #else 1022 GCM_MUL(ctx); 1023 #endif 1024 ctx->ares = 0; 1025 } 1026 1027 if (is_endian.little) 1028 #ifdef BSWAP4 1029 ctr = BSWAP4(ctx->Yi.d[3]); 1030 #else 1031 ctr = GETU32(ctx->Yi.c + 12); 1032 #endif 1033 else 1034 ctr = ctx->Yi.d[3]; 1035 1036 n = mres % 16; 1037 #if !defined(OPENSSL_SMALL_FOOTPRINT) 1038 if (16 % sizeof(size_t) == 0) { /* always true actually */ 1039 do { 1040 if (n) { 1041 # if defined(GHASH) 1042 while (n && len) { 1043 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1044 --len; 1045 n = (n + 1) % 16; 1046 } 1047 if (n == 0) { 1048 GHASH(ctx, ctx->Xn, mres); 1049 mres = 0; 1050 } else { 1051 ctx->mres = mres; 1052 return 0; 1053 } 1054 # else 1055 while (n && len) { 1056 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1057 --len; 1058 n = (n + 1) % 16; 1059 } 1060 if (n == 0) { 1061 GCM_MUL(ctx); 1062 mres = 0; 1063 } else { 1064 ctx->mres = n; 1065 return 0; 1066 } 1067 # endif 1068 } 1069 # if defined(STRICT_ALIGNMENT) 1070 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1071 break; 1072 # endif 1073 # if defined(GHASH) 1074 if (len >= 16 && mres) { 1075 GHASH(ctx, ctx->Xn, mres); 1076 mres = 0; 1077 } 1078 # if defined(GHASH_CHUNK) 1079 while (len >= GHASH_CHUNK) { 1080 size_t j = GHASH_CHUNK; 1081 1082 while (j) { 1083 size_t *out_t = (size_t *)out; 1084 const size_t *in_t = (const size_t *)in; 1085 1086 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1087 ++ctr; 1088 if (is_endian.little) 1089 # ifdef BSWAP4 1090 ctx->Yi.d[3] = BSWAP4(ctr); 1091 # else 1092 PUTU32(ctx->Yi.c + 12, ctr); 1093 # endif 1094 else 1095 ctx->Yi.d[3] = ctr; 1096 for (i = 0; i < 16 / sizeof(size_t); ++i) 1097 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1098 out += 16; 1099 in += 16; 1100 j -= 16; 1101 } 1102 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK); 1103 len -= GHASH_CHUNK; 1104 } 1105 # endif 1106 if ((i = (len & (size_t)-16))) { 1107 size_t j = i; 1108 1109 while (len >= 16) { 1110 size_t *out_t = (size_t *)out; 1111 const size_t *in_t = (const size_t *)in; 1112 1113 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1114 ++ctr; 1115 if (is_endian.little) 1116 # ifdef BSWAP4 1117 ctx->Yi.d[3] = BSWAP4(ctr); 1118 # else 1119 PUTU32(ctx->Yi.c + 12, ctr); 1120 # endif 1121 else 1122 ctx->Yi.d[3] = ctr; 1123 for (i = 0; i < 16 / sizeof(size_t); ++i) 1124 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1125 out += 16; 1126 in += 16; 1127 len -= 16; 1128 } 1129 GHASH(ctx, out - j, j); 1130 } 1131 # else 1132 while (len >= 16) { 1133 size_t *out_t = (size_t *)out; 1134 const size_t *in_t = (const size_t *)in; 1135 1136 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1137 ++ctr; 1138 if (is_endian.little) 1139 # ifdef BSWAP4 1140 ctx->Yi.d[3] = BSWAP4(ctr); 1141 # else 1142 PUTU32(ctx->Yi.c + 12, ctr); 1143 # endif 1144 else 1145 ctx->Yi.d[3] = ctr; 1146 for (i = 0; i < 16 / sizeof(size_t); ++i) 1147 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1148 GCM_MUL(ctx); 1149 out += 16; 1150 in += 16; 1151 len -= 16; 1152 } 1153 # endif 1154 if (len) { 1155 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1156 ++ctr; 1157 if (is_endian.little) 1158 # ifdef BSWAP4 1159 ctx->Yi.d[3] = BSWAP4(ctr); 1160 # else 1161 PUTU32(ctx->Yi.c + 12, ctr); 1162 # endif 1163 else 1164 ctx->Yi.d[3] = ctr; 1165 # if defined(GHASH) 1166 while (len--) { 1167 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1168 ++n; 1169 } 1170 # else 1171 while (len--) { 1172 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 1173 ++n; 1174 } 1175 mres = n; 1176 # endif 1177 } 1178 1179 ctx->mres = mres; 1180 return 0; 1181 } while (0); 1182 } 1183 #endif 1184 for (i = 0; i < len; ++i) { 1185 if (n == 0) { 1186 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1187 ++ctr; 1188 if (is_endian.little) 1189 #ifdef BSWAP4 1190 ctx->Yi.d[3] = BSWAP4(ctr); 1191 #else 1192 PUTU32(ctx->Yi.c + 12, ctr); 1193 #endif 1194 else 1195 ctx->Yi.d[3] = ctr; 1196 } 1197 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1198 ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n]; 1199 n = (n + 1) % 16; 1200 if (mres == sizeof(ctx->Xn)) { 1201 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); 1202 mres = 0; 1203 } 1204 #else 1205 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; 1206 mres = n = (n + 1) % 16; 1207 if (n == 0) 1208 GCM_MUL(ctx); 1209 #endif 1210 } 1211 1212 ctx->mres = mres; 1213 return 0; 1214 } 1215 1216 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, 1217 const unsigned char *in, unsigned char *out, 1218 size_t len) 1219 { 1220 const union { 1221 long one; 1222 char little; 1223 } is_endian = { 1 }; 1224 unsigned int n, ctr, mres; 1225 size_t i; 1226 u64 mlen = ctx->len.u[1]; 1227 block128_f block = ctx->block; 1228 void *key = ctx->key; 1229 #ifdef GCM_FUNCREF_4BIT 1230 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1231 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1232 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1233 const u8 *inp, size_t len) = ctx->ghash; 1234 # endif 1235 #endif 1236 1237 mlen += len; 1238 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1239 return -1; 1240 ctx->len.u[1] = mlen; 1241 1242 mres = ctx->mres; 1243 1244 if (ctx->ares) { 1245 /* First call to decrypt finalizes GHASH(AAD) */ 1246 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1247 if (len == 0) { 1248 GCM_MUL(ctx); 1249 ctx->ares = 0; 1250 return 0; 1251 } 1252 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1253 ctx->Xi.u[0] = 0; 1254 ctx->Xi.u[1] = 0; 1255 mres = sizeof(ctx->Xi); 1256 #else 1257 GCM_MUL(ctx); 1258 #endif 1259 ctx->ares = 0; 1260 } 1261 1262 if (is_endian.little) 1263 #ifdef BSWAP4 1264 ctr = BSWAP4(ctx->Yi.d[3]); 1265 #else 1266 ctr = GETU32(ctx->Yi.c + 12); 1267 #endif 1268 else 1269 ctr = ctx->Yi.d[3]; 1270 1271 n = mres % 16; 1272 #if !defined(OPENSSL_SMALL_FOOTPRINT) 1273 if (16 % sizeof(size_t) == 0) { /* always true actually */ 1274 do { 1275 if (n) { 1276 # if defined(GHASH) 1277 while (n && len) { 1278 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1279 --len; 1280 n = (n + 1) % 16; 1281 } 1282 if (n == 0) { 1283 GHASH(ctx, ctx->Xn, mres); 1284 mres = 0; 1285 } else { 1286 ctx->mres = mres; 1287 return 0; 1288 } 1289 # else 1290 while (n && len) { 1291 u8 c = *(in++); 1292 *(out++) = c ^ ctx->EKi.c[n]; 1293 ctx->Xi.c[n] ^= c; 1294 --len; 1295 n = (n + 1) % 16; 1296 } 1297 if (n == 0) { 1298 GCM_MUL(ctx); 1299 mres = 0; 1300 } else { 1301 ctx->mres = n; 1302 return 0; 1303 } 1304 # endif 1305 } 1306 # if defined(STRICT_ALIGNMENT) 1307 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1308 break; 1309 # endif 1310 # if defined(GHASH) 1311 if (len >= 16 && mres) { 1312 GHASH(ctx, ctx->Xn, mres); 1313 mres = 0; 1314 } 1315 # if defined(GHASH_CHUNK) 1316 while (len >= GHASH_CHUNK) { 1317 size_t j = GHASH_CHUNK; 1318 1319 GHASH(ctx, in, GHASH_CHUNK); 1320 while (j) { 1321 size_t *out_t = (size_t *)out; 1322 const size_t *in_t = (const size_t *)in; 1323 1324 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1325 ++ctr; 1326 if (is_endian.little) 1327 # ifdef BSWAP4 1328 ctx->Yi.d[3] = BSWAP4(ctr); 1329 # else 1330 PUTU32(ctx->Yi.c + 12, ctr); 1331 # endif 1332 else 1333 ctx->Yi.d[3] = ctr; 1334 for (i = 0; i < 16 / sizeof(size_t); ++i) 1335 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1336 out += 16; 1337 in += 16; 1338 j -= 16; 1339 } 1340 len -= GHASH_CHUNK; 1341 } 1342 # endif 1343 if ((i = (len & (size_t)-16))) { 1344 GHASH(ctx, in, i); 1345 while (len >= 16) { 1346 size_t *out_t = (size_t *)out; 1347 const size_t *in_t = (const size_t *)in; 1348 1349 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1350 ++ctr; 1351 if (is_endian.little) 1352 # ifdef BSWAP4 1353 ctx->Yi.d[3] = BSWAP4(ctr); 1354 # else 1355 PUTU32(ctx->Yi.c + 12, ctr); 1356 # endif 1357 else 1358 ctx->Yi.d[3] = ctr; 1359 for (i = 0; i < 16 / sizeof(size_t); ++i) 1360 out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1361 out += 16; 1362 in += 16; 1363 len -= 16; 1364 } 1365 } 1366 # else 1367 while (len >= 16) { 1368 size_t *out_t = (size_t *)out; 1369 const size_t *in_t = (const size_t *)in; 1370 1371 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1372 ++ctr; 1373 if (is_endian.little) 1374 # ifdef BSWAP4 1375 ctx->Yi.d[3] = BSWAP4(ctr); 1376 # else 1377 PUTU32(ctx->Yi.c + 12, ctr); 1378 # endif 1379 else 1380 ctx->Yi.d[3] = ctr; 1381 for (i = 0; i < 16 / sizeof(size_t); ++i) { 1382 size_t c = in[i]; 1383 out[i] = c ^ ctx->EKi.t[i]; 1384 ctx->Xi.t[i] ^= c; 1385 } 1386 GCM_MUL(ctx); 1387 out += 16; 1388 in += 16; 1389 len -= 16; 1390 } 1391 # endif 1392 if (len) { 1393 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1394 ++ctr; 1395 if (is_endian.little) 1396 # ifdef BSWAP4 1397 ctx->Yi.d[3] = BSWAP4(ctr); 1398 # else 1399 PUTU32(ctx->Yi.c + 12, ctr); 1400 # endif 1401 else 1402 ctx->Yi.d[3] = ctr; 1403 # if defined(GHASH) 1404 while (len--) { 1405 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1406 ++n; 1407 } 1408 # else 1409 while (len--) { 1410 u8 c = in[n]; 1411 ctx->Xi.c[n] ^= c; 1412 out[n] = c ^ ctx->EKi.c[n]; 1413 ++n; 1414 } 1415 mres = n; 1416 # endif 1417 } 1418 1419 ctx->mres = mres; 1420 return 0; 1421 } while (0); 1422 } 1423 #endif 1424 for (i = 0; i < len; ++i) { 1425 u8 c; 1426 if (n == 0) { 1427 (*block) (ctx->Yi.c, ctx->EKi.c, key); 1428 ++ctr; 1429 if (is_endian.little) 1430 #ifdef BSWAP4 1431 ctx->Yi.d[3] = BSWAP4(ctr); 1432 #else 1433 PUTU32(ctx->Yi.c + 12, ctr); 1434 #endif 1435 else 1436 ctx->Yi.d[3] = ctr; 1437 } 1438 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1439 out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; 1440 n = (n + 1) % 16; 1441 if (mres == sizeof(ctx->Xn)) { 1442 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); 1443 mres = 0; 1444 } 1445 #else 1446 c = in[i]; 1447 out[i] = c ^ ctx->EKi.c[n]; 1448 ctx->Xi.c[n] ^= c; 1449 mres = n = (n + 1) % 16; 1450 if (n == 0) 1451 GCM_MUL(ctx); 1452 #endif 1453 } 1454 1455 ctx->mres = mres; 1456 return 0; 1457 } 1458 1459 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 1460 const unsigned char *in, unsigned char *out, 1461 size_t len, ctr128_f stream) 1462 { 1463 #if defined(OPENSSL_SMALL_FOOTPRINT) 1464 return CRYPTO_gcm128_encrypt(ctx, in, out, len); 1465 #else 1466 const union { 1467 long one; 1468 char little; 1469 } is_endian = { 1 }; 1470 unsigned int n, ctr, mres; 1471 size_t i; 1472 u64 mlen = ctx->len.u[1]; 1473 void *key = ctx->key; 1474 # ifdef GCM_FUNCREF_4BIT 1475 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1476 # ifdef GHASH 1477 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1478 const u8 *inp, size_t len) = ctx->ghash; 1479 # endif 1480 # endif 1481 1482 mlen += len; 1483 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1484 return -1; 1485 ctx->len.u[1] = mlen; 1486 1487 mres = ctx->mres; 1488 1489 if (ctx->ares) { 1490 /* First call to encrypt finalizes GHASH(AAD) */ 1491 #if defined(GHASH) 1492 if (len == 0) { 1493 GCM_MUL(ctx); 1494 ctx->ares = 0; 1495 return 0; 1496 } 1497 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1498 ctx->Xi.u[0] = 0; 1499 ctx->Xi.u[1] = 0; 1500 mres = sizeof(ctx->Xi); 1501 #else 1502 GCM_MUL(ctx); 1503 #endif 1504 ctx->ares = 0; 1505 } 1506 1507 if (is_endian.little) 1508 # ifdef BSWAP4 1509 ctr = BSWAP4(ctx->Yi.d[3]); 1510 # else 1511 ctr = GETU32(ctx->Yi.c + 12); 1512 # endif 1513 else 1514 ctr = ctx->Yi.d[3]; 1515 1516 n = mres % 16; 1517 if (n) { 1518 # if defined(GHASH) 1519 while (n && len) { 1520 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1521 --len; 1522 n = (n + 1) % 16; 1523 } 1524 if (n == 0) { 1525 GHASH(ctx, ctx->Xn, mres); 1526 mres = 0; 1527 } else { 1528 ctx->mres = mres; 1529 return 0; 1530 } 1531 # else 1532 while (n && len) { 1533 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1534 --len; 1535 n = (n + 1) % 16; 1536 } 1537 if (n == 0) { 1538 GCM_MUL(ctx); 1539 mres = 0; 1540 } else { 1541 ctx->mres = n; 1542 return 0; 1543 } 1544 # endif 1545 } 1546 # if defined(GHASH) 1547 if (len >= 16 && mres) { 1548 GHASH(ctx, ctx->Xn, mres); 1549 mres = 0; 1550 } 1551 # if defined(GHASH_CHUNK) 1552 while (len >= GHASH_CHUNK) { 1553 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1554 ctr += GHASH_CHUNK / 16; 1555 if (is_endian.little) 1556 # ifdef BSWAP4 1557 ctx->Yi.d[3] = BSWAP4(ctr); 1558 # else 1559 PUTU32(ctx->Yi.c + 12, ctr); 1560 # endif 1561 else 1562 ctx->Yi.d[3] = ctr; 1563 GHASH(ctx, out, GHASH_CHUNK); 1564 out += GHASH_CHUNK; 1565 in += GHASH_CHUNK; 1566 len -= GHASH_CHUNK; 1567 } 1568 # endif 1569 # endif 1570 if ((i = (len & (size_t)-16))) { 1571 size_t j = i / 16; 1572 1573 (*stream) (in, out, j, key, ctx->Yi.c); 1574 ctr += (unsigned int)j; 1575 if (is_endian.little) 1576 # ifdef BSWAP4 1577 ctx->Yi.d[3] = BSWAP4(ctr); 1578 # else 1579 PUTU32(ctx->Yi.c + 12, ctr); 1580 # endif 1581 else 1582 ctx->Yi.d[3] = ctr; 1583 in += i; 1584 len -= i; 1585 # if defined(GHASH) 1586 GHASH(ctx, out, i); 1587 out += i; 1588 # else 1589 while (j--) { 1590 for (i = 0; i < 16; ++i) 1591 ctx->Xi.c[i] ^= out[i]; 1592 GCM_MUL(ctx); 1593 out += 16; 1594 } 1595 # endif 1596 } 1597 if (len) { 1598 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1599 ++ctr; 1600 if (is_endian.little) 1601 # ifdef BSWAP4 1602 ctx->Yi.d[3] = BSWAP4(ctr); 1603 # else 1604 PUTU32(ctx->Yi.c + 12, ctr); 1605 # endif 1606 else 1607 ctx->Yi.d[3] = ctr; 1608 while (len--) { 1609 # if defined(GHASH) 1610 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1611 # else 1612 ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 1613 # endif 1614 ++n; 1615 } 1616 } 1617 1618 ctx->mres = mres; 1619 return 0; 1620 #endif 1621 } 1622 1623 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 1624 const unsigned char *in, unsigned char *out, 1625 size_t len, ctr128_f stream) 1626 { 1627 #if defined(OPENSSL_SMALL_FOOTPRINT) 1628 return CRYPTO_gcm128_decrypt(ctx, in, out, len); 1629 #else 1630 const union { 1631 long one; 1632 char little; 1633 } is_endian = { 1 }; 1634 unsigned int n, ctr, mres; 1635 size_t i; 1636 u64 mlen = ctx->len.u[1]; 1637 void *key = ctx->key; 1638 # ifdef GCM_FUNCREF_4BIT 1639 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1640 # ifdef GHASH 1641 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1642 const u8 *inp, size_t len) = ctx->ghash; 1643 # endif 1644 # endif 1645 1646 mlen += len; 1647 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1648 return -1; 1649 ctx->len.u[1] = mlen; 1650 1651 mres = ctx->mres; 1652 1653 if (ctx->ares) { 1654 /* First call to decrypt finalizes GHASH(AAD) */ 1655 # if defined(GHASH) 1656 if (len == 0) { 1657 GCM_MUL(ctx); 1658 ctx->ares = 0; 1659 return 0; 1660 } 1661 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1662 ctx->Xi.u[0] = 0; 1663 ctx->Xi.u[1] = 0; 1664 mres = sizeof(ctx->Xi); 1665 # else 1666 GCM_MUL(ctx); 1667 # endif 1668 ctx->ares = 0; 1669 } 1670 1671 if (is_endian.little) 1672 # ifdef BSWAP4 1673 ctr = BSWAP4(ctx->Yi.d[3]); 1674 # else 1675 ctr = GETU32(ctx->Yi.c + 12); 1676 # endif 1677 else 1678 ctr = ctx->Yi.d[3]; 1679 1680 n = mres % 16; 1681 if (n) { 1682 # if defined(GHASH) 1683 while (n && len) { 1684 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1685 --len; 1686 n = (n + 1) % 16; 1687 } 1688 if (n == 0) { 1689 GHASH(ctx, ctx->Xn, mres); 1690 mres = 0; 1691 } else { 1692 ctx->mres = mres; 1693 return 0; 1694 } 1695 # else 1696 while (n && len) { 1697 u8 c = *(in++); 1698 *(out++) = c ^ ctx->EKi.c[n]; 1699 ctx->Xi.c[n] ^= c; 1700 --len; 1701 n = (n + 1) % 16; 1702 } 1703 if (n == 0) { 1704 GCM_MUL(ctx); 1705 mres = 0; 1706 } else { 1707 ctx->mres = n; 1708 return 0; 1709 } 1710 # endif 1711 } 1712 # if defined(GHASH) 1713 if (len >= 16 && mres) { 1714 GHASH(ctx, ctx->Xn, mres); 1715 mres = 0; 1716 } 1717 # if defined(GHASH_CHUNK) 1718 while (len >= GHASH_CHUNK) { 1719 GHASH(ctx, in, GHASH_CHUNK); 1720 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1721 ctr += GHASH_CHUNK / 16; 1722 if (is_endian.little) 1723 # ifdef BSWAP4 1724 ctx->Yi.d[3] = BSWAP4(ctr); 1725 # else 1726 PUTU32(ctx->Yi.c + 12, ctr); 1727 # endif 1728 else 1729 ctx->Yi.d[3] = ctr; 1730 out += GHASH_CHUNK; 1731 in += GHASH_CHUNK; 1732 len -= GHASH_CHUNK; 1733 } 1734 # endif 1735 # endif 1736 if ((i = (len & (size_t)-16))) { 1737 size_t j = i / 16; 1738 1739 # if defined(GHASH) 1740 GHASH(ctx, in, i); 1741 # else 1742 while (j--) { 1743 size_t k; 1744 for (k = 0; k < 16; ++k) 1745 ctx->Xi.c[k] ^= in[k]; 1746 GCM_MUL(ctx); 1747 in += 16; 1748 } 1749 j = i / 16; 1750 in -= i; 1751 # endif 1752 (*stream) (in, out, j, key, ctx->Yi.c); 1753 ctr += (unsigned int)j; 1754 if (is_endian.little) 1755 # ifdef BSWAP4 1756 ctx->Yi.d[3] = BSWAP4(ctr); 1757 # else 1758 PUTU32(ctx->Yi.c + 12, ctr); 1759 # endif 1760 else 1761 ctx->Yi.d[3] = ctr; 1762 out += i; 1763 in += i; 1764 len -= i; 1765 } 1766 if (len) { 1767 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1768 ++ctr; 1769 if (is_endian.little) 1770 # ifdef BSWAP4 1771 ctx->Yi.d[3] = BSWAP4(ctr); 1772 # else 1773 PUTU32(ctx->Yi.c + 12, ctr); 1774 # endif 1775 else 1776 ctx->Yi.d[3] = ctr; 1777 while (len--) { 1778 # if defined(GHASH) 1779 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1780 # else 1781 u8 c = in[n]; 1782 ctx->Xi.c[mres++] ^= c; 1783 out[n] = c ^ ctx->EKi.c[n]; 1784 # endif 1785 ++n; 1786 } 1787 } 1788 1789 ctx->mres = mres; 1790 return 0; 1791 #endif 1792 } 1793 1794 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 1795 size_t len) 1796 { 1797 const union { 1798 long one; 1799 char little; 1800 } is_endian = { 1 }; 1801 u64 alen = ctx->len.u[0] << 3; 1802 u64 clen = ctx->len.u[1] << 3; 1803 #ifdef GCM_FUNCREF_4BIT 1804 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1805 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1806 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1807 const u8 *inp, size_t len) = ctx->ghash; 1808 # endif 1809 #endif 1810 1811 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1812 u128 bitlen; 1813 unsigned int mres = ctx->mres; 1814 1815 if (mres) { 1816 unsigned blocks = (mres + 15) & -16; 1817 1818 memset(ctx->Xn + mres, 0, blocks - mres); 1819 mres = blocks; 1820 if (mres == sizeof(ctx->Xn)) { 1821 GHASH(ctx, ctx->Xn, mres); 1822 mres = 0; 1823 } 1824 } else if (ctx->ares) { 1825 GCM_MUL(ctx); 1826 } 1827 #else 1828 if (ctx->mres || ctx->ares) 1829 GCM_MUL(ctx); 1830 #endif 1831 1832 if (is_endian.little) { 1833 #ifdef BSWAP8 1834 alen = BSWAP8(alen); 1835 clen = BSWAP8(clen); 1836 #else 1837 u8 *p = ctx->len.c; 1838 1839 ctx->len.u[0] = alen; 1840 ctx->len.u[1] = clen; 1841 1842 alen = (u64)GETU32(p) << 32 | GETU32(p + 4); 1843 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 1844 #endif 1845 } 1846 1847 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1848 bitlen.hi = alen; 1849 bitlen.lo = clen; 1850 memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); 1851 mres += sizeof(bitlen); 1852 GHASH(ctx, ctx->Xn, mres); 1853 #else 1854 ctx->Xi.u[0] ^= alen; 1855 ctx->Xi.u[1] ^= clen; 1856 GCM_MUL(ctx); 1857 #endif 1858 1859 ctx->Xi.u[0] ^= ctx->EK0.u[0]; 1860 ctx->Xi.u[1] ^= ctx->EK0.u[1]; 1861 1862 if (tag && len <= sizeof(ctx->Xi)) 1863 return CRYPTO_memcmp(ctx->Xi.c, tag, len); 1864 else 1865 return -1; 1866 } 1867 1868 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 1869 { 1870 CRYPTO_gcm128_finish(ctx, NULL, 0); 1871 memcpy(tag, ctx->Xi.c, 1872 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); 1873 } 1874 1875 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) 1876 { 1877 GCM128_CONTEXT *ret; 1878 1879 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL) 1880 CRYPTO_gcm128_init(ret, key, block); 1881 1882 return ret; 1883 } 1884 1885 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) 1886 { 1887 OPENSSL_clear_free(ctx, sizeof(*ctx)); 1888 } 1889