1 /* 2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the OpenSSL license (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include <openssl/crypto.h> 11 #include "modes_local.h" 12 #include <string.h> 13 14 #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) 15 typedef size_t size_t_aX __attribute((__aligned__(1))); 16 #else 17 typedef size_t size_t_aX; 18 #endif 19 20 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) 21 /* redefine, because alignment is ensured */ 22 # undef GETU32 23 # define GETU32(p) BSWAP4(*(const u32 *)(p)) 24 # undef PUTU32 25 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 26 #endif 27 28 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 29 #define REDUCE1BIT(V) do { \ 30 if (sizeof(size_t)==8) { \ 31 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ 32 V.lo = (V.hi<<63)|(V.lo>>1); \ 33 V.hi = (V.hi>>1 )^T; \ 34 } \ 35 else { \ 36 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ 37 V.lo = (V.hi<<63)|(V.lo>>1); \ 38 V.hi = (V.hi>>1 )^((u64)T<<32); \ 39 } \ 40 } while(0) 41 42 /*- 43 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 44 * never be set to 8. 8 is effectively reserved for testing purposes. 45 * TABLE_BITS>1 are lookup-table-driven implementations referred to as 46 * "Shoup's" in GCM specification. In other words OpenSSL does not cover 47 * whole spectrum of possible table driven implementations. Why? In 48 * non-"Shoup's" case memory access pattern is segmented in such manner, 49 * that it's trivial to see that cache timing information can reveal 50 * fair portion of intermediate hash value. 
Given that ciphertext is 51 * always available to attacker, it's possible for him to attempt to 52 * deduce secret parameter H and if successful, tamper with messages 53 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's 54 * not as trivial, but there is no reason to believe that it's resistant 55 * to cache-timing attack. And the thing about "8-bit" implementation is 56 * that it consumes 16 (sixteen) times more memory, 4KB per individual 57 * key + 1KB shared. Well, on pros side it should be twice as fast as 58 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version 59 * was observed to run ~75% faster, closer to 100% for commercial 60 * compilers... Yet "4-bit" procedure is preferred, because it's 61 * believed to provide better security-performance balance and adequate 62 * all-round performance. "All-round" refers to things like: 63 * 64 * - shorter setup time effectively improves overall timing for 65 * handling short messages; 66 * - larger table allocation can become unbearable because of VM 67 * subsystem penalties (for example on Windows large enough free 68 * results in VM working set trimming, meaning that consequent 69 * malloc would immediately incur working set expansion); 70 * - larger table has larger cache footprint, which can affect 71 * performance of other code paths (not necessarily even from same 72 * thread in Hyper-Threading world); 73 * 74 * Value of 1 is not appropriate for performance reasons. 
75 */ 76 #if TABLE_BITS==8 77 78 static void gcm_init_8bit(u128 Htable[256], u64 H[2]) 79 { 80 int i, j; 81 u128 V; 82 83 Htable[0].hi = 0; 84 Htable[0].lo = 0; 85 V.hi = H[0]; 86 V.lo = H[1]; 87 88 for (Htable[128] = V, i = 64; i > 0; i >>= 1) { 89 REDUCE1BIT(V); 90 Htable[i] = V; 91 } 92 93 for (i = 2; i < 256; i <<= 1) { 94 u128 *Hi = Htable + i, H0 = *Hi; 95 for (j = 1; j < i; ++j) { 96 Hi[j].hi = H0.hi ^ Htable[j].hi; 97 Hi[j].lo = H0.lo ^ Htable[j].lo; 98 } 99 } 100 } 101 102 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) 103 { 104 u128 Z = { 0, 0 }; 105 const u8 *xi = (const u8 *)Xi + 15; 106 size_t rem, n = *xi; 107 const union { 108 long one; 109 char little; 110 } is_endian = { 1 }; 111 static const size_t rem_8bit[256] = { 112 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), 113 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), 114 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), 115 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), 116 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), 117 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), 118 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), 119 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), 120 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), 121 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), 122 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), 123 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), 124 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), 125 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), 126 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), 127 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), 128 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), 129 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), 130 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), 131 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), 132 PACK(0x6CA0), PACK(0x6D62), 
PACK(0x6F24), PACK(0x6EE6), 133 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), 134 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), 135 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), 136 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), 137 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), 138 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), 139 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), 140 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), 141 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), 142 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), 143 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), 144 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), 145 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), 146 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), 147 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), 148 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), 149 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), 150 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), 151 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), 152 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), 153 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), 154 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), 155 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), 156 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), 157 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), 158 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), 159 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), 160 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), 161 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), 162 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), 163 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), 164 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), 165 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), 
PACK(0x88EE), 166 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), 167 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), 168 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), 169 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), 170 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), 171 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), 172 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), 173 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), 174 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), 175 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) 176 }; 177 178 while (1) { 179 Z.hi ^= Htable[n].hi; 180 Z.lo ^= Htable[n].lo; 181 182 if ((u8 *)Xi == xi) 183 break; 184 185 n = *(--xi); 186 187 rem = (size_t)Z.lo & 0xff; 188 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 189 Z.hi = (Z.hi >> 8); 190 if (sizeof(size_t) == 8) 191 Z.hi ^= rem_8bit[rem]; 192 else 193 Z.hi ^= (u64)rem_8bit[rem] << 32; 194 } 195 196 if (is_endian.little) { 197 # ifdef BSWAP8 198 Xi[0] = BSWAP8(Z.hi); 199 Xi[1] = BSWAP8(Z.lo); 200 # else 201 u8 *p = (u8 *)Xi; 202 u32 v; 203 v = (u32)(Z.hi >> 32); 204 PUTU32(p, v); 205 v = (u32)(Z.hi); 206 PUTU32(p + 4, v); 207 v = (u32)(Z.lo >> 32); 208 PUTU32(p + 8, v); 209 v = (u32)(Z.lo); 210 PUTU32(p + 12, v); 211 # endif 212 } else { 213 Xi[0] = Z.hi; 214 Xi[1] = Z.lo; 215 } 216 } 217 218 # define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) 219 220 #elif TABLE_BITS==4 221 222 static void gcm_init_4bit(u128 Htable[16], u64 H[2]) 223 { 224 u128 V; 225 # if defined(OPENSSL_SMALL_FOOTPRINT) 226 int i; 227 # endif 228 229 Htable[0].hi = 0; 230 Htable[0].lo = 0; 231 V.hi = H[0]; 232 V.lo = H[1]; 233 234 # if defined(OPENSSL_SMALL_FOOTPRINT) 235 for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 236 REDUCE1BIT(V); 237 Htable[i] = V; 238 } 239 240 for (i = 2; i < 16; i <<= 1) { 241 u128 *Hi = Htable + i; 242 int j; 243 for (V = *Hi, j = 1; j < i; ++j) { 244 Hi[j].hi = V.hi ^ Htable[j].hi; 245 Hi[j].lo = V.lo ^ 
Htable[j].lo; 246 } 247 } 248 # else 249 Htable[8] = V; 250 REDUCE1BIT(V); 251 Htable[4] = V; 252 REDUCE1BIT(V); 253 Htable[2] = V; 254 REDUCE1BIT(V); 255 Htable[1] = V; 256 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; 257 V = Htable[4]; 258 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; 259 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; 260 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; 261 V = Htable[8]; 262 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; 263 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; 264 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; 265 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; 266 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; 267 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; 268 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; 269 # endif 270 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 271 /* 272 * ARM assembler expects specific dword order in Htable. 
273 */ 274 { 275 int j; 276 const union { 277 long one; 278 char little; 279 } is_endian = { 1 }; 280 281 if (is_endian.little) 282 for (j = 0; j < 16; ++j) { 283 V = Htable[j]; 284 Htable[j].hi = V.lo; 285 Htable[j].lo = V.hi; 286 } else 287 for (j = 0; j < 16; ++j) { 288 V = Htable[j]; 289 Htable[j].hi = V.lo << 32 | V.lo >> 32; 290 Htable[j].lo = V.hi << 32 | V.hi >> 32; 291 } 292 } 293 # endif 294 } 295 296 # ifndef GHASH_ASM 297 static const size_t rem_4bit[16] = { 298 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 299 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 300 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 301 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) 302 }; 303 304 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 305 { 306 u128 Z; 307 int cnt = 15; 308 size_t rem, nlo, nhi; 309 const union { 310 long one; 311 char little; 312 } is_endian = { 1 }; 313 314 nlo = ((const u8 *)Xi)[15]; 315 nhi = nlo >> 4; 316 nlo &= 0xf; 317 318 Z.hi = Htable[nlo].hi; 319 Z.lo = Htable[nlo].lo; 320 321 while (1) { 322 rem = (size_t)Z.lo & 0xf; 323 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 324 Z.hi = (Z.hi >> 4); 325 if (sizeof(size_t) == 8) 326 Z.hi ^= rem_4bit[rem]; 327 else 328 Z.hi ^= (u64)rem_4bit[rem] << 32; 329 330 Z.hi ^= Htable[nhi].hi; 331 Z.lo ^= Htable[nhi].lo; 332 333 if (--cnt < 0) 334 break; 335 336 nlo = ((const u8 *)Xi)[cnt]; 337 nhi = nlo >> 4; 338 nlo &= 0xf; 339 340 rem = (size_t)Z.lo & 0xf; 341 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 342 Z.hi = (Z.hi >> 4); 343 if (sizeof(size_t) == 8) 344 Z.hi ^= rem_4bit[rem]; 345 else 346 Z.hi ^= (u64)rem_4bit[rem] << 32; 347 348 Z.hi ^= Htable[nlo].hi; 349 Z.lo ^= Htable[nlo].lo; 350 } 351 352 if (is_endian.little) { 353 # ifdef BSWAP8 354 Xi[0] = BSWAP8(Z.hi); 355 Xi[1] = BSWAP8(Z.lo); 356 # else 357 u8 *p = (u8 *)Xi; 358 u32 v; 359 v = (u32)(Z.hi >> 32); 360 PUTU32(p, v); 361 v = (u32)(Z.hi); 362 PUTU32(p + 4, v); 363 v = (u32)(Z.lo >> 32); 364 PUTU32(p + 8, v); 365 v = 
(u32)(Z.lo); 366 PUTU32(p + 12, v); 367 # endif 368 } else { 369 Xi[0] = Z.hi; 370 Xi[1] = Z.lo; 371 } 372 } 373 374 # if !defined(OPENSSL_SMALL_FOOTPRINT) 375 /* 376 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 377 * details... Compiler-generated code doesn't seem to give any 378 * performance improvement, at least not on x86[_64]. It's here 379 * mostly as reference and a placeholder for possible future 380 * non-trivial optimization[s]... 381 */ 382 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 383 const u8 *inp, size_t len) 384 { 385 u128 Z; 386 int cnt; 387 size_t rem, nlo, nhi; 388 const union { 389 long one; 390 char little; 391 } is_endian = { 1 }; 392 393 # if 1 394 do { 395 cnt = 15; 396 nlo = ((const u8 *)Xi)[15]; 397 nlo ^= inp[15]; 398 nhi = nlo >> 4; 399 nlo &= 0xf; 400 401 Z.hi = Htable[nlo].hi; 402 Z.lo = Htable[nlo].lo; 403 404 while (1) { 405 rem = (size_t)Z.lo & 0xf; 406 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 407 Z.hi = (Z.hi >> 4); 408 if (sizeof(size_t) == 8) 409 Z.hi ^= rem_4bit[rem]; 410 else 411 Z.hi ^= (u64)rem_4bit[rem] << 32; 412 413 Z.hi ^= Htable[nhi].hi; 414 Z.lo ^= Htable[nhi].lo; 415 416 if (--cnt < 0) 417 break; 418 419 nlo = ((const u8 *)Xi)[cnt]; 420 nlo ^= inp[cnt]; 421 nhi = nlo >> 4; 422 nlo &= 0xf; 423 424 rem = (size_t)Z.lo & 0xf; 425 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 426 Z.hi = (Z.hi >> 4); 427 if (sizeof(size_t) == 8) 428 Z.hi ^= rem_4bit[rem]; 429 else 430 Z.hi ^= (u64)rem_4bit[rem] << 32; 431 432 Z.hi ^= Htable[nlo].hi; 433 Z.lo ^= Htable[nlo].lo; 434 } 435 # else 436 /* 437 * Extra 256+16 bytes per-key plus 512 bytes shared tables 438 * [should] give ~50% improvement... One could have PACK()-ed 439 * the rem_8bit even here, but the priority is to minimize 440 * cache footprint... 
441 */ 442 u128 Hshr4[16]; /* Htable shifted right by 4 bits */ 443 u8 Hshl4[16]; /* Htable shifted left by 4 bits */ 444 static const unsigned short rem_8bit[256] = { 445 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, 446 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, 447 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, 448 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, 449 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, 450 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, 451 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, 452 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, 453 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, 454 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, 455 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, 456 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, 457 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, 458 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, 459 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, 460 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, 461 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, 462 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, 463 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, 464 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, 465 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, 466 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, 467 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, 468 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, 469 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, 470 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, 471 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 
0x88EE, 472 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, 473 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, 474 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, 475 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, 476 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE 477 }; 478 /* 479 * This pre-processing phase slows down procedure by approximately 480 * same time as it makes each loop spin faster. In other words 481 * single block performance is approximately same as straightforward 482 * "4-bit" implementation, and then it goes only faster... 483 */ 484 for (cnt = 0; cnt < 16; ++cnt) { 485 Z.hi = Htable[cnt].hi; 486 Z.lo = Htable[cnt].lo; 487 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); 488 Hshr4[cnt].hi = (Z.hi >> 4); 489 Hshl4[cnt] = (u8)(Z.lo << 4); 490 } 491 492 do { 493 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { 494 nlo = ((const u8 *)Xi)[cnt]; 495 nlo ^= inp[cnt]; 496 nhi = nlo >> 4; 497 nlo &= 0xf; 498 499 Z.hi ^= Htable[nlo].hi; 500 Z.lo ^= Htable[nlo].lo; 501 502 rem = (size_t)Z.lo & 0xff; 503 504 Z.lo = (Z.hi << 56) | (Z.lo >> 8); 505 Z.hi = (Z.hi >> 8); 506 507 Z.hi ^= Hshr4[nhi].hi; 508 Z.lo ^= Hshr4[nhi].lo; 509 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; 510 } 511 512 nlo = ((const u8 *)Xi)[0]; 513 nlo ^= inp[0]; 514 nhi = nlo >> 4; 515 nlo &= 0xf; 516 517 Z.hi ^= Htable[nlo].hi; 518 Z.lo ^= Htable[nlo].lo; 519 520 rem = (size_t)Z.lo & 0xf; 521 522 Z.lo = (Z.hi << 60) | (Z.lo >> 4); 523 Z.hi = (Z.hi >> 4); 524 525 Z.hi ^= Htable[nhi].hi; 526 Z.lo ^= Htable[nhi].lo; 527 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; 528 # endif 529 530 if (is_endian.little) { 531 # ifdef BSWAP8 532 Xi[0] = BSWAP8(Z.hi); 533 Xi[1] = BSWAP8(Z.lo); 534 # else 535 u8 *p = (u8 *)Xi; 536 u32 v; 537 v = (u32)(Z.hi >> 32); 538 PUTU32(p, v); 539 v = (u32)(Z.hi); 540 PUTU32(p + 4, v); 541 v = (u32)(Z.lo >> 32); 542 PUTU32(p + 8, v); 543 v = (u32)(Z.lo); 544 PUTU32(p + 12, v); 545 # endif 546 } 
else { 547 Xi[0] = Z.hi; 548 Xi[1] = Z.lo; 549 } 550 } while (inp += 16, len -= 16); 551 } 552 # endif 553 # else 554 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); 555 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, 556 size_t len); 557 # endif 558 559 # define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 560 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) 561 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 562 /* 563 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing 564 * effect. In other words idea is to hash data while it's still in L1 cache 565 * after encryption pass... 566 */ 567 # define GHASH_CHUNK (3*1024) 568 # endif 569 570 #else /* TABLE_BITS */ 571 572 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 573 { 574 u128 V, Z = { 0, 0 }; 575 long X; 576 int i, j; 577 const long *xi = (const long *)Xi; 578 const union { 579 long one; 580 char little; 581 } is_endian = { 1 }; 582 583 V.hi = H[0]; /* H is in host byte order, no byte swapping */ 584 V.lo = H[1]; 585 586 for (j = 0; j < 16 / sizeof(long); ++j) { 587 if (is_endian.little) { 588 if (sizeof(long) == 8) { 589 # ifdef BSWAP8 590 X = (long)(BSWAP8(xi[j])); 591 # else 592 const u8 *p = (const u8 *)(xi + j); 593 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); 594 # endif 595 } else { 596 const u8 *p = (const u8 *)(xi + j); 597 X = (long)GETU32(p); 598 } 599 } else 600 X = xi[j]; 601 602 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { 603 u64 M = (u64)(X >> (8 * sizeof(long) - 1)); 604 Z.hi ^= V.hi & M; 605 Z.lo ^= V.lo & M; 606 607 REDUCE1BIT(V); 608 } 609 } 610 611 if (is_endian.little) { 612 # ifdef BSWAP8 613 Xi[0] = BSWAP8(Z.hi); 614 Xi[1] = BSWAP8(Z.lo); 615 # else 616 u8 *p = (u8 *)Xi; 617 u32 v; 618 v = (u32)(Z.hi >> 32); 619 PUTU32(p, v); 620 v = (u32)(Z.hi); 621 PUTU32(p + 4, v); 622 v = (u32)(Z.lo >> 32); 623 PUTU32(p + 8, v); 624 v = (u32)(Z.lo); 625 PUTU32(p + 12, v); 626 # endif 627 } 
else { 628 Xi[0] = Z.hi; 629 Xi[1] = Z.lo; 630 } 631 } 632 633 # define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) 634 635 #endif 636 637 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) 638 # if !defined(I386_ONLY) && \ 639 (defined(__i386) || defined(__i386__) || \ 640 defined(__x86_64) || defined(__x86_64__) || \ 641 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 642 # define GHASH_ASM_X86_OR_64 643 # define GCM_FUNCREF_4BIT 644 extern unsigned int OPENSSL_ia32cap_P[]; 645 646 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 647 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 648 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 649 size_t len); 650 651 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 652 # define gcm_init_avx gcm_init_clmul 653 # define gcm_gmult_avx gcm_gmult_clmul 654 # define gcm_ghash_avx gcm_ghash_clmul 655 # else 656 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]); 657 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); 658 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 659 size_t len); 660 # endif 661 662 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) 663 # define GHASH_ASM_X86 664 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 665 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 666 size_t len); 667 668 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 669 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 670 size_t len); 671 # endif 672 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__) 673 # include "arm_arch.h" 674 # if __ARM_MAX_ARCH__>=7 675 # define GHASH_ASM_ARM 676 # define GCM_FUNCREF_4BIT 677 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) 678 # if defined(__arm__) || defined(__arm) 679 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) 680 # endif 681 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); 682 
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); 683 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, 684 size_t len); 685 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); 686 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); 687 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 688 size_t len); 689 # endif 690 # elif defined(__sparc__) || defined(__sparc) 691 # include "sparc_arch.h" 692 # define GHASH_ASM_SPARC 693 # define GCM_FUNCREF_4BIT 694 extern unsigned int OPENSSL_sparcv9cap_P[]; 695 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); 696 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); 697 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, 698 size_t len); 699 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) 700 # include "ppc_arch.h" 701 # define GHASH_ASM_PPC 702 # define GCM_FUNCREF_4BIT 703 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); 704 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); 705 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 706 size_t len); 707 # endif 708 #endif 709 710 #ifdef GCM_FUNCREF_4BIT 711 # undef GCM_MUL 712 # define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 713 # ifdef GHASH 714 # undef GHASH 715 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 716 # endif 717 #endif 718 719 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) 720 { 721 const union { 722 long one; 723 char little; 724 } is_endian = { 1 }; 725 726 memset(ctx, 0, sizeof(*ctx)); 727 ctx->block = block; 728 ctx->key = key; 729 730 (*block) (ctx->H.c, ctx->H.c, key); 731 732 if (is_endian.little) { 733 /* H is stored in host byte order */ 734 #ifdef BSWAP8 735 ctx->H.u[0] = BSWAP8(ctx->H.u[0]); 736 ctx->H.u[1] = BSWAP8(ctx->H.u[1]); 737 #else 738 u8 *p = ctx->H.c; 739 u64 hi, lo; 740 hi = (u64)GETU32(p) << 32 | GETU32(p + 4); 741 lo = (u64)GETU32(p + 8) << 
32 | GETU32(p + 12); 742 ctx->H.u[0] = hi; 743 ctx->H.u[1] = lo; 744 #endif 745 } 746 #if TABLE_BITS==8 747 gcm_init_8bit(ctx->Htable, ctx->H.u); 748 #elif TABLE_BITS==4 749 # if defined(GHASH) 750 # define CTX__GHASH(f) (ctx->ghash = (f)) 751 # else 752 # define CTX__GHASH(f) (ctx->ghash = NULL) 753 # endif 754 # if defined(GHASH_ASM_X86_OR_64) 755 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) 756 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ 757 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ 758 gcm_init_avx(ctx->Htable, ctx->H.u); 759 ctx->gmult = gcm_gmult_avx; 760 CTX__GHASH(gcm_ghash_avx); 761 } else { 762 gcm_init_clmul(ctx->Htable, ctx->H.u); 763 ctx->gmult = gcm_gmult_clmul; 764 CTX__GHASH(gcm_ghash_clmul); 765 } 766 return; 767 } 768 # endif 769 gcm_init_4bit(ctx->Htable, ctx->H.u); 770 # if defined(GHASH_ASM_X86) /* x86 only */ 771 # if defined(OPENSSL_IA32_SSE2) 772 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */ 773 # else 774 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */ 775 # endif 776 ctx->gmult = gcm_gmult_4bit_mmx; 777 CTX__GHASH(gcm_ghash_4bit_mmx); 778 } else { 779 ctx->gmult = gcm_gmult_4bit_x86; 780 CTX__GHASH(gcm_ghash_4bit_x86); 781 } 782 # else 783 ctx->gmult = gcm_gmult_4bit; 784 CTX__GHASH(gcm_ghash_4bit); 785 # endif 786 # elif defined(GHASH_ASM_ARM) 787 # ifdef PMULL_CAPABLE 788 if (PMULL_CAPABLE) { 789 gcm_init_v8(ctx->Htable, ctx->H.u); 790 ctx->gmult = gcm_gmult_v8; 791 CTX__GHASH(gcm_ghash_v8); 792 } else 793 # endif 794 # ifdef NEON_CAPABLE 795 if (NEON_CAPABLE) { 796 gcm_init_neon(ctx->Htable, ctx->H.u); 797 ctx->gmult = gcm_gmult_neon; 798 CTX__GHASH(gcm_ghash_neon); 799 } else 800 # endif 801 { 802 gcm_init_4bit(ctx->Htable, ctx->H.u); 803 ctx->gmult = gcm_gmult_4bit; 804 CTX__GHASH(gcm_ghash_4bit); 805 } 806 # elif defined(GHASH_ASM_SPARC) 807 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { 808 gcm_init_vis3(ctx->Htable, ctx->H.u); 809 ctx->gmult = 
gcm_gmult_vis3; 810 CTX__GHASH(gcm_ghash_vis3); 811 } else { 812 gcm_init_4bit(ctx->Htable, ctx->H.u); 813 ctx->gmult = gcm_gmult_4bit; 814 CTX__GHASH(gcm_ghash_4bit); 815 } 816 # elif defined(GHASH_ASM_PPC) 817 if (OPENSSL_ppccap_P & PPC_CRYPTO207) { 818 gcm_init_p8(ctx->Htable, ctx->H.u); 819 ctx->gmult = gcm_gmult_p8; 820 CTX__GHASH(gcm_ghash_p8); 821 } else { 822 gcm_init_4bit(ctx->Htable, ctx->H.u); 823 ctx->gmult = gcm_gmult_4bit; 824 CTX__GHASH(gcm_ghash_4bit); 825 } 826 # else 827 gcm_init_4bit(ctx->Htable, ctx->H.u); 828 # endif 829 # undef CTX__GHASH 830 #endif 831 } 832 833 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, 834 size_t len) 835 { 836 const union { 837 long one; 838 char little; 839 } is_endian = { 1 }; 840 unsigned int ctr; 841 #ifdef GCM_FUNCREF_4BIT 842 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 843 #endif 844 845 ctx->len.u[0] = 0; /* AAD length */ 846 ctx->len.u[1] = 0; /* message length */ 847 ctx->ares = 0; 848 ctx->mres = 0; 849 850 if (len == 12) { 851 memcpy(ctx->Yi.c, iv, 12); 852 ctx->Yi.c[12] = 0; 853 ctx->Yi.c[13] = 0; 854 ctx->Yi.c[14] = 0; 855 ctx->Yi.c[15] = 1; 856 ctr = 1; 857 } else { 858 size_t i; 859 u64 len0 = len; 860 861 /* Borrow ctx->Xi to calculate initial Yi */ 862 ctx->Xi.u[0] = 0; 863 ctx->Xi.u[1] = 0; 864 865 while (len >= 16) { 866 for (i = 0; i < 16; ++i) 867 ctx->Xi.c[i] ^= iv[i]; 868 GCM_MUL(ctx); 869 iv += 16; 870 len -= 16; 871 } 872 if (len) { 873 for (i = 0; i < len; ++i) 874 ctx->Xi.c[i] ^= iv[i]; 875 GCM_MUL(ctx); 876 } 877 len0 <<= 3; 878 if (is_endian.little) { 879 #ifdef BSWAP8 880 ctx->Xi.u[1] ^= BSWAP8(len0); 881 #else 882 ctx->Xi.c[8] ^= (u8)(len0 >> 56); 883 ctx->Xi.c[9] ^= (u8)(len0 >> 48); 884 ctx->Xi.c[10] ^= (u8)(len0 >> 40); 885 ctx->Xi.c[11] ^= (u8)(len0 >> 32); 886 ctx->Xi.c[12] ^= (u8)(len0 >> 24); 887 ctx->Xi.c[13] ^= (u8)(len0 >> 16); 888 ctx->Xi.c[14] ^= (u8)(len0 >> 8); 889 ctx->Xi.c[15] ^= (u8)(len0); 890 #endif 891 } else { 892 
ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}

/*
 * Absorb |len| bytes of additional authenticated data (AAD) into the
 * running hash state ctx->Xi.  May be called repeatedly; a trailing
 * partial 16-byte block is left XORed into Xi and its length is
 * remembered in ctx->ares so the next call can continue it.
 *
 * Returns:
 *    0  on success;
 *   -1  if the accumulated AAD length would exceed 2^61 bytes or the
 *       addition wrapped around;
 *   -2  if ctx->len.u[1] is non-zero, i.e. the encrypt/decrypt routines
 *       below have already accounted for message bytes.
 *
 * GCM_MUL() and GHASH() are macros defined elsewhere.
 * NOTE(review): presumably GCM_MUL multiplies Xi by the hash key and
 * GHASH folds whole 16-byte blocks into Xi - confirm against their
 * definitions.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* AAD is refused once any message length has been recorded */
    if (ctx->len.u[1])
        return -2;

    /* Second test catches wrap-around of the 64-bit sum */
    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    /* Finish the partial block left over from a previous call, if any */
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            /* Input ran out before the block was complete */
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    /* i = len rounded down to a multiple of 16 */
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    /* Fewer than 16 bytes remain: XOR them in and defer the multiply */
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

/*
 * Encrypt |len| bytes from |in| to |out| with keystream produced by
 * ctx->block applied to the counter block ctx->Yi, and fold the
 * resulting ciphertext into the hash state.  May be called repeatedly;
 * ctx->mres carries the amount of not-yet-hashed/partial-block data
 * between calls.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or the addition wrapped around.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    /* .little reads the first byte of the long 1: non-zero on
     * little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* Second test catches wrap-around of the 64-bit sum */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Move the pending AAD block from Xi into the Xn staging buffer
         * and clear Xi, so the block is hashed by a later GHASH() call
         * together with the data queued after it.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter held in bytes 12..15 of Yi; it is
     * byte-swapped on little-endian hosts */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current keystream block EKi */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /* do/while(0) lets the alignment check below 'break' out to the
         * byte-at-a-time loop at the end of the function */
        do {
            if (n) {
                /* Use up the rest of the keystream block from the
                 * previous call */
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif /* GHASH */
            }
# if defined(STRICT_ALIGNMENT)
            /* Word-wise XOR below needs both pointers size_t-aligned */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* Flush staged bytes (e.g. the AAD block) before bulk work */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            /* Encrypt GHASH_CHUNK bytes, then hash that ciphertext in
             * one GHASH() call */
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
# endif /* GHASH_CHUNK */
            if ((i = (len & (size_t)-16))) {
                /* i is reused as a loop index below; keep the byte
                 * count in j for the GHASH() call */
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else /* !GHASH */
            /* One block at a time: XOR ciphertext into Xi, then multiply */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif /* GHASH */
            if (len) {
                /* Trailing partial block: generate one more keystream
                 * block and use its first |len| bytes (n is 0 here) */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif /* !OPENSSL_SMALL_FOOTPRINT */
    /* Byte-at-a-time path: small-footprint builds and the
     * STRICT_ALIGNMENT 'break' above end up here */
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            /* Keystream block exhausted: produce the next one */
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* Staging buffer full: hash it and start over */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

/*
 * Decrypt |len| bytes from |in| to |out|.  Mirrors
 * CRYPTO_gcm128_encrypt(), except that the bytes folded into the hash
 * state are the input (ciphertext) bytes rather than the output.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or the addition wrapped around.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    /* .little reads the first byte of the long 1: non-zero on
     * little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    /* Second test catches wrap-around of the 64-bit sum */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Stage the pending AAD block in Xn and clear Xi (see encrypt) */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter held in bytes 12..15 of Yi */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current keystream block EKi */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /* do/while(0) lets the alignment check below 'break' out to the
         * byte-at-a-time loop at the end of the function */
        do {
            if (n) {
                /* Use up the rest of the keystream block from the
                 * previous call */
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif /* GHASH */
            }
# if defined(STRICT_ALIGNMENT)
            /* Word-wise XOR below needs both pointers size_t-aligned */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* Flush staged bytes (e.g. the AAD block) before bulk work */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                /*
                 * The input is hashed before it is decrypted.
                 * NOTE(review): presumably so that in-place operation
                 * (in == out) still hashes the ciphertext - confirm.
                 */
                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
# endif /* GHASH_CHUNK */
            if ((i = (len & (size_t)-16))) {
                /* Hash all remaining whole blocks first, then decrypt */
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else /* !GHASH */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    /* Read the ciphertext word once, before out_t[i]
                     * is written */
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif /* GHASH */
            if (len) {
                /* Trailing partial block: generate one more keystream
                 * block and use its first |len| bytes (n is 0 here) */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif /* !OPENSSL_SMALL_FOOTPRINT */
    /* Byte-at-a-time path: small-footprint builds and the
     * STRICT_ALIGNMENT 'break' above end up here */
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            /* Keystream block exhausted: produce the next one */
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* Staging buffer full: hash it and start over */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

/*
 * Variant of CRYPTO_gcm128_encrypt() that hands runs of whole 16-byte
 * blocks to the caller-supplied |stream| routine instead of calling
 * ctx->block once per block.  With OPENSSL_SMALL_FOOTPRINT it simply
 * forwards to CRYPTO_gcm128_encrypt() and |stream| is unused.
 *
 * |stream| is invoked as (*stream)(in, out, nblocks, key, ctx->Yi.c).
 * NOTE(review): the counter in Yi is advanced and rewritten here after
 * each call, so |stream| presumably does not update it - confirm
 * against the ctr128_f contract.
 *
 * Returns 0 on success, -1 on message-length overflow.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    /* .little reads the first byte of the long 1: non-zero on
     * little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
# endif

    /* Second test catches wrap-around of the 64-bit sum */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Stage the pending AAD block in Xn and clear Xi */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter held in bytes 12..15 of Yi */
    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current keystream block EKi */
    n = mres % 16;
    if (n) {
        /* Use up the rest of the keystream block from the previous call */
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif /* GHASH */
    }
# if defined(GHASH)
    /* Flush staged bytes (e.g. the AAD block) before bulk work */
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    /* Encrypt one GHASH_CHUNK via |stream|, then hash that ciphertext */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif /* GHASH_CHUNK */
# endif /* GHASH */
    /* Remaining whole blocks: i = byte count, j = block count */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
        /* |out| is advanced only as the fresh ciphertext is hashed */
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        /* Trailing partial block uses ctx->block directly (n is 0 here) */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif /* OPENSSL_SMALL_FOOTPRINT */
}

/*
 * Variant of CRYPTO_gcm128_decrypt() that hands runs of whole 16-byte
 * blocks to the caller-supplied |stream| routine.  With
 * OPENSSL_SMALL_FOOTPRINT it simply forwards to CRYPTO_gcm128_decrypt()
 * and |stream| is unused.  The input (ciphertext) is always hashed
 * before the corresponding |stream| call.
 *
 * Returns 0 on success, -1 on message-length overflow.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    /* .little reads the first byte of the long 1: non-zero on
     * little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
# endif

    /* Second test catches wrap-around of the 64-bit sum */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /* Stage the pending AAD block in Xn and clear Xi */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter held in bytes 12..15 of Yi */
    if (is_endian.little)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* n = offset into the current keystream block EKi */
    n = mres % 16;
    if (n) {
        /* Use up the rest of the keystream block from the previous call */
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif /* GHASH */
    }
# if defined(GHASH)
    /* Flush staged bytes (e.g. the AAD block) before bulk work */
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    /* Hash one GHASH_CHUNK of input, then decrypt it via |stream| */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif /* GHASH_CHUNK */
# endif /* GHASH */
    /* Remaining whole blocks: i = byte count, j = block count */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        /* Hash block by block, walking |in| forward ... */
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        /* ... then restore the block count and rewind |in| for |stream| */
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        /* Trailing partial block uses ctx->block directly (n is 0 here) */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif /* OPENSSL_SMALL_FOOTPRINT */
}

/*
 * Complete the hash: fold in any pending partial block and the AAD and
 * message lengths (in bits), then XOR the result with ctx->EK0, leaving
 * the computed tag in ctx->Xi.
 *
 * If |tag| is non-NULL and |len| <= sizeof(ctx->Xi), returns the result
 * of CRYPTO_memcmp() between the computed tag and the first |len| bytes
 * of |tag|; otherwise returns -1 (the tag is still left in ctx->Xi).
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    /* .little reads the first byte of the long 1: non-zero on
     * little-endian hosts */
    const union {
        long one;
        char little;
    } is_endian = { 1 };
    /* Byte counts converted to bit counts */
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    /*
     * Not referenced by name below; presumably picked up by the
     * GCM_MUL/GHASH macros in this configuration - TODO confirm.
     */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        /* Round the staged byte count up to a multiple of 16 and
         * zero-fill the gap */
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        /* No room left for the length block: flush the buffer first */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    /* On little-endian hosts, byte-reverse both 64-bit bit counts */
    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        /* No BSWAP8: store the counts into ctx->len and re-read them
         * as 32-bit halves via GETU32 */
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Append the length block after any staged data and hash it all */
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

/*
 * Compute the tag via CRYPTO_gcm128_finish() and copy it to |tag|.
 * At most sizeof(ctx->Xi.c) bytes are copied; a larger |len| is capped.
 * The return value of CRYPTO_gcm128_finish() (always -1 for a NULL tag)
 * is ignored.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

/*
 * Allocate a GCM128_CONTEXT with OPENSSL_malloc() and initialise it
 * with CRYPTO_gcm128_init(ret, key, block).  Returns NULL if the
 * allocation fails.
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

/*
 * Dispose of a context through OPENSSL_clear_free(), passing the full
 * structure size.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}