1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/zfs_context.h> 26 #include <modes/modes.h> 27 #include <sys/crypto/common.h> 28 #include <sys/crypto/icp.h> 29 #include <sys/crypto/impl.h> 30 #include <sys/byteorder.h> 31 #include <sys/simd.h> 32 #include <modes/gcm_impl.h> 33 #ifdef CAN_USE_GCM_ASM 34 #include <aes/aes_impl.h> 35 #endif 36 37 #define GHASH(c, d, t, o) \ 38 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ 39 (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ 40 (uint64_t *)(void *)(t)); 41 42 /* Select GCM implementation */ 43 #define IMPL_FASTEST (UINT32_MAX) 44 #define IMPL_CYCLE (UINT32_MAX-1) 45 #ifdef CAN_USE_GCM_ASM 46 #define IMPL_AVX (UINT32_MAX-2) 47 #endif 48 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) 49 static uint32_t icp_gcm_impl = IMPL_FASTEST; 50 static uint32_t user_sel_impl = IMPL_FASTEST; 51 52 #ifdef CAN_USE_GCM_ASM 53 /* Does the architecture we run on support the MOVBE instruction? */ 54 boolean_t gcm_avx_can_use_movbe = B_FALSE; 55 /* 56 * Whether to use the optimized openssl gcm and ghash implementations. 57 * Set to true if module parameter icp_gcm_impl == "avx". 58 */ 59 static boolean_t gcm_use_avx = B_FALSE; 60 #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) 61 62 extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); 63 64 static inline boolean_t gcm_avx_will_work(void); 65 static inline void gcm_set_avx(boolean_t); 66 static inline boolean_t gcm_toggle_avx(void); 67 static inline size_t gcm_simd_get_htab_size(boolean_t); 68 69 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, 70 crypto_data_t *, size_t); 71 72 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); 73 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); 74 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, 75 size_t, size_t); 76 #endif /* ifdef CAN_USE_GCM_ASM */ 77 78 /* 79 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode 80 * is done in another function. 81 */ 82 int 83 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, 84 crypto_data_t *out, size_t block_size, 85 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 86 void (*copy_block)(uint8_t *, uint8_t *), 87 void (*xor_block)(uint8_t *, uint8_t *)) 88 { 89 #ifdef CAN_USE_GCM_ASM 90 if (ctx->gcm_use_avx == B_TRUE) 91 return (gcm_mode_encrypt_contiguous_blocks_avx( 92 ctx, data, length, out, block_size)); 93 #endif 94 95 const gcm_impl_ops_t *gops; 96 size_t remainder = length; 97 size_t need = 0; 98 uint8_t *datap = (uint8_t *)data; 99 uint8_t *blockp; 100 uint8_t *lastp; 101 void *iov_or_mp; 102 offset_t offset; 103 uint8_t *out_data_1; 104 uint8_t *out_data_2; 105 size_t out_data_1_len; 106 uint64_t counter; 107 uint64_t counter_mask = ntohll(0x00000000ffffffffULL); 108 109 if (length + ctx->gcm_remainder_len < block_size) { 110 /* accumulate bytes here and return */ 111 memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, 112 datap, 113 length); 114 ctx->gcm_remainder_len += length; 115 if (ctx->gcm_copy_to == NULL) { 116 ctx->gcm_copy_to = datap; 117 } 118 return (CRYPTO_SUCCESS); 119 } 120 121 crypto_init_ptrs(out, &iov_or_mp, &offset); 122 123 gops = gcm_impl_get_ops(); 124 do { 125 /* Unprocessed data from last call. */ 126 if (ctx->gcm_remainder_len > 0) { 127 need = block_size - ctx->gcm_remainder_len; 128 129 if (need > remainder) 130 return (CRYPTO_DATA_LEN_RANGE); 131 132 memcpy(&((uint8_t *)ctx->gcm_remainder) 133 [ctx->gcm_remainder_len], datap, need); 134 135 blockp = (uint8_t *)ctx->gcm_remainder; 136 } else { 137 blockp = datap; 138 } 139 140 /* 141 * Increment counter. Counter bits are confined 142 * to the bottom 32 bits of the counter block. 143 */ 144 counter = ntohll(ctx->gcm_cb[1] & counter_mask); 145 counter = htonll(counter + 1); 146 counter &= counter_mask; 147 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; 148 149 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, 150 (uint8_t *)ctx->gcm_tmp); 151 xor_block(blockp, (uint8_t *)ctx->gcm_tmp); 152 153 lastp = (uint8_t *)ctx->gcm_tmp; 154 155 ctx->gcm_processed_data_len += block_size; 156 157 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, 158 &out_data_1_len, &out_data_2, block_size); 159 160 /* copy block to where it belongs */ 161 if (out_data_1_len == block_size) { 162 copy_block(lastp, out_data_1); 163 } else { 164 memcpy(out_data_1, lastp, out_data_1_len); 165 if (out_data_2 != NULL) { 166 memcpy(out_data_2, 167 lastp + out_data_1_len, 168 block_size - out_data_1_len); 169 } 170 } 171 /* update offset */ 172 out->cd_offset += block_size; 173 174 /* add ciphertext to the hash */ 175 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops); 176 177 /* Update pointer to next block of data to be processed. */ 178 if (ctx->gcm_remainder_len != 0) { 179 datap += need; 180 ctx->gcm_remainder_len = 0; 181 } else { 182 datap += block_size; 183 } 184 185 remainder = (size_t)&data[length] - (size_t)datap; 186 187 /* Incomplete last block. */ 188 if (remainder > 0 && remainder < block_size) { 189 memcpy(ctx->gcm_remainder, datap, remainder); 190 ctx->gcm_remainder_len = remainder; 191 ctx->gcm_copy_to = datap; 192 goto out; 193 } 194 ctx->gcm_copy_to = NULL; 195 196 } while (remainder > 0); 197 out: 198 return (CRYPTO_SUCCESS); 199 } 200 201 int 202 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, 203 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 204 void (*copy_block)(uint8_t *, uint8_t *), 205 void (*xor_block)(uint8_t *, uint8_t *)) 206 { 207 (void) copy_block; 208 #ifdef CAN_USE_GCM_ASM 209 if (ctx->gcm_use_avx == B_TRUE) 210 return (gcm_encrypt_final_avx(ctx, out, block_size)); 211 #endif 212 213 const gcm_impl_ops_t *gops; 214 uint64_t counter_mask = ntohll(0x00000000ffffffffULL); 215 uint8_t *ghash, *macp = NULL; 216 int i, rv; 217 218 if (out->cd_length < 219 (ctx->gcm_remainder_len + ctx->gcm_tag_len)) { 220 return (CRYPTO_DATA_LEN_RANGE); 221 } 222 223 gops = gcm_impl_get_ops(); 224 ghash = (uint8_t *)ctx->gcm_ghash; 225 226 if (ctx->gcm_remainder_len > 0) { 227 uint64_t counter; 228 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp; 229 230 /* 231 * Here is where we deal with data that is not a 232 * multiple of the block size. 233 */ 234 235 /* 236 * Increment counter. 237 */ 238 counter = ntohll(ctx->gcm_cb[1] & counter_mask); 239 counter = htonll(counter + 1); 240 counter &= counter_mask; 241 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; 242 243 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, 244 (uint8_t *)ctx->gcm_tmp); 245 246 macp = (uint8_t *)ctx->gcm_remainder; 247 memset(macp + ctx->gcm_remainder_len, 0, 248 block_size - ctx->gcm_remainder_len); 249 250 /* XOR with counter block */ 251 for (i = 0; i < ctx->gcm_remainder_len; i++) { 252 macp[i] ^= tmpp[i]; 253 } 254 255 /* add ciphertext to the hash */ 256 GHASH(ctx, macp, ghash, gops); 257 258 ctx->gcm_processed_data_len += ctx->gcm_remainder_len; 259 } 260 261 ctx->gcm_len_a_len_c[1] = 262 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); 263 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); 264 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, 265 (uint8_t *)ctx->gcm_J0); 266 xor_block((uint8_t *)ctx->gcm_J0, ghash); 267 268 if (ctx->gcm_remainder_len > 0) { 269 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len); 270 if (rv != CRYPTO_SUCCESS) 271 return (rv); 272 } 273 out->cd_offset += ctx->gcm_remainder_len; 274 ctx->gcm_remainder_len = 0; 275 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); 276 if (rv != CRYPTO_SUCCESS) 277 return (rv); 278 out->cd_offset += ctx->gcm_tag_len; 279 280 return (CRYPTO_SUCCESS); 281 } 282 283 /* 284 * This will only deal with decrypting the last block of the input that 285 * might not be a multiple of block length. 286 */ 287 static void 288 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, 289 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 290 void (*xor_block)(uint8_t *, uint8_t *)) 291 { 292 uint8_t *datap, *outp, *counterp; 293 uint64_t counter; 294 uint64_t counter_mask = ntohll(0x00000000ffffffffULL); 295 int i; 296 297 /* 298 * Increment counter. 299 * Counter bits are confined to the bottom 32 bits 300 */ 301 counter = ntohll(ctx->gcm_cb[1] & counter_mask); 302 counter = htonll(counter + 1); 303 counter &= counter_mask; 304 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; 305 306 datap = (uint8_t *)ctx->gcm_remainder; 307 outp = &((ctx->gcm_pt_buf)[index]); 308 counterp = (uint8_t *)ctx->gcm_tmp; 309 310 /* authentication tag */ 311 memset((uint8_t *)ctx->gcm_tmp, 0, block_size); 312 memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len); 313 314 /* add ciphertext to the hash */ 315 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops()); 316 317 /* decrypt remaining ciphertext */ 318 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp); 319 320 /* XOR with counter block */ 321 for (i = 0; i < ctx->gcm_remainder_len; i++) { 322 outp[i] = datap[i] ^ counterp[i]; 323 } 324 } 325 326 int 327 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, 328 crypto_data_t *out, size_t block_size, 329 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 330 void (*copy_block)(uint8_t *, uint8_t *), 331 void (*xor_block)(uint8_t *, uint8_t *)) 332 { 333 (void) out, (void) block_size, (void) encrypt_block, (void) copy_block, 334 (void) xor_block; 335 size_t new_len; 336 uint8_t *new; 337 338 /* 339 * Copy contiguous ciphertext input blocks to plaintext buffer. 340 * Ciphertext will be decrypted in the final. 341 */ 342 if (length > 0) { 343 new_len = ctx->gcm_pt_buf_len + length; 344 new = vmem_alloc(new_len, KM_SLEEP); 345 if (new == NULL) { 346 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); 347 ctx->gcm_pt_buf = NULL; 348 return (CRYPTO_HOST_MEMORY); 349 } 350 351 if (ctx->gcm_pt_buf != NULL) { 352 memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); 353 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); 354 } else { 355 ASSERT0(ctx->gcm_pt_buf_len); 356 } 357 358 ctx->gcm_pt_buf = new; 359 ctx->gcm_pt_buf_len = new_len; 360 memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data, 361 length); 362 ctx->gcm_processed_data_len += length; 363 } 364 365 ctx->gcm_remainder_len = 0; 366 return (CRYPTO_SUCCESS); 367 } 368 369 int 370 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, 371 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 372 void (*xor_block)(uint8_t *, uint8_t *)) 373 { 374 #ifdef CAN_USE_GCM_ASM 375 if (ctx->gcm_use_avx == B_TRUE) 376 return (gcm_decrypt_final_avx(ctx, out, block_size)); 377 #endif 378 379 const gcm_impl_ops_t *gops; 380 size_t pt_len; 381 size_t remainder; 382 uint8_t *ghash; 383 uint8_t *blockp; 384 uint8_t *cbp; 385 uint64_t counter; 386 uint64_t counter_mask = ntohll(0x00000000ffffffffULL); 387 int processed = 0, rv; 388 389 ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len); 390 391 gops = gcm_impl_get_ops(); 392 pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; 393 ghash = (uint8_t *)ctx->gcm_ghash; 394 blockp = ctx->gcm_pt_buf; 395 remainder = pt_len; 396 while (remainder > 0) { 397 /* Incomplete last block */ 398 if (remainder < block_size) { 399 memcpy(ctx->gcm_remainder, blockp, remainder); 400 ctx->gcm_remainder_len = remainder; 401 /* 402 * not expecting anymore ciphertext, just 403 * compute plaintext for the remaining input 404 */ 405 gcm_decrypt_incomplete_block(ctx, block_size, 406 processed, encrypt_block, xor_block); 407 ctx->gcm_remainder_len = 0; 408 goto out; 409 } 410 /* add ciphertext to the hash */ 411 GHASH(ctx, blockp, ghash, gops); 412 413 /* 414 * Increment counter. 415 * Counter bits are confined to the bottom 32 bits 416 */ 417 counter = ntohll(ctx->gcm_cb[1] & counter_mask); 418 counter = htonll(counter + 1); 419 counter &= counter_mask; 420 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; 421 422 cbp = (uint8_t *)ctx->gcm_tmp; 423 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp); 424 425 /* XOR with ciphertext */ 426 xor_block(cbp, blockp); 427 428 processed += block_size; 429 blockp += block_size; 430 remainder -= block_size; 431 } 432 out: 433 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); 434 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); 435 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, 436 (uint8_t *)ctx->gcm_J0); 437 xor_block((uint8_t *)ctx->gcm_J0, ghash); 438 439 /* compare the input authentication tag with what we calculated */ 440 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { 441 /* They don't match */ 442 return (CRYPTO_INVALID_MAC); 443 } else { 444 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); 445 if (rv != CRYPTO_SUCCESS) 446 return (rv); 447 out->cd_offset += pt_len; 448 } 449 return (CRYPTO_SUCCESS); 450 } 451 452 static int 453 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param) 454 { 455 size_t tag_len; 456 457 /* 458 * Check the length of the authentication tag (in bits). 459 */ 460 tag_len = gcm_param->ulTagBits; 461 switch (tag_len) { 462 case 32: 463 case 64: 464 case 96: 465 case 104: 466 case 112: 467 case 120: 468 case 128: 469 break; 470 default: 471 return (CRYPTO_MECHANISM_PARAM_INVALID); 472 } 473 474 if (gcm_param->ulIvLen == 0) 475 return (CRYPTO_MECHANISM_PARAM_INVALID); 476 477 return (CRYPTO_SUCCESS); 478 } 479 480 static void 481 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, 482 gcm_ctx_t *ctx, size_t block_size, 483 void (*copy_block)(uint8_t *, uint8_t *), 484 void (*xor_block)(uint8_t *, uint8_t *)) 485 { 486 const gcm_impl_ops_t *gops; 487 uint8_t *cb; 488 ulong_t remainder = iv_len; 489 ulong_t processed = 0; 490 uint8_t *datap, *ghash; 491 uint64_t len_a_len_c[2]; 492 493 gops = gcm_impl_get_ops(); 494 ghash = (uint8_t *)ctx->gcm_ghash; 495 cb = (uint8_t *)ctx->gcm_cb; 496 if (iv_len == 12) { 497 memcpy(cb, iv, 12); 498 cb[12] = 0; 499 cb[13] = 0; 500 cb[14] = 0; 501 cb[15] = 1; 502 /* J0 will be used again in the final */ 503 copy_block(cb, (uint8_t *)ctx->gcm_J0); 504 } else { 505 /* GHASH the IV */ 506 do { 507 if (remainder < block_size) { 508 memset(cb, 0, block_size); 509 memcpy(cb, &(iv[processed]), remainder); 510 datap = (uint8_t *)cb; 511 remainder = 0; 512 } else { 513 datap = (uint8_t *)(&(iv[processed])); 514 processed += block_size; 515 remainder -= block_size; 516 } 517 GHASH(ctx, datap, ghash, gops); 518 } while (remainder > 0); 519 520 len_a_len_c[0] = 0; 521 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len)); 522 GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops); 523 524 /* J0 will be used again in the final */ 525 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb); 526 } 527 } 528 529 static int 530 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, 531 unsigned char *auth_data, size_t auth_data_len, size_t block_size, 532 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 533 void (*copy_block)(uint8_t *, uint8_t *), 534 void (*xor_block)(uint8_t *, uint8_t *)) 535 { 536 const gcm_impl_ops_t *gops; 537 uint8_t *ghash, *datap, *authp; 538 size_t remainder, processed; 539 540 /* encrypt zero block to get subkey H */ 541 memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); 542 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H, 543 (uint8_t *)ctx->gcm_H); 544 545 gcm_format_initial_blocks(iv, iv_len, ctx, block_size, 546 copy_block, xor_block); 547 548 gops = gcm_impl_get_ops(); 549 authp = (uint8_t *)ctx->gcm_tmp; 550 ghash = (uint8_t *)ctx->gcm_ghash; 551 memset(authp, 0, block_size); 552 memset(ghash, 0, block_size); 553 554 processed = 0; 555 remainder = auth_data_len; 556 do { 557 if (remainder < block_size) { 558 /* 559 * There's not a block full of data, pad rest of 560 * buffer with zero 561 */ 562 563 if (auth_data != NULL) { 564 memset(authp, 0, block_size); 565 memcpy(authp, &(auth_data[processed]), 566 remainder); 567 } else { 568 ASSERT0(remainder); 569 } 570 571 datap = (uint8_t *)authp; 572 remainder = 0; 573 } else { 574 datap = (uint8_t *)(&(auth_data[processed])); 575 processed += block_size; 576 remainder -= block_size; 577 } 578 579 /* add auth data to the hash */ 580 GHASH(ctx, datap, ghash, gops); 581 582 } while (remainder > 0); 583 584 return (CRYPTO_SUCCESS); 585 } 586 587 /* 588 * The following function is called at encrypt or decrypt init time 589 * for AES GCM mode. 590 * 591 * Init the GCM context struct. Handle the cycle and avx implementations here. 592 */ 593 int 594 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, 595 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 596 void (*copy_block)(uint8_t *, uint8_t *), 597 void (*xor_block)(uint8_t *, uint8_t *)) 598 { 599 int rv; 600 CK_AES_GCM_PARAMS *gcm_param; 601 602 if (param != NULL) { 603 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param; 604 605 if ((rv = gcm_validate_args(gcm_param)) != 0) { 606 return (rv); 607 } 608 609 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits; 610 gcm_ctx->gcm_tag_len >>= 3; 611 gcm_ctx->gcm_processed_data_len = 0; 612 613 /* these values are in bits */ 614 gcm_ctx->gcm_len_a_len_c[0] 615 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen)); 616 617 rv = CRYPTO_SUCCESS; 618 gcm_ctx->gcm_flags |= GCM_MODE; 619 } else { 620 return (CRYPTO_MECHANISM_PARAM_INVALID); 621 } 622 623 #ifdef CAN_USE_GCM_ASM 624 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { 625 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; 626 } else { 627 /* 628 * Handle the "cycle" implementation by creating avx and 629 * non-avx contexts alternately. 630 */ 631 gcm_ctx->gcm_use_avx = gcm_toggle_avx(); 632 /* 633 * We don't handle byte swapped key schedules in the avx 634 * code path. 635 */ 636 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; 637 if (ks->ops->needs_byteswap == B_TRUE) { 638 gcm_ctx->gcm_use_avx = B_FALSE; 639 } 640 /* Use the MOVBE and the BSWAP variants alternately. */ 641 if (gcm_ctx->gcm_use_avx == B_TRUE && 642 zfs_movbe_available() == B_TRUE) { 643 (void) atomic_toggle_boolean_nv( 644 (volatile boolean_t *)&gcm_avx_can_use_movbe); 645 } 646 } 647 /* Allocate Htab memory as needed. */ 648 if (gcm_ctx->gcm_use_avx == B_TRUE) { 649 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); 650 651 if (htab_len == 0) { 652 return (CRYPTO_MECHANISM_PARAM_INVALID); 653 } 654 gcm_ctx->gcm_htab_len = htab_len; 655 gcm_ctx->gcm_Htable = 656 (uint64_t *)kmem_alloc(htab_len, KM_SLEEP); 657 658 if (gcm_ctx->gcm_Htable == NULL) { 659 return (CRYPTO_HOST_MEMORY); 660 } 661 } 662 /* Avx and non avx context initialization differs from here on. */ 663 if (gcm_ctx->gcm_use_avx == B_FALSE) { 664 #endif /* ifdef CAN_USE_GCM_ASM */ 665 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, 666 gcm_param->pAAD, gcm_param->ulAADLen, block_size, 667 encrypt_block, copy_block, xor_block) != 0) { 668 rv = CRYPTO_MECHANISM_PARAM_INVALID; 669 } 670 #ifdef CAN_USE_GCM_ASM 671 } else { 672 if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, 673 gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { 674 rv = CRYPTO_MECHANISM_PARAM_INVALID; 675 } 676 } 677 #endif /* ifdef CAN_USE_GCM_ASM */ 678 679 return (rv); 680 } 681 682 int 683 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, 684 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), 685 void (*copy_block)(uint8_t *, uint8_t *), 686 void (*xor_block)(uint8_t *, uint8_t *)) 687 { 688 int rv; 689 CK_AES_GMAC_PARAMS *gmac_param; 690 691 if (param != NULL) { 692 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param; 693 694 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); 695 gcm_ctx->gcm_processed_data_len = 0; 696 697 /* these values are in bits */ 698 gcm_ctx->gcm_len_a_len_c[0] 699 = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen)); 700 701 rv = CRYPTO_SUCCESS; 702 gcm_ctx->gcm_flags |= GMAC_MODE; 703 } else { 704 return (CRYPTO_MECHANISM_PARAM_INVALID); 705 } 706 707 #ifdef CAN_USE_GCM_ASM 708 /* 709 * Handle the "cycle" implementation by creating avx and non avx 710 * contexts alternately. 711 */ 712 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { 713 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; 714 } else { 715 gcm_ctx->gcm_use_avx = gcm_toggle_avx(); 716 } 717 /* We don't handle byte swapped key schedules in the avx code path. */ 718 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; 719 if (ks->ops->needs_byteswap == B_TRUE) { 720 gcm_ctx->gcm_use_avx = B_FALSE; 721 } 722 /* Allocate Htab memory as needed. */ 723 if (gcm_ctx->gcm_use_avx == B_TRUE) { 724 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); 725 726 if (htab_len == 0) { 727 return (CRYPTO_MECHANISM_PARAM_INVALID); 728 } 729 gcm_ctx->gcm_htab_len = htab_len; 730 gcm_ctx->gcm_Htable = 731 (uint64_t *)kmem_alloc(htab_len, KM_SLEEP); 732 733 if (gcm_ctx->gcm_Htable == NULL) { 734 return (CRYPTO_HOST_MEMORY); 735 } 736 } 737 738 /* Avx and non avx context initialization differs from here on. */ 739 if (gcm_ctx->gcm_use_avx == B_FALSE) { 740 #endif /* ifdef CAN_USE_GCM_ASM */ 741 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, 742 gmac_param->pAAD, gmac_param->ulAADLen, block_size, 743 encrypt_block, copy_block, xor_block) != 0) { 744 rv = CRYPTO_MECHANISM_PARAM_INVALID; 745 } 746 #ifdef CAN_USE_GCM_ASM 747 } else { 748 if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, 749 gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { 750 rv = CRYPTO_MECHANISM_PARAM_INVALID; 751 } 752 } 753 #endif /* ifdef CAN_USE_GCM_ASM */ 754 755 return (rv); 756 } 757 758 void * 759 gcm_alloc_ctx(int kmflag) 760 { 761 gcm_ctx_t *gcm_ctx; 762 763 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) 764 return (NULL); 765 766 gcm_ctx->gcm_flags = GCM_MODE; 767 return (gcm_ctx); 768 } 769 770 void * 771 gmac_alloc_ctx(int kmflag) 772 { 773 gcm_ctx_t *gcm_ctx; 774 775 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) 776 return (NULL); 777 778 gcm_ctx->gcm_flags = GMAC_MODE; 779 return (gcm_ctx); 780 } 781 782 /* GCM implementation that contains the fastest methods */ 783 static gcm_impl_ops_t gcm_fastest_impl = { 784 .name = "fastest" 785 }; 786 787 /* All compiled in implementations */ 788 static const gcm_impl_ops_t *gcm_all_impl[] = { 789 &gcm_generic_impl, 790 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) 791 &gcm_pclmulqdq_impl, 792 #endif 793 }; 794 795 /* Indicate that benchmark has been completed */ 796 static boolean_t gcm_impl_initialized = B_FALSE; 797 798 /* Hold all supported implementations */ 799 static size_t gcm_supp_impl_cnt = 0; 800 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; 801 802 /* 803 * Returns the GCM operations for encrypt/decrypt/key setup. When a 804 * SIMD implementation is not allowed in the current context, then 805 * fallback to the fastest generic implementation. 806 */ 807 const gcm_impl_ops_t * 808 gcm_impl_get_ops(void) 809 { 810 if (!kfpu_allowed()) 811 return (&gcm_generic_impl); 812 813 const gcm_impl_ops_t *ops = NULL; 814 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); 815 816 switch (impl) { 817 case IMPL_FASTEST: 818 ASSERT(gcm_impl_initialized); 819 ops = &gcm_fastest_impl; 820 break; 821 case IMPL_CYCLE: 822 /* Cycle through supported implementations */ 823 ASSERT(gcm_impl_initialized); 824 ASSERT3U(gcm_supp_impl_cnt, >, 0); 825 static size_t cycle_impl_idx = 0; 826 size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; 827 ops = gcm_supp_impl[idx]; 828 break; 829 #ifdef CAN_USE_GCM_ASM 830 case IMPL_AVX: 831 /* 832 * Make sure that we return a valid implementation while 833 * switching to the avx implementation since there still 834 * may be unfinished non-avx contexts around. 835 */ 836 ops = &gcm_generic_impl; 837 break; 838 #endif 839 default: 840 ASSERT3U(impl, <, gcm_supp_impl_cnt); 841 ASSERT3U(gcm_supp_impl_cnt, >, 0); 842 if (impl < ARRAY_SIZE(gcm_all_impl)) 843 ops = gcm_supp_impl[impl]; 844 break; 845 } 846 847 ASSERT3P(ops, !=, NULL); 848 849 return (ops); 850 } 851 852 /* 853 * Initialize all supported implementations. 854 */ 855 void 856 gcm_impl_init(void) 857 { 858 gcm_impl_ops_t *curr_impl; 859 int i, c; 860 861 /* Move supported implementations into gcm_supp_impls */ 862 for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { 863 curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; 864 865 if (curr_impl->is_supported()) 866 gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl; 867 } 868 gcm_supp_impl_cnt = c; 869 870 /* 871 * Set the fastest implementation given the assumption that the 872 * hardware accelerated version is the fastest. 873 */ 874 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) 875 if (gcm_pclmulqdq_impl.is_supported()) { 876 memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, 877 sizeof (gcm_fastest_impl)); 878 } else 879 #endif 880 { 881 memcpy(&gcm_fastest_impl, &gcm_generic_impl, 882 sizeof (gcm_fastest_impl)); 883 } 884 885 strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX); 886 887 #ifdef CAN_USE_GCM_ASM 888 /* 889 * Use the avx implementation if it's available and the implementation 890 * hasn't changed from its default value of fastest on module load. 891 */ 892 if (gcm_avx_will_work()) { 893 #ifdef HAVE_MOVBE 894 if (zfs_movbe_available() == B_TRUE) { 895 atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE); 896 } 897 #endif 898 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { 899 gcm_set_avx(B_TRUE); 900 } 901 } 902 #endif 903 /* Finish initialization */ 904 atomic_swap_32(&icp_gcm_impl, user_sel_impl); 905 gcm_impl_initialized = B_TRUE; 906 } 907 908 static const struct { 909 const char *name; 910 uint32_t sel; 911 } gcm_impl_opts[] = { 912 { "cycle", IMPL_CYCLE }, 913 { "fastest", IMPL_FASTEST }, 914 #ifdef CAN_USE_GCM_ASM 915 { "avx", IMPL_AVX }, 916 #endif 917 }; 918 919 /* 920 * Function sets desired gcm implementation. 921 * 922 * If we are called before init(), user preference will be saved in 923 * user_sel_impl, and applied in later init() call. This occurs when module 924 * parameter is specified on module load. Otherwise, directly update 925 * icp_gcm_impl. 926 * 927 * @val Name of gcm implementation to use 928 * @param Unused. 929 */ 930 int 931 gcm_impl_set(const char *val) 932 { 933 int err = -EINVAL; 934 char req_name[GCM_IMPL_NAME_MAX]; 935 uint32_t impl = GCM_IMPL_READ(user_sel_impl); 936 size_t i; 937 938 /* sanitize input */ 939 i = strnlen(val, GCM_IMPL_NAME_MAX); 940 if (i == 0 || i >= GCM_IMPL_NAME_MAX) 941 return (err); 942 943 strlcpy(req_name, val, GCM_IMPL_NAME_MAX); 944 while (i > 0 && isspace(req_name[i-1])) 945 i--; 946 req_name[i] = '\0'; 947 948 /* Check mandatory options */ 949 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { 950 #ifdef CAN_USE_GCM_ASM 951 /* Ignore avx implementation if it won't work. */ 952 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { 953 continue; 954 } 955 #endif 956 if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { 957 impl = gcm_impl_opts[i].sel; 958 err = 0; 959 break; 960 } 961 } 962 963 /* check all supported impl if init() was already called */ 964 if (err != 0 && gcm_impl_initialized) { 965 /* check all supported implementations */ 966 for (i = 0; i < gcm_supp_impl_cnt; i++) { 967 if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) { 968 impl = i; 969 err = 0; 970 break; 971 } 972 } 973 } 974 #ifdef CAN_USE_GCM_ASM 975 /* 976 * Use the avx implementation if available and the requested one is 977 * avx or fastest. 978 */ 979 if (gcm_avx_will_work() == B_TRUE && 980 (impl == IMPL_AVX || impl == IMPL_FASTEST)) { 981 gcm_set_avx(B_TRUE); 982 } else { 983 gcm_set_avx(B_FALSE); 984 } 985 #endif 986 987 if (err == 0) { 988 if (gcm_impl_initialized) 989 atomic_swap_32(&icp_gcm_impl, impl); 990 else 991 atomic_swap_32(&user_sel_impl, impl); 992 } 993 994 return (err); 995 } 996 997 #if defined(_KERNEL) && defined(__linux__) 998 999 static int 1000 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp) 1001 { 1002 return (gcm_impl_set(val)); 1003 } 1004 1005 static int 1006 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) 1007 { 1008 int i, cnt = 0; 1009 char *fmt; 1010 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); 1011 1012 ASSERT(gcm_impl_initialized); 1013 1014 /* list mandatory options */ 1015 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { 1016 #ifdef CAN_USE_GCM_ASM 1017 /* Ignore avx implementation if it won't work. */ 1018 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { 1019 continue; 1020 } 1021 #endif 1022 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; 1023 cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); 1024 } 1025 1026 /* list all supported implementations */ 1027 for (i = 0; i < gcm_supp_impl_cnt; i++) { 1028 fmt = (i == impl) ? "[%s] " : "%s "; 1029 cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name); 1030 } 1031 1032 return (cnt); 1033 } 1034 1035 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, 1036 NULL, 0644); 1037 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); 1038 #endif /* defined(__KERNEL) */ 1039 1040 #ifdef CAN_USE_GCM_ASM 1041 #define GCM_BLOCK_LEN 16 1042 /* 1043 * The openssl asm routines are 6x aggregated and need that many bytes 1044 * at minimum. 1045 */ 1046 #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) 1047 #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) 1048 /* 1049 * Ensure the chunk size is reasonable since we are allocating a 1050 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. 1051 */ 1052 #define GCM_AVX_MAX_CHUNK_SIZE \ 1053 (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) 1054 1055 /* Clear the FPU registers since they hold sensitive internal state. */ 1056 #define clear_fpu_regs() clear_fpu_regs_avx() 1057 #define GHASH_AVX(ctx, in, len) \ 1058 gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ 1059 in, len) 1060 1061 #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) 1062 1063 /* Get the chunk size module parameter. */ 1064 #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size 1065 1066 /* 1067 * Module parameter: number of bytes to process at once while owning the FPU. 1068 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is 1069 * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. 1070 */ 1071 static uint32_t gcm_avx_chunk_size = 1072 ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; 1073 1074 extern void clear_fpu_regs_avx(void); 1075 extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); 1076 extern void aes_encrypt_intel(const uint32_t rk[], int nr, 1077 const uint32_t pt[4], uint32_t ct[4]); 1078 1079 extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); 1080 extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, 1081 const uint8_t *in, size_t len); 1082 1083 extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, 1084 const void *, uint64_t *, uint64_t *); 1085 1086 extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, 1087 const void *, uint64_t *, uint64_t *); 1088 1089 static inline boolean_t 1090 gcm_avx_will_work(void) 1091 { 1092 /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */ 1093 return (kfpu_allowed() && 1094 zfs_avx_available() && zfs_aes_available() && 1095 zfs_pclmulqdq_available()); 1096 } 1097 1098 static inline void 1099 gcm_set_avx(boolean_t val) 1100 { 1101 if (gcm_avx_will_work() == B_TRUE) { 1102 atomic_swap_32(&gcm_use_avx, val); 1103 } 1104 } 1105 1106 static inline boolean_t 1107 gcm_toggle_avx(void) 1108 { 1109 if (gcm_avx_will_work() == B_TRUE) { 1110 return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); 1111 } else { 1112 return (B_FALSE); 1113 } 1114 } 1115 1116 static inline size_t 1117 gcm_simd_get_htab_size(boolean_t simd_mode) 1118 { 1119 switch (simd_mode) { 1120 case B_TRUE: 1121 return (2 * 6 * 2 * sizeof (uint64_t)); 1122 1123 default: 1124 return (0); 1125 } 1126 } 1127 1128 /* 1129 * Clear sensitive data in the context. 1130 * 1131 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and 1132 * ctx->gcm_Htable contain the hash sub key which protects authentication. 1133 * 1134 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for 1135 * a known plaintext attack, they consists of the IV and the first and last 1136 * counter respectively. If they should be cleared is debatable. 1137 */ 1138 static inline void 1139 gcm_clear_ctx(gcm_ctx_t *ctx) 1140 { 1141 memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder)); 1142 memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); 1143 memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0)); 1144 memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp)); 1145 } 1146 1147 /* Increment the GCM counter block by n. */ 1148 static inline void 1149 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) 1150 { 1151 uint64_t counter_mask = ntohll(0x00000000ffffffffULL); 1152 uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); 1153 1154 counter = htonll(counter + n); 1155 counter &= counter_mask; 1156 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; 1157 } 1158 1159 /* 1160 * Encrypt multiple blocks of data in GCM mode. 1161 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines 1162 * if possible. While processing a chunk the FPU is "locked". 1163 */ 1164 static int 1165 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, 1166 size_t length, crypto_data_t *out, size_t block_size) 1167 { 1168 size_t bleft = length; 1169 size_t need = 0; 1170 size_t done = 0; 1171 uint8_t *datap = (uint8_t *)data; 1172 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; 1173 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); 1174 uint64_t *ghash = ctx->gcm_ghash; 1175 uint64_t *cb = ctx->gcm_cb; 1176 uint8_t *ct_buf = NULL; 1177 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; 1178 int rv = CRYPTO_SUCCESS; 1179 1180 ASSERT(block_size == GCM_BLOCK_LEN); 1181 /* 1182 * If the last call left an incomplete block, try to fill 1183 * it first. 1184 */ 1185 if (ctx->gcm_remainder_len > 0) { 1186 need = block_size - ctx->gcm_remainder_len; 1187 if (length < need) { 1188 /* Accumulate bytes here and return. */ 1189 memcpy((uint8_t *)ctx->gcm_remainder + 1190 ctx->gcm_remainder_len, datap, length); 1191 1192 ctx->gcm_remainder_len += length; 1193 if (ctx->gcm_copy_to == NULL) { 1194 ctx->gcm_copy_to = datap; 1195 } 1196 return (CRYPTO_SUCCESS); 1197 } else { 1198 /* Complete incomplete block. */ 1199 memcpy((uint8_t *)ctx->gcm_remainder + 1200 ctx->gcm_remainder_len, datap, need); 1201 1202 ctx->gcm_copy_to = NULL; 1203 } 1204 } 1205 1206 /* Allocate a buffer to encrypt to if there is enough input. */ 1207 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { 1208 ct_buf = vmem_alloc(chunk_size, KM_SLEEP); 1209 if (ct_buf == NULL) { 1210 return (CRYPTO_HOST_MEMORY); 1211 } 1212 } 1213 1214 /* If we completed an incomplete block, encrypt and write it out. */ 1215 if (ctx->gcm_remainder_len > 0) { 1216 kfpu_begin(); 1217 aes_encrypt_intel(key->encr_ks.ks32, key->nr, 1218 (const uint32_t *)cb, (uint32_t *)tmp); 1219 1220 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); 1221 GHASH_AVX(ctx, tmp, block_size); 1222 clear_fpu_regs(); 1223 kfpu_end(); 1224 rv = crypto_put_output_data(tmp, out, block_size); 1225 out->cd_offset += block_size; 1226 gcm_incr_counter_block(ctx); 1227 ctx->gcm_processed_data_len += block_size; 1228 bleft -= need; 1229 datap += need; 1230 ctx->gcm_remainder_len = 0; 1231 } 1232 1233 /* Do the bulk encryption in chunk_size blocks. */ 1234 for (; bleft >= chunk_size; bleft -= chunk_size) { 1235 kfpu_begin(); 1236 done = aesni_gcm_encrypt( 1237 datap, ct_buf, chunk_size, key, cb, ghash); 1238 1239 clear_fpu_regs(); 1240 kfpu_end(); 1241 if (done != chunk_size) { 1242 rv = CRYPTO_FAILED; 1243 goto out_nofpu; 1244 } 1245 rv = crypto_put_output_data(ct_buf, out, chunk_size); 1246 if (rv != CRYPTO_SUCCESS) { 1247 goto out_nofpu; 1248 } 1249 out->cd_offset += chunk_size; 1250 datap += chunk_size; 1251 ctx->gcm_processed_data_len += chunk_size; 1252 } 1253 /* Check if we are already done. */ 1254 if (bleft == 0) { 1255 goto out_nofpu; 1256 } 1257 /* Bulk encrypt the remaining data. */ 1258 kfpu_begin(); 1259 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { 1260 done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); 1261 if (done == 0) { 1262 rv = CRYPTO_FAILED; 1263 goto out; 1264 } 1265 rv = crypto_put_output_data(ct_buf, out, done); 1266 if (rv != CRYPTO_SUCCESS) { 1267 goto out; 1268 } 1269 out->cd_offset += done; 1270 ctx->gcm_processed_data_len += done; 1271 datap += done; 1272 bleft -= done; 1273 1274 } 1275 /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ 1276 while (bleft > 0) { 1277 if (bleft < block_size) { 1278 memcpy(ctx->gcm_remainder, datap, bleft); 1279 ctx->gcm_remainder_len = bleft; 1280 ctx->gcm_copy_to = datap; 1281 goto out; 1282 } 1283 /* Encrypt, hash and write out. */ 1284 aes_encrypt_intel(key->encr_ks.ks32, key->nr, 1285 (const uint32_t *)cb, (uint32_t *)tmp); 1286 1287 gcm_xor_avx(datap, tmp); 1288 GHASH_AVX(ctx, tmp, block_size); 1289 rv = crypto_put_output_data(tmp, out, block_size); 1290 if (rv != CRYPTO_SUCCESS) { 1291 goto out; 1292 } 1293 out->cd_offset += block_size; 1294 gcm_incr_counter_block(ctx); 1295 ctx->gcm_processed_data_len += block_size; 1296 datap += block_size; 1297 bleft -= block_size; 1298 } 1299 out: 1300 clear_fpu_regs(); 1301 kfpu_end(); 1302 out_nofpu: 1303 if (ct_buf != NULL) { 1304 vmem_free(ct_buf, chunk_size); 1305 } 1306 return (rv); 1307 } 1308 1309 /* 1310 * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual 1311 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. 1312 */ 1313 static int 1314 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) 1315 { 1316 uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; 1317 uint32_t *J0 = (uint32_t *)ctx->gcm_J0; 1318 uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; 1319 size_t rem_len = ctx->gcm_remainder_len; 1320 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; 1321 int aes_rounds = ((aes_key_t *)keysched)->nr; 1322 int rv; 1323 1324 ASSERT(block_size == GCM_BLOCK_LEN); 1325 1326 if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { 1327 return (CRYPTO_DATA_LEN_RANGE); 1328 } 1329 1330 kfpu_begin(); 1331 /* Pad last incomplete block with zeros, encrypt and hash. */ 1332 if (rem_len > 0) { 1333 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; 1334 const uint32_t *cb = (uint32_t *)ctx->gcm_cb; 1335 1336 aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); 1337 memset(remainder + rem_len, 0, block_size - rem_len); 1338 for (int i = 0; i < rem_len; i++) { 1339 remainder[i] ^= tmp[i]; 1340 } 1341 GHASH_AVX(ctx, remainder, block_size); 1342 ctx->gcm_processed_data_len += rem_len; 1343 /* No need to increment counter_block, it's the last block. */ 1344 } 1345 /* Finish tag. */ 1346 ctx->gcm_len_a_len_c[1] = 1347 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); 1348 GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); 1349 aes_encrypt_intel(keysched, aes_rounds, J0, J0); 1350 1351 gcm_xor_avx((uint8_t *)J0, ghash); 1352 clear_fpu_regs(); 1353 kfpu_end(); 1354 1355 /* Output remainder. */ 1356 if (rem_len > 0) { 1357 rv = crypto_put_output_data(remainder, out, rem_len); 1358 if (rv != CRYPTO_SUCCESS) 1359 return (rv); 1360 } 1361 out->cd_offset += rem_len; 1362 ctx->gcm_remainder_len = 0; 1363 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); 1364 if (rv != CRYPTO_SUCCESS) 1365 return (rv); 1366 1367 out->cd_offset += ctx->gcm_tag_len; 1368 /* Clear sensitive data in the context before returning. */ 1369 gcm_clear_ctx(ctx); 1370 return (CRYPTO_SUCCESS); 1371 } 1372 1373 /* 1374 * Finalize decryption: We just have accumulated crypto text, so now we 1375 * decrypt it here inplace. 1376 */ 1377 static int 1378 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) 1379 { 1380 ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); 1381 ASSERT3U(block_size, ==, 16); 1382 1383 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; 1384 size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; 1385 uint8_t *datap = ctx->gcm_pt_buf; 1386 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); 1387 uint32_t *cb = (uint32_t *)ctx->gcm_cb; 1388 uint64_t *ghash = ctx->gcm_ghash; 1389 uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; 1390 int rv = CRYPTO_SUCCESS; 1391 size_t bleft, done; 1392 1393 /* 1394 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be 1395 * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of 1396 * GCM_AVX_MIN_DECRYPT_BYTES. 1397 */ 1398 for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { 1399 kfpu_begin(); 1400 done = aesni_gcm_decrypt(datap, datap, chunk_size, 1401 (const void *)key, ctx->gcm_cb, ghash); 1402 clear_fpu_regs(); 1403 kfpu_end(); 1404 if (done != chunk_size) { 1405 return (CRYPTO_FAILED); 1406 } 1407 datap += done; 1408 } 1409 /* Decrypt remainder, which is less than chunk size, in one go. */ 1410 kfpu_begin(); 1411 if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { 1412 done = aesni_gcm_decrypt(datap, datap, bleft, 1413 (const void *)key, ctx->gcm_cb, ghash); 1414 if (done == 0) { 1415 clear_fpu_regs(); 1416 kfpu_end(); 1417 return (CRYPTO_FAILED); 1418 } 1419 datap += done; 1420 bleft -= done; 1421 } 1422 ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); 1423 1424 /* 1425 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain, 1426 * decrypt them block by block. 1427 */ 1428 while (bleft > 0) { 1429 /* Incomplete last block. */ 1430 if (bleft < block_size) { 1431 uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; 1432 1433 memset(lastb, 0, block_size); 1434 memcpy(lastb, datap, bleft); 1435 /* The GCM processing. */ 1436 GHASH_AVX(ctx, lastb, block_size); 1437 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); 1438 for (size_t i = 0; i < bleft; i++) { 1439 datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; 1440 } 1441 break; 1442 } 1443 /* The GCM processing. */ 1444 GHASH_AVX(ctx, datap, block_size); 1445 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); 1446 gcm_xor_avx((uint8_t *)tmp, datap); 1447 gcm_incr_counter_block(ctx); 1448 1449 datap += block_size; 1450 bleft -= block_size; 1451 } 1452 if (rv != CRYPTO_SUCCESS) { 1453 clear_fpu_regs(); 1454 kfpu_end(); 1455 return (rv); 1456 } 1457 /* Decryption done, finish the tag. */ 1458 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); 1459 GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); 1460 aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, 1461 (uint32_t *)ctx->gcm_J0); 1462 1463 gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); 1464 1465 /* We are done with the FPU, restore its state. */ 1466 clear_fpu_regs(); 1467 kfpu_end(); 1468 1469 /* Compare the input authentication tag with what we calculated. */ 1470 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { 1471 /* They don't match. */ 1472 return (CRYPTO_INVALID_MAC); 1473 } 1474 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); 1475 if (rv != CRYPTO_SUCCESS) { 1476 return (rv); 1477 } 1478 out->cd_offset += pt_len; 1479 gcm_clear_ctx(ctx); 1480 return (CRYPTO_SUCCESS); 1481 } 1482 1483 /* 1484 * Initialize the GCM params H, Htabtle and the counter block. Save the 1485 * initial counter block. 1486 */ 1487 static int 1488 gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, 1489 unsigned char *auth_data, size_t auth_data_len, size_t block_size) 1490 { 1491 uint8_t *cb = (uint8_t *)ctx->gcm_cb; 1492 uint64_t *H = ctx->gcm_H; 1493 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; 1494 int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; 1495 uint8_t *datap = auth_data; 1496 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; 1497 size_t bleft; 1498 1499 ASSERT(block_size == GCM_BLOCK_LEN); 1500 1501 /* Init H (encrypt zero block) and create the initial counter block. */ 1502 memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash)); 1503 memset(H, 0, sizeof (ctx->gcm_H)); 1504 kfpu_begin(); 1505 aes_encrypt_intel(keysched, aes_rounds, 1506 (const uint32_t *)H, (uint32_t *)H); 1507 1508 gcm_init_htab_avx(ctx->gcm_Htable, H); 1509 1510 if (iv_len == 12) { 1511 memcpy(cb, iv, 12); 1512 cb[12] = 0; 1513 cb[13] = 0; 1514 cb[14] = 0; 1515 cb[15] = 1; 1516 /* We need the ICB later. */ 1517 memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0)); 1518 } else { 1519 /* 1520 * Most consumers use 12 byte IVs, so it's OK to use the 1521 * original routines for other IV sizes, just avoid nesting 1522 * kfpu_begin calls. 1523 */ 1524 clear_fpu_regs(); 1525 kfpu_end(); 1526 gcm_format_initial_blocks(iv, iv_len, ctx, block_size, 1527 aes_copy_block, aes_xor_block); 1528 kfpu_begin(); 1529 } 1530 1531 /* Openssl post increments the counter, adjust for that. */ 1532 gcm_incr_counter_block(ctx); 1533 1534 /* Ghash AAD in chunk_size blocks. */ 1535 for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { 1536 GHASH_AVX(ctx, datap, chunk_size); 1537 datap += chunk_size; 1538 clear_fpu_regs(); 1539 kfpu_end(); 1540 kfpu_begin(); 1541 } 1542 /* Ghash the remainder and handle possible incomplete GCM block. */ 1543 if (bleft > 0) { 1544 size_t incomp = bleft % block_size; 1545 1546 bleft -= incomp; 1547 if (bleft > 0) { 1548 GHASH_AVX(ctx, datap, bleft); 1549 datap += bleft; 1550 } 1551 if (incomp > 0) { 1552 /* Zero pad and hash incomplete last block. */ 1553 uint8_t *authp = (uint8_t *)ctx->gcm_tmp; 1554 1555 memset(authp, 0, block_size); 1556 memcpy(authp, datap, incomp); 1557 GHASH_AVX(ctx, authp, block_size); 1558 } 1559 } 1560 clear_fpu_regs(); 1561 kfpu_end(); 1562 return (CRYPTO_SUCCESS); 1563 } 1564 1565 #if defined(_KERNEL) 1566 static int 1567 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) 1568 { 1569 unsigned long val; 1570 char val_rounded[16]; 1571 int error = 0; 1572 1573 error = kstrtoul(buf, 0, &val); 1574 if (error) 1575 return (error); 1576 1577 val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; 1578 1579 if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) 1580 return (-EINVAL); 1581 1582 snprintf(val_rounded, 16, "%u", (uint32_t)val); 1583 error = param_set_uint(val_rounded, kp); 1584 return (error); 1585 } 1586 1587 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, 1588 param_get_uint, &gcm_avx_chunk_size, 0644); 1589 1590 MODULE_PARM_DESC(icp_gcm_avx_chunk_size, 1591 "How many bytes to process while owning the FPU"); 1592 1593 #endif /* defined(__KERNEL) */ 1594 #endif /* ifdef CAN_USE_GCM_ASM */ 1595