/*******************************************************************************
  Copyright (c) 2018-2020, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice,
        this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its contributors
        may be used to endorse or promote products derived from this software
        without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
27
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#include "ipsec_ooo_mgr.h"
#include "constants.h"
#include "include/clear_regs_mem.h"
35
/*
 * Single-block SHA routines defined outside this file.
 * As used below: the first argument is the input block and the second is
 * the digest state that the routine updates.
 */
extern void sha1_block_sse(const void *, void *);
extern void sha1_block_avx(const void *, void *);

extern void sha224_block_sse(const void *, void *);
extern void sha224_block_avx(const void *, void *);

extern void sha256_block_sse(const void *, void *);
extern void sha256_block_avx(const void *, void *);

extern void sha384_block_sse(const void *, void *);
extern void sha384_block_avx(const void *, void *);

extern void sha512_block_sse(const void *, void *);
extern void sha512_block_avx(const void *, void *);
50
51
/* ========================================================================== */
/*
 * Various utility functions for SHA API
 */
56
/**
 * @brief Reverses the byte order of a 32-bit value.
 *
 * @param val value to byte-swap
 * @return @p val with its four bytes in reverse order
 */
__forceinline
uint32_t bswap4(const uint32_t val)
{
        return ((val >> 24) |             /**< A*/
                ((val & 0xff0000) >> 8) | /**< B*/
                ((val & 0xff00) << 8) |   /**< C*/
                (val << 24));             /**< D*/
}
65
/**
 * @brief Reverses the byte order of a 64-bit value.
 *
 * Swaps adjacent bytes, then adjacent 16-bit pairs, then the two 32-bit
 * halves; the net effect is a full eight-byte reversal.
 *
 * @param val value to byte-swap
 * @return @p val with its eight bytes in reverse order
 */
__forceinline
uint64_t bswap8(const uint64_t val)
{
        uint64_t x = val;

        x = ((x & 0x00ff00ff00ff00ffULL) << 8) |
                ((x >> 8) & 0x00ff00ff00ff00ffULL);
        x = ((x & 0x0000ffff0000ffffULL) << 16) |
                ((x >> 16) & 0x0000ffff0000ffffULL);

        return (x << 32) | (x >> 32);
}
72
/**
 * @brief Stores a 64-bit value at @p outp in big-endian byte order.
 *
 * The value is written one byte at a time, so @p outp needs no particular
 * alignment and no object is accessed through a mismatched pointer type
 * (callers pass an address inside a byte buffer).
 *
 * @param outp destination, at least 8 writable bytes
 * @param val  value to store, most significant byte first
 */
__forceinline
void store8_be(void *outp, const uint64_t val)
{
        uint8_t *out = (uint8_t *) outp;
        unsigned i;

        for (i = 0; i < 8; i++)
                out[i] = (uint8_t) (val >> (56 - 8 * i));
}
78
/**
 * @brief Copies @p len bytes from @p src to @p dst, one byte at a time.
 *
 * Bytes are copied in ascending address order; nothing is written when
 * @p len is zero.
 *
 * @param dst destination buffer
 * @param src source buffer
 * @param len number of bytes to copy
 */
__forceinline
void var_memcpy(void *dst, const void *src, const uint64_t len)
{
        const uint8_t *in = (const uint8_t *) src;
        uint8_t *out = (uint8_t *) dst;
        uint64_t n;

        for (n = 0; n < len; n++)
                out[n] = in[n];
}
89
90 __forceinline
copy_bswap4_array(void * dst,const void * src,const size_t num)91 void copy_bswap4_array(void *dst, const void *src, const size_t num)
92 {
93 uint32_t *outp = (uint32_t *) dst;
94 const uint32_t *inp = (const uint32_t *) src;
95 size_t i;
96
97 for (i = 0; i < num; i++)
98 outp[i] = bswap4(inp[i]);
99 }
100
101 __forceinline
copy_bswap8_array(void * dst,const void * src,const size_t num)102 void copy_bswap8_array(void *dst, const void *src, const size_t num)
103 {
104 uint64_t *outp = (uint64_t *) dst;
105 const uint64_t *inp = (const uint64_t *) src;
106 size_t i;
107
108 for (i = 0; i < num; i++)
109 outp[i] = bswap8(inp[i]);
110 }
111
112 __forceinline
113 void
sha_generic_one_block(const void * inp,void * digest,const int is_avx,const int sha_type)114 sha_generic_one_block(const void *inp, void *digest,
115 const int is_avx, const int sha_type)
116 {
117 if (sha_type == 1) {
118 if (is_avx)
119 sha1_block_avx(inp, digest);
120 else
121 sha1_block_sse(inp, digest);
122 } else if (sha_type == 224) {
123 if (is_avx)
124 sha224_block_avx(inp, digest);
125 else
126 sha224_block_sse(inp, digest);
127 } else if (sha_type == 256) {
128 if (is_avx)
129 sha256_block_avx(inp, digest);
130 else
131 sha256_block_sse(inp, digest);
132 } else if (sha_type == 384) {
133 if (is_avx)
134 sha384_block_avx(inp, digest);
135 else
136 sha384_block_sse(inp, digest);
137 } else if (sha_type == 512) {
138 if (is_avx)
139 sha512_block_avx(inp, digest);
140 else
141 sha512_block_sse(inp, digest);
142 }
143 }
144
145 __forceinline
sha1_init_digest(void * p)146 void sha1_init_digest(void *p)
147 {
148 uint32_t *p_digest = (uint32_t *)p;
149
150 p_digest[0] = H0;
151 p_digest[1] = H1;
152 p_digest[2] = H2;
153 p_digest[3] = H3;
154 p_digest[4] = H4;
155 }
156
157 __forceinline
sha224_init_digest(void * p)158 void sha224_init_digest(void *p)
159 {
160 uint32_t *p_digest = (uint32_t *)p;
161
162 p_digest[0] = SHA224_H0;
163 p_digest[1] = SHA224_H1;
164 p_digest[2] = SHA224_H2;
165 p_digest[3] = SHA224_H3;
166 p_digest[4] = SHA224_H4;
167 p_digest[5] = SHA224_H5;
168 p_digest[6] = SHA224_H6;
169 p_digest[7] = SHA224_H7;
170 }
171
172 __forceinline
sha256_init_digest(void * p)173 void sha256_init_digest(void *p)
174 {
175 uint32_t *p_digest = (uint32_t *)p;
176
177 p_digest[0] = SHA256_H0;
178 p_digest[1] = SHA256_H1;
179 p_digest[2] = SHA256_H2;
180 p_digest[3] = SHA256_H3;
181 p_digest[4] = SHA256_H4;
182 p_digest[5] = SHA256_H5;
183 p_digest[6] = SHA256_H6;
184 p_digest[7] = SHA256_H7;
185 }
186
187 __forceinline
sha384_init_digest(void * p)188 void sha384_init_digest(void *p)
189 {
190 uint64_t *p_digest = (uint64_t *)p;
191
192 p_digest[0] = SHA384_H0;
193 p_digest[1] = SHA384_H1;
194 p_digest[2] = SHA384_H2;
195 p_digest[3] = SHA384_H3;
196 p_digest[4] = SHA384_H4;
197 p_digest[5] = SHA384_H5;
198 p_digest[6] = SHA384_H6;
199 p_digest[7] = SHA384_H7;
200 }
201
202 __forceinline
sha512_init_digest(void * p)203 void sha512_init_digest(void *p)
204 {
205 uint64_t *p_digest = (uint64_t *)p;
206
207 p_digest[0] = SHA512_H0;
208 p_digest[1] = SHA512_H1;
209 p_digest[2] = SHA512_H2;
210 p_digest[3] = SHA512_H3;
211 p_digest[4] = SHA512_H4;
212 p_digest[5] = SHA512_H5;
213 p_digest[6] = SHA512_H6;
214 p_digest[7] = SHA512_H7;
215 }
216
217 __forceinline
218 void
sha_generic_init(void * digest,const int sha_type)219 sha_generic_init(void *digest, const int sha_type)
220 {
221 if (sha_type == 1)
222 sha1_init_digest(digest);
223 else if (sha_type == 224)
224 sha224_init_digest(digest);
225 else if (sha_type == 256)
226 sha256_init_digest(digest);
227 else if (sha_type == 384)
228 sha384_init_digest(digest);
229 else if (sha_type == 512)
230 sha512_init_digest(digest);
231 }
232
233 __forceinline
sha_generic_write_digest(void * dst,const void * src,const int sha_type)234 void sha_generic_write_digest(void *dst, const void *src, const int sha_type)
235 {
236 if (sha_type == 1)
237 copy_bswap4_array(dst, src, NUM_SHA_DIGEST_WORDS);
238 else if (sha_type == 224)
239 copy_bswap4_array(dst, src, NUM_SHA_224_DIGEST_WORDS);
240 else if (sha_type == 256)
241 copy_bswap4_array(dst, src, NUM_SHA_256_DIGEST_WORDS);
242 else if (sha_type == 384)
243 copy_bswap8_array(dst, src, NUM_SHA_384_DIGEST_WORDS);
244 else if (sha_type == 512)
245 copy_bswap8_array(dst, src, NUM_SHA_512_DIGEST_WORDS);
246 }
247
248 __forceinline
249 void
sha_generic(const void * data,const uint64_t length,void * digest,const int is_avx,const int sha_type,const uint64_t blk_size,const uint64_t pad_size)250 sha_generic(const void *data, const uint64_t length, void *digest,
251 const int is_avx, const int sha_type, const uint64_t blk_size,
252 const uint64_t pad_size)
253 {
254 #ifdef SAFE_PARAM
255 if (data == NULL || digest == NULL)
256 return;
257 #endif
258
259 uint8_t cb[SHA_512_BLOCK_SIZE]; /* biggest possible */
260 union {
261 uint32_t digest1[NUM_SHA_256_DIGEST_WORDS];
262 uint64_t digest2[NUM_SHA_512_DIGEST_WORDS];
263 } local_digest;
264 void *ld = (void *) &local_digest;
265 const uint8_t *inp = (const uint8_t *) data;
266 uint64_t idx, r;
267
268 sha_generic_init(ld, sha_type);
269
270 for (idx = 0; (idx + blk_size) <= length; idx += blk_size)
271 sha_generic_one_block(&inp[idx], ld, is_avx, sha_type);
272
273 r = length % blk_size;
274
275 memset(cb, 0, sizeof(cb));
276 var_memcpy(cb, &inp[idx], r);
277 cb[r] = 0x80;
278
279 if (r >= (blk_size - pad_size)) {
280 /* length will be encoded in the next block */
281 sha_generic_one_block(cb, ld, is_avx, sha_type);
282 memset(cb, 0, sizeof(cb));
283 }
284
285 store8_be(&cb[blk_size - 8], length * 8 /* bit length */);
286 sha_generic_one_block(cb, ld, is_avx, sha_type);
287
288 sha_generic_write_digest(digest, ld, sha_type);
289 #ifdef SAFE_DATA
290 clear_mem(cb, sizeof(cb));
291 clear_mem(&local_digest, sizeof(local_digest));
292 clear_scratch_gps();
293 if (is_avx)
294 clear_scratch_xmms_avx();
295 else
296 clear_scratch_xmms_sse();
297 #endif
298 }
299
300 __forceinline
sha_generic_1block(const void * data,void * digest,const int is_avx,const int sha_type)301 void sha_generic_1block(const void *data, void *digest,
302 const int is_avx, const int sha_type)
303 {
304 #ifdef SAFE_PARAM
305 if (data == NULL || digest == NULL)
306 return;
307 #endif
308 sha_generic_init(digest, sha_type);
309 sha_generic_one_block(data, digest, is_avx, sha_type);
310 #ifdef SAFE_DATA
311 clear_scratch_gps();
312 if (is_avx)
313 clear_scratch_xmms_avx();
314 else
315 clear_scratch_xmms_sse();
316 #endif
317 }
318
319
/* ========================================================================== */
/* One block SHA1 computation for IPAD / OPAD usage only */
322
/**
 * @brief One-block SHA-1 (IPAD/OPAD use) with the SSE block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha1_one_block_sse(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 0 /* SSE */, 1 /* SHA1 */);
}
327
/**
 * @brief One-block SHA-1 (IPAD/OPAD use) with the AVX block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha1_one_block_avx(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 1 /* SHA1 */);
}
332
/**
 * @brief One-block SHA-1 (IPAD/OPAD use), AVX2 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha1_one_block_avx2(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 1 /* SHA1 */);
}
337
/**
 * @brief One-block SHA-1 (IPAD/OPAD use), AVX512 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha1_one_block_avx512(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 1 /* SHA1 */);
}
342
343
/* ========================================================================== */
/*
 * SHA1 API for use in HMAC-SHA1 when key is longer than the block size
 */
348
sha1_sse(const void * data,const uint64_t length,void * digest)349 void sha1_sse(const void *data, const uint64_t length, void *digest)
350 {
351 sha_generic(data, length, digest, 0 /* SSE */, 1, SHA1_BLOCK_SIZE,
352 SHA1_PAD_SIZE);
353 }
354
sha1_avx(const void * data,const uint64_t length,void * digest)355 void sha1_avx(const void *data, const uint64_t length, void *digest)
356 {
357 sha_generic(data, length, digest, 1 /* AVX */, 1, SHA1_BLOCK_SIZE,
358 SHA1_PAD_SIZE);
359 }
360
sha1_avx2(const void * data,const uint64_t length,void * digest)361 void sha1_avx2(const void *data, const uint64_t length, void *digest)
362 {
363 sha_generic(data, length, digest, 1 /* AVX */, 1, SHA1_BLOCK_SIZE,
364 SHA1_PAD_SIZE);
365 }
366
sha1_avx512(const void * data,const uint64_t length,void * digest)367 void sha1_avx512(const void *data, const uint64_t length, void *digest)
368 {
369 sha_generic(data, length, digest, 1 /* AVX */, 1, SHA1_BLOCK_SIZE,
370 SHA1_PAD_SIZE);
371 }
372
/* ========================================================================== */
/* One block SHA224 computation for IPAD / OPAD usage only */
375
/**
 * @brief One-block SHA-224 (IPAD/OPAD use) with the SSE block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha224_one_block_sse(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 0 /* SSE */, 224 /* SHA224 */);
}
380
/**
 * @brief One-block SHA-224 (IPAD/OPAD use) with the AVX block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha224_one_block_avx(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 224 /* SHA224 */);
}
385
/**
 * @brief One-block SHA-224 (IPAD/OPAD use), AVX2 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha224_one_block_avx2(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 224 /* SHA224 */);
}
390
/**
 * @brief One-block SHA-224 (IPAD/OPAD use), AVX512 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha224_one_block_avx512(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 224 /* SHA224 */);
}
395
/* ========================================================================== */
/*
 * SHA224 API for use in HMAC-SHA224 when key is longer than the block size
 */
sha224_sse(const void * data,const uint64_t length,void * digest)400 void sha224_sse(const void *data, const uint64_t length, void *digest)
401 {
402 sha_generic(data, length, digest, 0 /* SSE */, 224, SHA_256_BLOCK_SIZE,
403 SHA224_PAD_SIZE);
404 }
405
sha224_avx(const void * data,const uint64_t length,void * digest)406 void sha224_avx(const void *data, const uint64_t length, void *digest)
407 {
408 sha_generic(data, length, digest, 1 /* AVX */, 224, SHA_256_BLOCK_SIZE,
409 SHA224_PAD_SIZE);
410 }
411
sha224_avx2(const void * data,const uint64_t length,void * digest)412 void sha224_avx2(const void *data, const uint64_t length, void *digest)
413 {
414 sha_generic(data, length, digest, 1 /* AVX */, 224, SHA_256_BLOCK_SIZE,
415 SHA224_PAD_SIZE);
416 }
417
sha224_avx512(const void * data,const uint64_t length,void * digest)418 void sha224_avx512(const void *data, const uint64_t length, void *digest)
419 {
420 sha_generic(data, length, digest, 1 /* AVX */, 224, SHA_256_BLOCK_SIZE,
421 SHA224_PAD_SIZE);
422 }
423
/* ========================================================================== */
/* One block SHA256 computation for IPAD / OPAD usage only */
426
/**
 * @brief One-block SHA-256 (IPAD/OPAD use) with the SSE block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha256_one_block_sse(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 0 /* SSE */, 256 /* SHA256 */);
}
431
/**
 * @brief One-block SHA-256 (IPAD/OPAD use) with the AVX block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha256_one_block_avx(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 256 /* SHA256 */);
}
436
/**
 * @brief One-block SHA-256 (IPAD/OPAD use), AVX2 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha256_one_block_avx2(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 256 /* SHA256 */);
}
441
/**
 * @brief One-block SHA-256 (IPAD/OPAD use), AVX512 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha256_one_block_avx512(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 256 /* SHA256 */);
}
446
/* ========================================================================== */
/*
 * SHA256 API for use in HMAC-SHA256 when key is longer than the block size
 */
sha256_sse(const void * data,const uint64_t length,void * digest)451 void sha256_sse(const void *data, const uint64_t length, void *digest)
452 {
453 sha_generic(data, length, digest, 0 /* SSE */, 256, SHA_256_BLOCK_SIZE,
454 SHA256_PAD_SIZE);
455 }
456
sha256_avx(const void * data,const uint64_t length,void * digest)457 void sha256_avx(const void *data, const uint64_t length, void *digest)
458 {
459 sha_generic(data, length, digest, 1 /* AVX */, 256, SHA_256_BLOCK_SIZE,
460 SHA256_PAD_SIZE);
461 }
462
sha256_avx2(const void * data,const uint64_t length,void * digest)463 void sha256_avx2(const void *data, const uint64_t length, void *digest)
464 {
465 sha_generic(data, length, digest, 1 /* AVX */, 256, SHA_256_BLOCK_SIZE,
466 SHA256_PAD_SIZE);
467 }
468
sha256_avx512(const void * data,const uint64_t length,void * digest)469 void sha256_avx512(const void *data, const uint64_t length, void *digest)
470 {
471 sha_generic(data, length, digest, 1 /* AVX */, 256, SHA_256_BLOCK_SIZE,
472 SHA256_PAD_SIZE);
473 }
474
/* ========================================================================== */
/* One block SHA384 computation for IPAD / OPAD usage only */
477
/**
 * @brief One-block SHA-384 (IPAD/OPAD use) with the SSE block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha384_one_block_sse(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 0 /* SSE */, 384 /* SHA384 */);
}
482
/**
 * @brief One-block SHA-384 (IPAD/OPAD use) with the AVX block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha384_one_block_avx(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 384 /* SHA384 */);
}
487
/**
 * @brief One-block SHA-384 (IPAD/OPAD use), AVX2 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha384_one_block_avx2(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 384 /* SHA384 */);
}
492
/**
 * @brief One-block SHA-384 (IPAD/OPAD use), AVX512 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha384_one_block_avx512(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 384 /* SHA384 */);
}
497
/* ========================================================================== */
/*
 * SHA384 API for use in HMAC-SHA384 when key is longer than the block size
 */
sha384_sse(const void * data,const uint64_t length,void * digest)502 void sha384_sse(const void *data, const uint64_t length, void *digest)
503 {
504 sha_generic(data, length, digest, 0 /* SSE */, 384, SHA_384_BLOCK_SIZE,
505 SHA384_PAD_SIZE);
506 }
507
sha384_avx(const void * data,const uint64_t length,void * digest)508 void sha384_avx(const void *data, const uint64_t length, void *digest)
509 {
510 sha_generic(data, length, digest, 1 /* AVX */, 384, SHA_384_BLOCK_SIZE,
511 SHA384_PAD_SIZE);
512 }
513
sha384_avx2(const void * data,const uint64_t length,void * digest)514 void sha384_avx2(const void *data, const uint64_t length, void *digest)
515 {
516 sha_generic(data, length, digest, 1 /* AVX */, 384, SHA_384_BLOCK_SIZE,
517 SHA384_PAD_SIZE);
518 }
519
sha384_avx512(const void * data,const uint64_t length,void * digest)520 void sha384_avx512(const void *data, const uint64_t length, void *digest)
521 {
522 sha_generic(data, length, digest, 1 /* AVX */, 384, SHA_384_BLOCK_SIZE,
523 SHA384_PAD_SIZE);
524 }
525
/* ========================================================================== */
/* One block SHA512 computation for IPAD / OPAD usage only */
528
/**
 * @brief One-block SHA-512 (IPAD/OPAD use) with the SSE block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha512_one_block_sse(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 0 /* SSE */, 512 /* SHA512 */);
}
533
/**
 * @brief One-block SHA-512 (IPAD/OPAD use) with the AVX block routine.
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha512_one_block_avx(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 512 /* SHA512 */);
}
538
/**
 * @brief One-block SHA-512 (IPAD/OPAD use), AVX2 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha512_one_block_avx2(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 512 /* SHA512 */);
}
543
/**
 * @brief One-block SHA-512 (IPAD/OPAD use), AVX512 entry point.
 *
 * Uses the AVX block routine (is_avx = 1).
 *
 * @param data   input block
 * @param digest digest state: initialised, then updated with one block
 */
void sha512_one_block_avx512(const void *data, void *digest)
{
        sha_generic_1block(data, digest, 1 /* AVX */, 512 /* SHA512 */);
}
548
/* ========================================================================== */
/*
 * SHA512 API for use in HMAC-SHA512 when key is longer than the block size
 */
sha512_sse(const void * data,const uint64_t length,void * digest)553 void sha512_sse(const void *data, const uint64_t length, void *digest)
554 {
555 sha_generic(data, length, digest, 0 /* SSE */, 512, SHA_512_BLOCK_SIZE,
556 SHA512_PAD_SIZE);
557 }
558
sha512_avx(const void * data,const uint64_t length,void * digest)559 void sha512_avx(const void *data, const uint64_t length, void *digest)
560 {
561 sha_generic(data, length, digest, 1 /* AVX */, 512, SHA_512_BLOCK_SIZE,
562 SHA512_PAD_SIZE);
563 }
564
sha512_avx2(const void * data,const uint64_t length,void * digest)565 void sha512_avx2(const void *data, const uint64_t length, void *digest)
566 {
567 sha_generic(data, length, digest, 1 /* AVX */, 512, SHA_512_BLOCK_SIZE,
568 SHA512_PAD_SIZE);
569 }
570
sha512_avx512(const void * data,const uint64_t length,void * digest)571 void sha512_avx512(const void *data, const uint64_t length, void *digest)
572 {
573 sha_generic(data, length, digest, 1 /* AVX */, 512, SHA_512_BLOCK_SIZE,
574 SHA512_PAD_SIZE);
575 }
576