1 /*******************************************************************************
2 Copyright (c) 2009-2020, Intel Corporation
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice,
8 this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in the
11 documentation and/or other materials provided with the distribution.
12 * Neither the name of Intel Corporation nor the names of its contributors
13 may be used to endorse or promote products derived from this software
14 without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27
28 /*-----------------------------------------------------------------------
29 * zuc_sse.c
30 *-----------------------------------------------------------------------
31 * An implementation of ZUC, the core algorithm for the
32 * 3GPP Confidentiality and Integrity algorithms.
33 *
34 *-----------------------------------------------------------------------*/
35
36 #include <string.h>
37
38 #include "include/zuc_internal.h"
39 #include "include/wireless_common.h"
40 #include "include/save_xmms.h"
41 #include "include/clear_regs_mem.h"
42 #include "intel-ipsec-mb.h"
43
/*
 * Local aliases for the XMM save/restore helpers and for the SSE routine
 * that clears scratch SIMD registers. SAVE_XMMS/RESTORE_XMMS are only used
 * in non-LINUX builds in this file; CLEAR_SCRATCH_SIMD_REGS only when
 * SAFE_DATA is defined.
 */
#define SAVE_XMMS save_xmms
#define RESTORE_XMMS restore_xmms
#define CLEAR_SCRATCH_SIMD_REGS clear_scratch_xmms_sse

/* Number of buffers handled together by the multi-buffer routines */
#define NUM_SSE_BUFS 4
/* Size in bytes of one keystream round (one 16-byte block) */
#define KEYSTR_ROUND_LEN 16
50
51 static inline
_zuc_eea3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,void * pBufferOut,const uint32_t length)52 void _zuc_eea3_1_buffer_sse(const void *pKey,
53 const void *pIv,
54 const void *pBufferIn,
55 void *pBufferOut,
56 const uint32_t length)
57 {
58 DECLARE_ALIGNED(ZucState_t zucState, 16);
59 DECLARE_ALIGNED(uint8_t keyStream[KEYSTR_ROUND_LEN], 16);
60 const uint64_t *pIn64 = NULL;
61 uint64_t *pOut64 = NULL, *pKeyStream64 = NULL;
62 uint64_t *pTemp64 = NULL, *pdstTemp64 = NULL;
63
64 uint32_t numKeyStreamsPerPkt = length/ KEYSTR_ROUND_LEN;
65 const uint32_t numBytesLeftOver = length % KEYSTR_ROUND_LEN;
66
67 /* initialize the zuc state */
68 asm_ZucInitialization_sse(pKey, pIv, &(zucState));
69
70 /* Loop Over all the Quad-Words in input buffer and XOR with the 64bits
71 * of generated keystream */
72 pOut64 = (uint64_t *) pBufferOut;
73 pIn64 = (const uint64_t *) pBufferIn;
74
75 while (numKeyStreamsPerPkt--) {
76 /* Generate the key stream 16 bytes at a time */
77 asm_ZucGenKeystream16B_sse((uint32_t *) &keyStream[0],
78 &zucState);
79
80 /* XOR The Keystream generated with the input buffer here */
81 pKeyStream64 = (uint64_t *) keyStream;
82 asm_XorKeyStream16B_sse(pIn64, pOut64, pKeyStream64);
83 pIn64 += 2;
84 pOut64 += 2;
85 }
86
87 /* Check for remaining 0 to 15 bytes */
88 if (numBytesLeftOver) {
89 /* buffer to store 16 bytes of keystream */
90 DECLARE_ALIGNED(uint8_t tempSrc[KEYSTR_ROUND_LEN], 16);
91 DECLARE_ALIGNED(uint8_t tempDst[KEYSTR_ROUND_LEN], 16);
92 const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
93 uint8_t *pOut8 = (uint8_t *) pBufferOut;
94 const uint64_t num4BRounds = ((numBytesLeftOver - 1) / 4) + 1;
95
96 asm_ZucGenKeystream_sse((uint32_t *) &keyStream[0],
97 &zucState, num4BRounds);
98
99 /* copy the remaining bytes into temporary buffer and XOR with
100 * the 64-bytes of keystream. Then copy on the valid bytes back
101 * to the output buffer */
102
103 memcpy(&tempSrc[0], &pIn8[length - numBytesLeftOver],
104 numBytesLeftOver);
105 pKeyStream64 = (uint64_t *) &keyStream[0];
106 pTemp64 = (uint64_t *) &tempSrc[0];
107 pdstTemp64 = (uint64_t *) &tempDst[0];
108
109 asm_XorKeyStream16B_sse(pTemp64, pdstTemp64,
110 pKeyStream64);
111 memcpy(&pOut8[length - numBytesLeftOver], &tempDst[0],
112 numBytesLeftOver);
113 #ifdef SAFE_DATA
114 clear_mem(tempSrc, sizeof(tempSrc));
115 clear_mem(tempDst, sizeof(tempDst));
116 #endif
117
118 }
119 #ifdef SAFE_DATA
120 /* Clear sensitive data in stack */
121 clear_mem(keyStream, sizeof(keyStream));
122 clear_mem(&zucState, sizeof(zucState));
123 #endif
124 }
125
126 static inline
_zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],void * pBufferOut[NUM_SSE_BUFS],const uint32_t length[NUM_SSE_BUFS],const unsigned use_gfni)127 void _zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
128 const void * const pIv[NUM_SSE_BUFS],
129 const void * const pBufferIn[NUM_SSE_BUFS],
130 void *pBufferOut[NUM_SSE_BUFS],
131 const uint32_t length[NUM_SSE_BUFS],
132 const unsigned use_gfni)
133 {
134 DECLARE_ALIGNED(ZucState4_t state, 64);
135 DECLARE_ALIGNED(ZucState_t singlePktState, 64);
136 unsigned int i;
137 /* Calculate the minimum input packet size */
138 uint32_t bytes1 = (length[0] < length[1] ?
139 length[0] : length[1]);
140 uint32_t bytes2 = (length[2] < length[3] ?
141 length[2] : length[3]);
142 /* min number of bytes */
143 uint32_t bytes = (bytes1 < bytes2) ? bytes1 : bytes2;
144 uint32_t numKeyStreamsPerPkt;
145 uint16_t remainBytes[NUM_SSE_BUFS] = {0};
146 DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][KEYSTR_ROUND_LEN], 64);
147 /* structure to store the 4 keys */
148 DECLARE_ALIGNED(ZucKey4_t keys, 64);
149 /* structure to store the 4 IV's */
150 DECLARE_ALIGNED(ZucIv4_t ivs, 64);
151 uint32_t numBytesLeftOver = 0;
152 const uint8_t *pTempBufInPtr = NULL;
153 uint8_t *pTempBufOutPtr = NULL;
154 const uint64_t *pIn64[NUM_SSE_BUFS]= {NULL};
155 uint64_t *pOut64[NUM_SSE_BUFS] = {NULL};
156 uint64_t *pKeyStream64 = NULL;
157
158 /*
159 * Calculate the number of bytes left over for each packet,
160 * and setup the Keys and IVs
161 */
162 for (i = 0; i < NUM_SSE_BUFS; i++) {
163 remainBytes[i] = length[i];
164 keys.pKeys[i] = pKey[i];
165 ivs.pIvs[i] = pIv[i];
166 }
167
168 if (use_gfni)
169 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
170 else
171 asm_ZucInitialization_4_sse(&keys, &ivs, &state);
172
173 for (i = 0; i < NUM_SSE_BUFS; i++) {
174 pOut64[i] = (uint64_t *) pBufferOut[i];
175 pIn64[i] = (const uint64_t *) pBufferIn[i];
176 }
177
178 /* Encrypt common length of all buffers */
179 if (use_gfni)
180 asm_ZucCipher_4_gfni_sse(&state, pIn64, pOut64,
181 remainBytes, (uint16_t) bytes);
182 else
183 asm_ZucCipher_4_sse(&state, pIn64, pOut64,
184 remainBytes, (uint16_t) bytes);
185
186 /* process each packet separately for the remaining bytes */
187 for (i = 0; i < NUM_SSE_BUFS; i++) {
188 if (remainBytes[i]) {
189 /* need to copy the zuc state to single packet state */
190 singlePktState.lfsrState[0] = state.lfsrState[0][i];
191 singlePktState.lfsrState[1] = state.lfsrState[1][i];
192 singlePktState.lfsrState[2] = state.lfsrState[2][i];
193 singlePktState.lfsrState[3] = state.lfsrState[3][i];
194 singlePktState.lfsrState[4] = state.lfsrState[4][i];
195 singlePktState.lfsrState[5] = state.lfsrState[5][i];
196 singlePktState.lfsrState[6] = state.lfsrState[6][i];
197 singlePktState.lfsrState[7] = state.lfsrState[7][i];
198 singlePktState.lfsrState[8] = state.lfsrState[8][i];
199 singlePktState.lfsrState[9] = state.lfsrState[9][i];
200 singlePktState.lfsrState[10] = state.lfsrState[10][i];
201 singlePktState.lfsrState[11] = state.lfsrState[11][i];
202 singlePktState.lfsrState[12] = state.lfsrState[12][i];
203 singlePktState.lfsrState[13] = state.lfsrState[13][i];
204 singlePktState.lfsrState[14] = state.lfsrState[14][i];
205 singlePktState.lfsrState[15] = state.lfsrState[15][i];
206
207 singlePktState.fR1 = state.fR1[i];
208 singlePktState.fR2 = state.fR2[i];
209
210 numKeyStreamsPerPkt = remainBytes[i] / KEYSTR_ROUND_LEN;
211 numBytesLeftOver = remainBytes[i] % KEYSTR_ROUND_LEN;
212
213 pTempBufInPtr = pBufferIn[i];
214 pTempBufOutPtr = pBufferOut[i];
215
216 /* update the output and input pointers here to point
217 * to the i'th buffers */
218 pOut64[0] = (uint64_t *) &pTempBufOutPtr[length[i] -
219 remainBytes[i]];
220 pIn64[0] = (const uint64_t *) &pTempBufInPtr[length[i] -
221 remainBytes[i]];
222
223 while (numKeyStreamsPerPkt--) {
224 /* Generate the key stream 16 bytes at a time */
225 asm_ZucGenKeystream16B_sse(
226 (uint32_t *) keyStr[0],
227 &singlePktState);
228 pKeyStream64 = (uint64_t *) keyStr[0];
229 asm_XorKeyStream16B_sse(pIn64[0],
230 pOut64[0],
231 pKeyStream64);
232 pIn64[0] += 2;
233 pOut64[0] += 2;
234 }
235
236 /* Check for remaining 0 to 15 bytes */
237 if (numBytesLeftOver) {
238 DECLARE_ALIGNED(uint8_t tempSrc[16], 64);
239 DECLARE_ALIGNED(uint8_t tempDst[16], 64);
240 uint64_t *pTempSrc64;
241 uint64_t *pTempDst64;
242 uint32_t offset = length[i] - numBytesLeftOver;
243 const uint64_t num4BRounds =
244 ((numBytesLeftOver - 1) / 4) + 1;
245
246 asm_ZucGenKeystream_sse((uint32_t *)&keyStr[0],
247 &singlePktState,
248 num4BRounds);
249 /* copy the remaining bytes into temporary
250 * buffer and XOR with the 16 bytes of
251 * keystream. Then copy on the valid bytes back
252 * to the output buffer */
253 memcpy(&tempSrc[0], &pTempBufInPtr[offset],
254 numBytesLeftOver);
255 memset(&tempSrc[numBytesLeftOver], 0,
256 16 - numBytesLeftOver);
257
258 pKeyStream64 = (uint64_t *) &keyStr[0][0];
259 pTempSrc64 = (uint64_t *) &tempSrc[0];
260 pTempDst64 = (uint64_t *) &tempDst[0];
261 asm_XorKeyStream16B_sse(pTempSrc64,
262 pTempDst64,
263 pKeyStream64);
264
265 memcpy(&pTempBufOutPtr[offset],
266 &tempDst[0], numBytesLeftOver);
267 #ifdef SAFE_DATA
268 clear_mem(tempSrc, sizeof(tempSrc));
269 clear_mem(tempDst, sizeof(tempDst));
270 #endif
271 }
272 }
273 }
274 #ifdef SAFE_DATA
275 /* Clear sensitive data in stack */
276 clear_mem(keyStr, sizeof(keyStr));
277 clear_mem(&singlePktState, sizeof(singlePktState));
278 clear_mem(&state, sizeof(state));
279 clear_mem(&keys, sizeof(keys));
280 #endif
281 }
282
zuc_eea3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,void * pBufferOut,const uint32_t length)283 void zuc_eea3_1_buffer_sse(const void *pKey,
284 const void *pIv,
285 const void *pBufferIn,
286 void *pBufferOut,
287 const uint32_t length)
288 {
289 #ifndef LINUX
290 DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
291
292 SAVE_XMMS(xmm_save);
293 #endif
294 #ifdef SAFE_PARAM
295 /* Check for NULL pointers */
296 if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
297 pBufferOut == NULL)
298 return;
299
300 /* Check input data is in range of supported length */
301 if (length < ZUC_MIN_BYTELEN || length > ZUC_MAX_BYTELEN)
302 return;
303 #endif
304
305 _zuc_eea3_1_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length);
306
307 #ifdef SAFE_DATA
308 /* Clear sensitive data in registers */
309 CLEAR_SCRATCH_GPS();
310 CLEAR_SCRATCH_SIMD_REGS();
311 #endif
312 #ifndef LINUX
313 RESTORE_XMMS(xmm_save);
314 #endif
315 }
316
317 static inline
_zuc_eea3_4_buffer(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],void * pBufferOut[NUM_SSE_BUFS],const uint32_t length[NUM_SSE_BUFS],const unsigned use_gfni)318 void _zuc_eea3_4_buffer(const void * const pKey[NUM_SSE_BUFS],
319 const void * const pIv[NUM_SSE_BUFS],
320 const void * const pBufferIn[NUM_SSE_BUFS],
321 void *pBufferOut[NUM_SSE_BUFS],
322 const uint32_t length[NUM_SSE_BUFS],
323 const unsigned use_gfni)
324 {
325 #ifndef LINUX
326 DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
327
328 SAVE_XMMS(xmm_save);
329 #endif
330 #ifdef SAFE_PARAM
331 unsigned int i;
332
333 /* Check for NULL pointers */
334 if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
335 pBufferOut == NULL || length == NULL)
336 return;
337
338 for (i = 0; i < NUM_SSE_BUFS; i++) {
339 if (pKey[i] == NULL || pIv[i] == NULL ||
340 pBufferIn[i] == NULL || pBufferOut[i] == NULL)
341 return;
342
343 /* Check input data is in range of supported length */
344 if (length[i] < ZUC_MIN_BYTELEN || length[i] > ZUC_MAX_BYTELEN)
345 return;
346 }
347 #endif
348
349 _zuc_eea3_4_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length,
350 use_gfni);
351
352 #ifdef SAFE_DATA
353 /* Clear sensitive data in registers */
354 CLEAR_SCRATCH_GPS();
355 CLEAR_SCRATCH_SIMD_REGS();
356 #endif
357 #ifndef LINUX
358 RESTORE_XMMS(xmm_save);
359 #endif
360 }
361
/**
 * Public four-buffer ZUC-EEA3 entry point, non-GFNI variant.
 *
 * @param pKey       array of key pointers, one per buffer
 * @param pIv        array of IV pointers, one per buffer
 * @param pBufferIn  array of input buffers
 * @param pBufferOut array of output buffers
 * @param length     array of buffer lengths in bytes
 */
void zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
                           const void * const pIv[NUM_SSE_BUFS],
                           const void * const pBufferIn[NUM_SSE_BUFS],
                           void *pBufferOut[NUM_SSE_BUFS],
                           const uint32_t length[NUM_SSE_BUFS])
{
        const unsigned use_gfni = 0;

        _zuc_eea3_4_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           use_gfni);
}
370
/**
 * Public four-buffer ZUC-EEA3 entry point, GFNI variant.
 *
 * @param pKey       array of key pointers, one per buffer
 * @param pIv        array of IV pointers, one per buffer
 * @param pBufferIn  array of input buffers
 * @param pBufferOut array of output buffers
 * @param length     array of buffer lengths in bytes
 */
void zuc_eea3_4_buffer_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                const void * const pIv[NUM_SSE_BUFS],
                                const void * const pBufferIn[NUM_SSE_BUFS],
                                void *pBufferOut[NUM_SSE_BUFS],
                                const uint32_t length[NUM_SSE_BUFS])
{
        const unsigned use_gfni = 1;

        _zuc_eea3_4_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           use_gfni);
}
379
380 static inline
_zuc_eea3_n_buffer(const void * const pKey[],const void * const pIv[],const void * const pBufferIn[],void * pBufferOut[],const uint32_t length[],const uint32_t numBuffers,const unsigned use_gfni)381 void _zuc_eea3_n_buffer(const void * const pKey[], const void * const pIv[],
382 const void * const pBufferIn[], void *pBufferOut[],
383 const uint32_t length[],
384 const uint32_t numBuffers,
385 const unsigned use_gfni)
386 {
387 #ifndef LINUX
388 DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
389
390 SAVE_XMMS(xmm_save);
391 #endif
392
393 unsigned int i;
394 unsigned int packetCount = numBuffers;
395
396 #ifdef SAFE_PARAM
397 /* Check for NULL pointers */
398 if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
399 pBufferOut == NULL || length == NULL)
400 return;
401
402 for (i = 0; i < numBuffers; i++) {
403 if (pKey[i] == NULL || pIv[i] == NULL ||
404 pBufferIn[i] == NULL || pBufferOut[i] == NULL)
405 return;
406
407 /* Check input data is in range of supported length */
408 if (length[i] < ZUC_MIN_BYTELEN || length[i] > ZUC_MAX_BYTELEN)
409 return;
410 }
411 #endif
412 i = 0;
413
414 while (packetCount >= NUM_SSE_BUFS) {
415 packetCount -= NUM_SSE_BUFS;
416 _zuc_eea3_4_buffer(&pKey[i],
417 &pIv[i],
418 &pBufferIn[i],
419 &pBufferOut[i],
420 &length[i],
421 use_gfni);
422 i += NUM_SSE_BUFS;
423 }
424
425 while(packetCount--) {
426 _zuc_eea3_1_buffer_sse(pKey[i],
427 pIv[i],
428 pBufferIn[i],
429 pBufferOut[i],
430 length[i]);
431 i++;
432 }
433
434 #ifdef SAFE_DATA
435 /* Clear sensitive data in registers */
436 CLEAR_SCRATCH_GPS();
437 CLEAR_SCRATCH_SIMD_REGS();
438 #endif
439 #ifndef LINUX
440 RESTORE_XMMS(xmm_save);
441 #endif
442 }
443
/**
 * Public N-buffer ZUC-EEA3 entry point, non-GFNI variant.
 *
 * @param pKey       array of key pointers, one per buffer
 * @param pIv        array of IV pointers, one per buffer
 * @param pBufferIn  array of input buffers
 * @param pBufferOut array of output buffers
 * @param length     array of buffer lengths in bytes
 * @param numBuffers number of buffers to process
 */
void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[],
                           const void * const pBufferIn[], void *pBufferOut[],
                           const uint32_t length[],
                           const uint32_t numBuffers)
{
        const unsigned use_gfni = 0;

        _zuc_eea3_n_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           numBuffers, use_gfni);
}
452
/**
 * Public N-buffer ZUC-EEA3 entry point, GFNI variant.
 *
 * @param pKey       array of key pointers, one per buffer
 * @param pIv        array of IV pointers, one per buffer
 * @param pBufferIn  array of input buffers
 * @param pBufferOut array of output buffers
 * @param length     array of buffer lengths in bytes
 * @param numBuffers number of buffers to process
 */
void zuc_eea3_n_buffer_gfni_sse(const void * const pKey[],
                                const void * const pIv[],
                                const void * const pBufferIn[],
                                void *pBufferOut[],
                                const uint32_t length[],
                                const uint32_t numBuffers)
{
        const unsigned use_gfni = 1;

        _zuc_eea3_n_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           numBuffers, use_gfni);
}
463
/**
 * Rotate a 64-bit value left.
 *
 * Both shift counts are reduced modulo 64 so that a rotation by 0 (or by any
 * multiple of 64) is well defined and returns 'u' unchanged; a plain
 * "u >> (64 - r)" would shift by the full width of the type when r == 0,
 * which is undefined behaviour in C.
 *
 * @param u value to rotate
 * @param r number of bit positions to rotate by
 * @return 'u' rotated left by (r mod 64) bits
 */
static inline uint64_t rotate_left(uint64_t u, size_t r)
{
        r &= 63;

        return (u << r) | (u >> ((64 - r) & 63));
}
468
/**
 * Load a 64-bit value, in host byte order, from an arbitrary address.
 *
 * memcpy is used instead of dereferencing a casted pointer because callers
 * pass addresses inside uint32_t arrays: those are only guaranteed 4-byte
 * alignment and are not uint64_t objects, so a direct uint64_t access would
 * be both potentially misaligned and a strict-aliasing violation.
 *
 * @param ptr address of at least 8 readable bytes
 * @return the 8 bytes at 'ptr' as a uint64_t
 */
static inline uint64_t load_uint64(const void *ptr)
{
        uint64_t value;

        memcpy(&value, ptr, sizeof(value));
        return value;
}
473
474 static inline
_zuc_eia3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,const uint32_t lengthInBits,uint32_t * pMacI)475 void _zuc_eia3_1_buffer_sse(const void *pKey,
476 const void *pIv,
477 const void *pBufferIn,
478 const uint32_t lengthInBits,
479 uint32_t *pMacI)
480 {
481 DECLARE_ALIGNED(ZucState_t zucState, 16);
482 DECLARE_ALIGNED(uint32_t keyStream[4 * 2], 64);
483 const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
484 /* generate a key-stream 2 words longer than the input message */
485 const uint32_t N = lengthInBits + (2 * ZUC_WORD_BITS);
486 uint32_t L = (N + 31) / ZUC_WORD_BITS;
487 uint32_t *pZuc = (uint32_t *) &keyStream[0];
488 uint32_t remainingBits = lengthInBits;
489 uint32_t T = 0;
490 const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
491
492 asm_ZucInitialization_sse(pKey, pIv, &(zucState));
493 asm_ZucGenKeystream16B_sse(pZuc, &zucState);
494
495 /* loop over the message bits */
496 while (remainingBits >= keyStreamLengthInBits) {
497 remainingBits -= keyStreamLengthInBits;
498 L -= (keyStreamLengthInBits / 32);
499
500 /* Generate the next key stream 8 bytes or 16 bytes */
501 if (!remainingBits)
502 asm_ZucGenKeystream8B_sse(&keyStream[4], &zucState);
503 else
504 asm_ZucGenKeystream16B_sse(&keyStream[4], &zucState);
505 T = asm_Eia3Round16BSSE(T, keyStream, pIn8);
506 /* Copy the last keystream generated
507 * to the first 16 bytes */
508 memcpy(&keyStream[0], &keyStream[4], KEYSTR_ROUND_LEN);
509 pIn8 = &pIn8[KEYSTR_ROUND_LEN];
510 }
511
512 /*
513 * If remaining bits has more than 2 ZUC WORDS (double words),
514 * keystream needs to have up to another 2 ZUC WORDS (8B)
515 */
516 if (remainingBits > (2 * 32))
517 asm_ZucGenKeystream8B_sse(&keyStream[4], &zucState);
518 T ^= asm_Eia3RemainderSSE(&keyStream[0], pIn8, remainingBits);
519 T ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]),
520 remainingBits % 32);
521
522 /* save the final MAC-I result */
523 uint32_t keyBlock = keyStream[L - 1];
524 *pMacI = bswap4(T ^ keyBlock);
525
526 #ifdef SAFE_DATA
527 /* Clear sensitive data (in registers and stack) */
528 clear_mem(keyStream, sizeof(keyStream));
529 clear_mem(&zucState, sizeof(zucState));
530 #endif
531 }
532
533 static inline
_zuc_eia3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],const uint32_t lengthInBits[NUM_SSE_BUFS],uint32_t * pMacI[NUM_SSE_BUFS],const unsigned use_gfni)534 void _zuc_eia3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
535 const void * const pIv[NUM_SSE_BUFS],
536 const void * const pBufferIn[NUM_SSE_BUFS],
537 const uint32_t lengthInBits[NUM_SSE_BUFS],
538 uint32_t *pMacI[NUM_SSE_BUFS],
539 const unsigned use_gfni)
540 {
541 unsigned int i;
542 DECLARE_ALIGNED(ZucState4_t state, 64);
543 DECLARE_ALIGNED(ZucState_t singlePktState, 64);
544 DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][2*KEYSTR_ROUND_LEN], 64);
545 /* structure to store the 4 keys */
546 DECLARE_ALIGNED(ZucKey4_t keys, 64);
547 /* structure to store the 4 IV's */
548 DECLARE_ALIGNED(ZucIv4_t ivs, 64);
549 const uint8_t *pIn8[NUM_SSE_BUFS] = {NULL};
550 uint32_t remainCommonBits;
551 uint32_t numKeyStr = 0;
552 uint32_t T[NUM_SSE_BUFS] = {0};
553 const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
554 DECLARE_ALIGNED(uint32_t *pKeyStrArr[NUM_SSE_BUFS], 16) = {NULL};
555 unsigned int allCommonBits;
556
557 /* Check if all lengths are equal */
558 if ((lengthInBits[0] == lengthInBits[1]) &&
559 (lengthInBits[0] == lengthInBits[2]) &&
560 (lengthInBits[0] == lengthInBits[3])) {
561 remainCommonBits = lengthInBits[0];
562 allCommonBits = 1;
563 } else {
564 /* Calculate the minimum input packet size */
565 uint32_t bits1 = (lengthInBits[0] < lengthInBits[1] ?
566 lengthInBits[0] : lengthInBits[1]);
567 uint32_t bits2 = (lengthInBits[2] < lengthInBits[3] ?
568 lengthInBits[2] : lengthInBits[3]);
569
570 remainCommonBits = (bits1 < bits2) ? bits1 : bits2;
571 allCommonBits = 0;
572 }
573
574 for (i = 0; i < NUM_SSE_BUFS; i++) {
575 pIn8[i] = (const uint8_t *) pBufferIn[i];
576 pKeyStrArr[i] = (uint32_t *) &keyStr[i][0];
577 keys.pKeys[i] = pKey[i];
578 ivs.pIvs[i] = pIv[i];
579 }
580
581 if (use_gfni) {
582 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
583
584 /* Generate 16 bytes at a time */
585 asm_ZucGenKeystream16B_4_gfni_sse(&state, pKeyStrArr);
586 } else {
587 asm_ZucInitialization_4_sse(&keys, &ivs, &state);
588
589 /* Generate 16 bytes at a time */
590 asm_ZucGenKeystream16B_4_sse(&state, pKeyStrArr);
591 }
592
593 /* Point at the next 16 bytes of the key */
594 for (i = 0; i < NUM_SSE_BUFS; i++)
595 pKeyStrArr[i] = (uint32_t *) &keyStr[i][KEYSTR_ROUND_LEN];
596
597 /* loop over the message bits */
598 while (remainCommonBits >= keyStreamLengthInBits) {
599 remainCommonBits -= keyStreamLengthInBits;
600 numKeyStr++;
601 /* Generate the next key stream 8 bytes or 16 bytes */
602 if (use_gfni) {
603 if (!remainCommonBits && allCommonBits)
604 asm_ZucGenKeystream8B_4_gfni_sse(&state,
605 pKeyStrArr);
606 else
607 asm_ZucGenKeystream16B_4_gfni_sse(&state,
608 pKeyStrArr);
609 } else {
610 if (!remainCommonBits && allCommonBits)
611 asm_ZucGenKeystream8B_4_sse(&state, pKeyStrArr);
612 else
613 asm_ZucGenKeystream16B_4_sse(&state,
614 pKeyStrArr);
615 }
616 for (i = 0; i < NUM_SSE_BUFS; i++) {
617 T[i] = asm_Eia3Round16BSSE(T[i], keyStr[i],
618 pIn8[i]);
619 /* Copy the last keystream generated
620 * to the first 16 bytes */
621 memcpy(&keyStr[i][0], &keyStr[i][KEYSTR_ROUND_LEN],
622 KEYSTR_ROUND_LEN);
623 pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
624 }
625 }
626
627 /* Process each packet separately for the remaining bits */
628 for (i = 0; i < NUM_SSE_BUFS; i++) {
629 const uint32_t N = lengthInBits[i] + (2 * ZUC_WORD_BITS);
630 uint32_t L = ((N + 31) / ZUC_WORD_BITS) -
631 numKeyStr*(keyStreamLengthInBits / 32);
632 uint32_t remainBits = lengthInBits[i] -
633 numKeyStr*keyStreamLengthInBits;
634 uint32_t *keyStr32 = (uint32_t *) keyStr[i];
635
636 /* If remaining bits are more than 8 bytes, we need to generate
637 * at least 8B more of keystream, so we need to copy
638 * the zuc state to single packet state first */
639 if (remainBits > (2*32)) {
640 singlePktState.lfsrState[0] = state.lfsrState[0][i];
641 singlePktState.lfsrState[1] = state.lfsrState[1][i];
642 singlePktState.lfsrState[2] = state.lfsrState[2][i];
643 singlePktState.lfsrState[3] = state.lfsrState[3][i];
644 singlePktState.lfsrState[4] = state.lfsrState[4][i];
645 singlePktState.lfsrState[5] = state.lfsrState[5][i];
646 singlePktState.lfsrState[6] = state.lfsrState[6][i];
647 singlePktState.lfsrState[7] = state.lfsrState[7][i];
648 singlePktState.lfsrState[8] = state.lfsrState[8][i];
649 singlePktState.lfsrState[9] = state.lfsrState[9][i];
650 singlePktState.lfsrState[10] = state.lfsrState[10][i];
651 singlePktState.lfsrState[11] = state.lfsrState[11][i];
652 singlePktState.lfsrState[12] = state.lfsrState[12][i];
653 singlePktState.lfsrState[13] = state.lfsrState[13][i];
654 singlePktState.lfsrState[14] = state.lfsrState[14][i];
655 singlePktState.lfsrState[15] = state.lfsrState[15][i];
656
657 singlePktState.fR1 = state.fR1[i];
658 singlePktState.fR2 = state.fR2[i];
659 }
660
661 while (remainBits >= keyStreamLengthInBits) {
662 remainBits -= keyStreamLengthInBits;
663 L -= (keyStreamLengthInBits / 32);
664
665 /* Generate the next key stream 8 bytes or 16 bytes */
666 if (!remainBits)
667 asm_ZucGenKeystream8B_sse(&keyStr32[4],
668 &singlePktState);
669 else
670 asm_ZucGenKeystream16B_sse(&keyStr32[4],
671 &singlePktState);
672 T[i] = asm_Eia3Round16BSSE(T[i], keyStr32,
673 pIn8[i]);
674 /* Copy the last keystream generated
675 * to the first 16 bytes */
676 memcpy(keyStr32, &keyStr32[4], KEYSTR_ROUND_LEN);
677 pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
678 }
679
680 /*
681 * If remaining bits has more than 2 ZUC WORDS (double words),
682 * keystream needs to have up to another 2 ZUC WORDS (8B)
683 */
684 if (remainBits > (2 * 32))
685 asm_ZucGenKeystream8B_sse(&keyStr32[4],
686 &singlePktState);
687
688 uint32_t keyBlock = keyStr32[L - 1];
689
690 T[i] ^= asm_Eia3RemainderSSE(keyStr32, pIn8[i], remainBits);
691 T[i] ^= rotate_left(load_uint64(&keyStr32[remainBits / 32]),
692 remainBits % 32);
693
694 /* save the final MAC-I result */
695 *(pMacI[i]) = bswap4(T[i] ^ keyBlock);
696 }
697
698 #ifdef SAFE_DATA
699 /* Clear sensitive data (in registers and stack) */
700 clear_mem(keyStr, sizeof(keyStr));
701 clear_mem(&singlePktState, sizeof(singlePktState));
702 clear_mem(&state, sizeof(state));
703 clear_mem(&keys, sizeof(keys));
704 #endif
705 }
706
zuc_eia3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,const uint32_t lengthInBits,uint32_t * pMacI)707 void zuc_eia3_1_buffer_sse(const void *pKey,
708 const void *pIv,
709 const void *pBufferIn,
710 const uint32_t lengthInBits,
711 uint32_t *pMacI)
712 {
713 #ifndef LINUX
714 DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
715
716 SAVE_XMMS(xmm_save);
717 #endif
718 #ifdef SAFE_PARAM
719 /* Check for NULL pointers */
720 if (pKey == NULL || pIv == NULL || pBufferIn == NULL || pMacI == NULL)
721 return;
722
723 /* Check input data is in range of supported length */
724 if (lengthInBits < ZUC_MIN_BITLEN || lengthInBits > ZUC_MAX_BITLEN)
725 return;
726 #endif
727
728 _zuc_eia3_1_buffer_sse(pKey, pIv, pBufferIn, lengthInBits, pMacI);
729
730 #ifdef SAFE_DATA
731 CLEAR_SCRATCH_GPS();
732 CLEAR_SCRATCH_SIMD_REGS();
733 #endif
734 #ifndef LINUX
735 RESTORE_XMMS(xmm_save);
736 #endif
737 }
738
739 static inline
_zuc_eia3_4_buffer_job(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],uint32_t * pMacI[NUM_SSE_BUFS],const uint16_t lengthInBits[NUM_SSE_BUFS],const void * const job_in_lane[NUM_SSE_BUFS],const unsigned use_gfni)740 void _zuc_eia3_4_buffer_job(const void * const pKey[NUM_SSE_BUFS],
741 const void * const pIv[NUM_SSE_BUFS],
742 const void * const pBufferIn[NUM_SSE_BUFS],
743 uint32_t *pMacI[NUM_SSE_BUFS],
744 const uint16_t lengthInBits[NUM_SSE_BUFS],
745 const void * const job_in_lane[NUM_SSE_BUFS],
746 const unsigned use_gfni)
747 {
748 unsigned int i;
749 DECLARE_ALIGNED(ZucState4_t state, 64);
750 DECLARE_ALIGNED(ZucState_t singlePktState, 64);
751 DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][2*KEYSTR_ROUND_LEN], 64);
752 /* structure to store the 4 keys */
753 DECLARE_ALIGNED(ZucKey4_t keys, 64);
754 /* structure to store the 4 IV's */
755 DECLARE_ALIGNED(ZucIv4_t ivs, 64);
756 const uint8_t *pIn8[NUM_SSE_BUFS] = {NULL};
757 uint32_t remainCommonBits;
758 uint32_t numKeyStr = 0;
759 uint32_t T[NUM_SSE_BUFS] = {0};
760 const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
761 DECLARE_ALIGNED(uint32_t *pKeyStrArr[NUM_SSE_BUFS], 16) = {NULL};
762 unsigned int allCommonBits;
763
764 /* Check if all lengths are equal */
765 if ((lengthInBits[0] == lengthInBits[1]) &&
766 (lengthInBits[0] == lengthInBits[2]) &&
767 (lengthInBits[0] == lengthInBits[3])) {
768 remainCommonBits = lengthInBits[0];
769 allCommonBits = 1;
770 } else {
771 /* Calculate the minimum input packet size */
772 uint32_t bits1 = (lengthInBits[0] < lengthInBits[1] ?
773 lengthInBits[0] : lengthInBits[1]);
774 uint32_t bits2 = (lengthInBits[2] < lengthInBits[3] ?
775 lengthInBits[2] : lengthInBits[3]);
776
777 remainCommonBits = (bits1 < bits2) ? bits1 : bits2;
778 allCommonBits = 0;
779 }
780
781 for (i = 0; i < NUM_SSE_BUFS; i++) {
782 pIn8[i] = (const uint8_t *) pBufferIn[i];
783 pKeyStrArr[i] = (uint32_t *) &keyStr[i][0];
784 keys.pKeys[i] = pKey[i];
785 ivs.pIvs[i] = pIv[i];
786 }
787
788 if (use_gfni) {
789 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
790
791 /* Generate 16 bytes at a time */
792 asm_ZucGenKeystream16B_4_gfni_sse(&state, pKeyStrArr);
793 } else {
794 asm_ZucInitialization_4_sse(&keys, &ivs, &state);
795
796 /* Generate 16 bytes at a time */
797 asm_ZucGenKeystream16B_4_sse(&state, pKeyStrArr);
798 }
799
800 /* Point at the next 16 bytes of the key */
801 for (i = 0; i < NUM_SSE_BUFS; i++)
802 pKeyStrArr[i] = (uint32_t *) &keyStr[i][KEYSTR_ROUND_LEN];
803
804 /* loop over the message bits */
805 while (remainCommonBits >= keyStreamLengthInBits) {
806 remainCommonBits -= keyStreamLengthInBits;
807 numKeyStr++;
808 /* Generate the next key stream 8 bytes or 16 bytes */
809 if (use_gfni) {
810 if (!remainCommonBits && allCommonBits)
811 asm_ZucGenKeystream8B_4_gfni_sse(&state,
812 pKeyStrArr);
813 else
814 asm_ZucGenKeystream16B_4_gfni_sse(&state,
815 pKeyStrArr);
816 } else {
817 if (!remainCommonBits && allCommonBits)
818 asm_ZucGenKeystream8B_4_sse(&state,
819 pKeyStrArr);
820 else
821 asm_ZucGenKeystream16B_4_sse(&state,
822 pKeyStrArr);
823 }
824 for (i = 0; i < NUM_SSE_BUFS; i++) {
825 if (job_in_lane[i] == NULL)
826 continue;
827 T[i] = asm_Eia3Round16BSSE(T[i], keyStr[i],
828 pIn8[i]);
829 /* Copy the last keystream generated
830 * to the first 16 bytes */
831 memcpy(&keyStr[i][0], &keyStr[i][KEYSTR_ROUND_LEN],
832 KEYSTR_ROUND_LEN);
833 pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
834 }
835 }
836
837 /* Process each packet separately for the remaining bits */
838 for (i = 0; i < NUM_SSE_BUFS; i++) {
839 if (job_in_lane[i] == NULL)
840 continue;
841
842 const uint32_t N = lengthInBits[i] + (2 * ZUC_WORD_BITS);
843 uint32_t L = ((N + 31) / ZUC_WORD_BITS) -
844 numKeyStr*(keyStreamLengthInBits / 32);
845 uint32_t remainBits = lengthInBits[i] -
846 numKeyStr*keyStreamLengthInBits;
847 uint32_t *keyStr32 = (uint32_t *) keyStr[i];
848
849 /* If remaining bits are more than 8 bytes, we need to generate
850 * at least 8B more of keystream, so we need to copy
851 * the zuc state to single packet state first */
852 if (remainBits > (2*32)) {
853 singlePktState.lfsrState[0] = state.lfsrState[0][i];
854 singlePktState.lfsrState[1] = state.lfsrState[1][i];
855 singlePktState.lfsrState[2] = state.lfsrState[2][i];
856 singlePktState.lfsrState[3] = state.lfsrState[3][i];
857 singlePktState.lfsrState[4] = state.lfsrState[4][i];
858 singlePktState.lfsrState[5] = state.lfsrState[5][i];
859 singlePktState.lfsrState[6] = state.lfsrState[6][i];
860 singlePktState.lfsrState[7] = state.lfsrState[7][i];
861 singlePktState.lfsrState[8] = state.lfsrState[8][i];
862 singlePktState.lfsrState[9] = state.lfsrState[9][i];
863 singlePktState.lfsrState[10] = state.lfsrState[10][i];
864 singlePktState.lfsrState[11] = state.lfsrState[11][i];
865 singlePktState.lfsrState[12] = state.lfsrState[12][i];
866 singlePktState.lfsrState[13] = state.lfsrState[13][i];
867 singlePktState.lfsrState[14] = state.lfsrState[14][i];
868 singlePktState.lfsrState[15] = state.lfsrState[15][i];
869
870 singlePktState.fR1 = state.fR1[i];
871 singlePktState.fR2 = state.fR2[i];
872 }
873
874 while (remainBits >= keyStreamLengthInBits) {
875 remainBits -= keyStreamLengthInBits;
876 L -= (keyStreamLengthInBits / 32);
877
878 /* Generate the next key stream 8 bytes or 16 bytes */
879 if (!remainBits)
880 asm_ZucGenKeystream8B_sse(&keyStr32[4],
881 &singlePktState);
882 else
883 asm_ZucGenKeystream16B_sse(&keyStr32[4],
884 &singlePktState);
885 T[i] = asm_Eia3Round16BSSE(T[i], keyStr32,
886 pIn8[i]);
887 /* Copy the last keystream generated
888 * to the first 16 bytes */
889 memcpy(keyStr32, &keyStr32[4], KEYSTR_ROUND_LEN);
890 pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
891 }
892
893 /*
894 * If remaining bits has more than 2 ZUC WORDS (double words),
895 * keystream needs to have up to another 2 ZUC WORDS (8B)
896 */
897 if (remainBits > (2 * 32))
898 asm_ZucGenKeystream8B_sse(&keyStr32[4],
899 &singlePktState);
900
901 uint32_t keyBlock = keyStr32[L - 1];
902
903 T[i] ^= asm_Eia3RemainderSSE(keyStr32, pIn8[i], remainBits);
904 T[i] ^= rotate_left(load_uint64(&keyStr32[remainBits / 32]),
905 remainBits % 32);
906
907 /* save the final MAC-I result */
908 *(pMacI[i]) = bswap4(T[i] ^ keyBlock);
909 }
910
911 #ifdef SAFE_DATA
912 /* Clear sensitive data (in registers and stack) */
913 clear_mem(keyStr, sizeof(keyStr));
914 clear_mem(&singlePktState, sizeof(singlePktState));
915 clear_mem(&state, sizeof(state));
916 clear_mem(&keys, sizeof(keys));
917 #endif
918 }
919
/*
 * ZUC-EIA3 4-buffer job entry point, non-GFNI flavour.
 *
 * Forwards every argument untouched to _zuc_eia3_4_buffer_job() and passes
 * 0 as the trailing argument (presumably the "use GFNI" selector - confirm
 * against the _zuc_eia3_4_buffer_job() definition).
 *
 * pKey         - NUM_SSE_BUFS key pointers, one per lane
 * pIv          - NUM_SSE_BUFS IV pointers, one per lane
 * pBufferIn    - NUM_SSE_BUFS input buffer pointers, one per lane
 * pMacI        - NUM_SSE_BUFS output pointers, one per lane
 * lengthInBits - NUM_SSE_BUFS message lengths, expressed in bits
 * job_in_lane  - NUM_SSE_BUFS per-lane job pointers
 */
void zuc_eia3_4_buffer_job_no_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                       const void * const pIv[NUM_SSE_BUFS],
                                       const void * const pBufferIn[NUM_SSE_BUFS],
                                       uint32_t *pMacI[NUM_SSE_BUFS],
                                       const uint16_t lengthInBits[NUM_SSE_BUFS],
                                       const void * const job_in_lane[NUM_SSE_BUFS])
{
        /* Trailing selector handed to the shared implementation */
        const unsigned gfni_select = 0;

        _zuc_eia3_4_buffer_job(pKey, pIv, pBufferIn, pMacI, lengthInBits,
                               job_in_lane, gfni_select);
}
930
/*
 * ZUC-EIA3 4-buffer job entry point, GFNI flavour.
 *
 * Forwards every argument untouched to _zuc_eia3_4_buffer_job() and passes
 * 1 as the trailing argument (presumably the "use GFNI" selector - confirm
 * against the _zuc_eia3_4_buffer_job() definition).
 *
 * pKey         - NUM_SSE_BUFS key pointers, one per lane
 * pIv          - NUM_SSE_BUFS IV pointers, one per lane
 * pBufferIn    - NUM_SSE_BUFS input buffer pointers, one per lane
 * pMacI        - NUM_SSE_BUFS output pointers, one per lane
 * lengthInBits - NUM_SSE_BUFS message lengths, expressed in bits
 * job_in_lane  - NUM_SSE_BUFS per-lane job pointers
 */
void zuc_eia3_4_buffer_job_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                    const void * const pIv[NUM_SSE_BUFS],
                                    const void * const pBufferIn[NUM_SSE_BUFS],
                                    uint32_t *pMacI[NUM_SSE_BUFS],
                                    const uint16_t lengthInBits[NUM_SSE_BUFS],
                                    const void * const job_in_lane[NUM_SSE_BUFS])
{
        /* Trailing selector handed to the shared implementation */
        const unsigned gfni_select = 1;

        _zuc_eia3_4_buffer_job(pKey, pIv, pBufferIn, pMacI, lengthInBits,
                               job_in_lane, gfni_select);
}
941
942 static inline
_zuc_eia3_n_buffer_sse(const void * const pKey[],const void * const pIv[],const void * const pBufferIn[],const uint32_t lengthInBits[],uint32_t * pMacI[],const uint32_t numBuffers,const unsigned use_gfni)943 void _zuc_eia3_n_buffer_sse(const void * const pKey[],
944 const void * const pIv[],
945 const void * const pBufferIn[],
946 const uint32_t lengthInBits[],
947 uint32_t *pMacI[],
948 const uint32_t numBuffers,
949 const unsigned use_gfni)
950 {
951 #ifndef LINUX
952 DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
953
954 SAVE_XMMS(xmm_save);
955 #endif
956
957 unsigned int i;
958 unsigned int packetCount = numBuffers;
959
960 #ifdef SAFE_PARAM
961 /* Check for NULL pointers */
962 if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
963 lengthInBits == NULL || pMacI == NULL)
964 return;
965
966 for (i = 0; i < numBuffers; i++) {
967 if (pKey[i] == NULL || pIv[i] == NULL ||
968 pBufferIn[i] == NULL || pMacI[i] == NULL)
969 return;
970
971 /* Check input data is in range of supported length */
972 if (lengthInBits[i] < ZUC_MIN_BITLEN ||
973 lengthInBits[i] > ZUC_MAX_BITLEN)
974 return;
975 }
976 #endif
977 i = 0;
978
979 while(packetCount >= 4) {
980 packetCount -=4;
981 _zuc_eia3_4_buffer_sse(&pKey[i],
982 &pIv[i],
983 &pBufferIn[i],
984 &lengthInBits[i],
985 &pMacI[i],
986 use_gfni);
987 i+=4;
988 }
989
990 while(packetCount--) {
991 _zuc_eia3_1_buffer_sse(pKey[i],
992 pIv[i],
993 pBufferIn[i],
994 lengthInBits[i],
995 pMacI[i]);
996 i++;
997 }
998
999 #ifdef SAFE_DATA
1000 /* Clear sensitive data in registers */
1001 CLEAR_SCRATCH_GPS();
1002 CLEAR_SCRATCH_SIMD_REGS();
1003 #endif
1004 #ifndef LINUX
1005 RESTORE_XMMS(xmm_save);
1006 #endif
1007 }
1008
/*
 * ZUC-EIA3 over numBuffers packets, non-GFNI flavour.
 *
 * Delegates to _zuc_eia3_n_buffer_sse() with use_gfni set to 0; every
 * other argument is forwarded unchanged.
 *
 * pKey         - array of numBuffers key pointers
 * pIv          - array of numBuffers IV pointers
 * pBufferIn    - array of numBuffers input buffer pointers
 * lengthInBits - array of numBuffers message lengths, in bits
 * pMacI        - array of numBuffers output pointers
 * numBuffers   - number of packets to process
 */
void zuc_eia3_n_buffer_sse(const void * const pKey[],
                           const void * const pIv[],
                           const void * const pBufferIn[],
                           const uint32_t lengthInBits[],
                           uint32_t *pMacI[],
                           const uint32_t numBuffers)
{
        const unsigned use_gfni = 0;

        _zuc_eia3_n_buffer_sse(pKey, pIv, pBufferIn, lengthInBits,
                               pMacI, numBuffers, use_gfni);
}
1019
/*
 * ZUC-EIA3 over numBuffers packets, GFNI flavour.
 *
 * Delegates to _zuc_eia3_n_buffer_sse() with use_gfni set to 1; every
 * other argument is forwarded unchanged.
 *
 * pKey         - array of numBuffers key pointers
 * pIv          - array of numBuffers IV pointers
 * pBufferIn    - array of numBuffers input buffer pointers
 * lengthInBits - array of numBuffers message lengths, in bits
 * pMacI        - array of numBuffers output pointers
 * numBuffers   - number of packets to process
 */
void zuc_eia3_n_buffer_gfni_sse(const void * const pKey[],
                                const void * const pIv[],
                                const void * const pBufferIn[],
                                const uint32_t lengthInBits[],
                                uint32_t *pMacI[],
                                const uint32_t numBuffers)
{
        const unsigned use_gfni = 1;

        _zuc_eia3_n_buffer_sse(pKey, pIv, pBufferIn, lengthInBits,
                               pMacI, numBuffers, use_gfni);
}
1030