1 /*******************************************************************************
2   Copyright (c) 2009-2020, Intel Corporation
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6 
7       * Redistributions of source code must retain the above copyright notice,
8         this list of conditions and the following disclaimer.
9       * Redistributions in binary form must reproduce the above copyright
10         notice, this list of conditions and the following disclaimer in the
11         documentation and/or other materials provided with the distribution.
12       * Neither the name of Intel Corporation nor the names of its contributors
13         may be used to endorse or promote products derived from this software
14         without specific prior written permission.
15 
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27 
28 /*-----------------------------------------------------------------------
29 * zuc_sse.c
30 *-----------------------------------------------------------------------
31 * An implementation of ZUC, the core algorithm for the
32 * 3GPP Confidentiality and Integrity algorithms.
33 *
34 *-----------------------------------------------------------------------*/
35 
36 #include <string.h>
37 
38 #include "include/zuc_internal.h"
39 #include "include/wireless_common.h"
40 #include "include/save_xmms.h"
41 #include "include/clear_regs_mem.h"
42 #include "intel-ipsec-mb.h"
43 
44 #define SAVE_XMMS               save_xmms
45 #define RESTORE_XMMS            restore_xmms
46 #define CLEAR_SCRATCH_SIMD_REGS clear_scratch_xmms_sse
47 
48 #define NUM_SSE_BUFS 4
49 #define KEYSTR_ROUND_LEN 16
50 
51 static inline
_zuc_eea3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,void * pBufferOut,const uint32_t length)52 void _zuc_eea3_1_buffer_sse(const void *pKey,
53                             const void *pIv,
54                             const void *pBufferIn,
55                             void *pBufferOut,
56                             const uint32_t length)
57 {
58         DECLARE_ALIGNED(ZucState_t zucState, 16);
59         DECLARE_ALIGNED(uint8_t keyStream[KEYSTR_ROUND_LEN], 16);
60         const uint64_t *pIn64 = NULL;
61         uint64_t *pOut64 = NULL, *pKeyStream64 = NULL;
62         uint64_t *pTemp64 = NULL, *pdstTemp64 = NULL;
63 
64         uint32_t numKeyStreamsPerPkt = length/ KEYSTR_ROUND_LEN;
65         const uint32_t numBytesLeftOver = length % KEYSTR_ROUND_LEN;
66 
67         /* initialize the zuc state */
68         asm_ZucInitialization_sse(pKey, pIv, &(zucState));
69 
70         /* Loop Over all the Quad-Words in input buffer and XOR with the 64bits
71          * of generated keystream */
72         pOut64 = (uint64_t *) pBufferOut;
73         pIn64 = (const uint64_t *) pBufferIn;
74 
75         while (numKeyStreamsPerPkt--) {
76                 /* Generate the key stream 16 bytes at a time */
77                 asm_ZucGenKeystream16B_sse((uint32_t *) &keyStream[0],
78                                            &zucState);
79 
80                 /* XOR The Keystream generated with the input buffer here */
81                 pKeyStream64 = (uint64_t *) keyStream;
82                 asm_XorKeyStream16B_sse(pIn64, pOut64, pKeyStream64);
83                 pIn64 += 2;
84                 pOut64 += 2;
85         }
86 
87         /* Check for remaining 0 to 15 bytes */
88         if (numBytesLeftOver) {
89                 /* buffer to store 16 bytes of keystream */
90                 DECLARE_ALIGNED(uint8_t tempSrc[KEYSTR_ROUND_LEN], 16);
91                 DECLARE_ALIGNED(uint8_t tempDst[KEYSTR_ROUND_LEN], 16);
92                 const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
93                 uint8_t *pOut8 = (uint8_t *) pBufferOut;
94                 const uint64_t num4BRounds = ((numBytesLeftOver - 1) / 4) + 1;
95 
96                 asm_ZucGenKeystream_sse((uint32_t *) &keyStream[0],
97                                         &zucState, num4BRounds);
98 
99                 /* copy the remaining bytes into temporary buffer and XOR with
100                  * the 64-bytes of keystream. Then copy on the valid bytes back
101                  * to the output buffer */
102 
103                 memcpy(&tempSrc[0], &pIn8[length - numBytesLeftOver],
104                        numBytesLeftOver);
105                 pKeyStream64 = (uint64_t *) &keyStream[0];
106                 pTemp64 = (uint64_t *) &tempSrc[0];
107                 pdstTemp64 = (uint64_t *) &tempDst[0];
108 
109                 asm_XorKeyStream16B_sse(pTemp64, pdstTemp64,
110                                         pKeyStream64);
111                 memcpy(&pOut8[length - numBytesLeftOver], &tempDst[0],
112                        numBytesLeftOver);
113 #ifdef SAFE_DATA
114                 clear_mem(tempSrc, sizeof(tempSrc));
115                 clear_mem(tempDst, sizeof(tempDst));
116 #endif
117 
118         }
119 #ifdef SAFE_DATA
120         /* Clear sensitive data in stack */
121         clear_mem(keyStream, sizeof(keyStream));
122         clear_mem(&zucState, sizeof(zucState));
123 #endif
124 }
125 
126 static inline
_zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],void * pBufferOut[NUM_SSE_BUFS],const uint32_t length[NUM_SSE_BUFS],const unsigned use_gfni)127 void _zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
128                             const void * const pIv[NUM_SSE_BUFS],
129                             const void * const pBufferIn[NUM_SSE_BUFS],
130                             void *pBufferOut[NUM_SSE_BUFS],
131                             const uint32_t length[NUM_SSE_BUFS],
132                             const unsigned use_gfni)
133 {
134         DECLARE_ALIGNED(ZucState4_t state, 64);
135         DECLARE_ALIGNED(ZucState_t singlePktState, 64);
136         unsigned int i;
137         /* Calculate the minimum input packet size */
138         uint32_t bytes1 = (length[0] < length[1] ?
139                            length[0] : length[1]);
140         uint32_t bytes2 = (length[2] < length[3] ?
141                            length[2] : length[3]);
142         /* min number of bytes */
143         uint32_t bytes = (bytes1 < bytes2) ? bytes1 : bytes2;
144         uint32_t numKeyStreamsPerPkt;
145         uint16_t remainBytes[NUM_SSE_BUFS] = {0};
146         DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][KEYSTR_ROUND_LEN], 64);
147         /* structure to store the 4 keys */
148         DECLARE_ALIGNED(ZucKey4_t keys, 64);
149         /* structure to store the 4 IV's */
150         DECLARE_ALIGNED(ZucIv4_t ivs, 64);
151         uint32_t numBytesLeftOver = 0;
152         const uint8_t *pTempBufInPtr = NULL;
153         uint8_t *pTempBufOutPtr = NULL;
154         const uint64_t *pIn64[NUM_SSE_BUFS]= {NULL};
155         uint64_t *pOut64[NUM_SSE_BUFS] = {NULL};
156         uint64_t *pKeyStream64 = NULL;
157 
158         /*
159          * Calculate the number of bytes left over for each packet,
160          * and setup the Keys and IVs
161          */
162         for (i = 0; i < NUM_SSE_BUFS; i++) {
163                 remainBytes[i] = length[i];
164                 keys.pKeys[i] = pKey[i];
165                 ivs.pIvs[i] = pIv[i];
166         }
167 
168         if (use_gfni)
169                 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
170         else
171                 asm_ZucInitialization_4_sse(&keys, &ivs, &state);
172 
173         for (i = 0; i < NUM_SSE_BUFS; i++) {
174                 pOut64[i] = (uint64_t *) pBufferOut[i];
175                 pIn64[i] = (const uint64_t *) pBufferIn[i];
176         }
177 
178         /* Encrypt common length of all buffers */
179         if (use_gfni)
180                 asm_ZucCipher_4_gfni_sse(&state, pIn64, pOut64,
181                                              remainBytes, (uint16_t) bytes);
182         else
183                 asm_ZucCipher_4_sse(&state, pIn64, pOut64,
184                                         remainBytes, (uint16_t) bytes);
185 
186         /* process each packet separately for the remaining bytes */
187         for (i = 0; i < NUM_SSE_BUFS; i++) {
188                 if (remainBytes[i]) {
189                         /* need to copy the zuc state to single packet state */
190                         singlePktState.lfsrState[0] = state.lfsrState[0][i];
191                         singlePktState.lfsrState[1] = state.lfsrState[1][i];
192                         singlePktState.lfsrState[2] = state.lfsrState[2][i];
193                         singlePktState.lfsrState[3] = state.lfsrState[3][i];
194                         singlePktState.lfsrState[4] = state.lfsrState[4][i];
195                         singlePktState.lfsrState[5] = state.lfsrState[5][i];
196                         singlePktState.lfsrState[6] = state.lfsrState[6][i];
197                         singlePktState.lfsrState[7] = state.lfsrState[7][i];
198                         singlePktState.lfsrState[8] = state.lfsrState[8][i];
199                         singlePktState.lfsrState[9] = state.lfsrState[9][i];
200                         singlePktState.lfsrState[10] = state.lfsrState[10][i];
201                         singlePktState.lfsrState[11] = state.lfsrState[11][i];
202                         singlePktState.lfsrState[12] = state.lfsrState[12][i];
203                         singlePktState.lfsrState[13] = state.lfsrState[13][i];
204                         singlePktState.lfsrState[14] = state.lfsrState[14][i];
205                         singlePktState.lfsrState[15] = state.lfsrState[15][i];
206 
207                         singlePktState.fR1 = state.fR1[i];
208                         singlePktState.fR2 = state.fR2[i];
209 
210                         numKeyStreamsPerPkt = remainBytes[i] / KEYSTR_ROUND_LEN;
211                         numBytesLeftOver = remainBytes[i]  % KEYSTR_ROUND_LEN;
212 
213                         pTempBufInPtr = pBufferIn[i];
214                         pTempBufOutPtr = pBufferOut[i];
215 
216                         /* update the output and input pointers here to point
217                          * to the i'th buffers */
218                         pOut64[0] = (uint64_t *) &pTempBufOutPtr[length[i] -
219                                                                 remainBytes[i]];
220                         pIn64[0] = (const uint64_t *) &pTempBufInPtr[length[i] -
221                                                                 remainBytes[i]];
222 
223                         while (numKeyStreamsPerPkt--) {
224                                 /* Generate the key stream 16 bytes at a time */
225                                 asm_ZucGenKeystream16B_sse(
226                                                        (uint32_t *) keyStr[0],
227                                                        &singlePktState);
228                                 pKeyStream64 = (uint64_t *) keyStr[0];
229                                 asm_XorKeyStream16B_sse(pIn64[0],
230                                                         pOut64[0],
231                                                         pKeyStream64);
232                                 pIn64[0] += 2;
233                                 pOut64[0] += 2;
234                         }
235 
236                         /* Check for remaining 0 to 15 bytes */
237                         if (numBytesLeftOver) {
238                                 DECLARE_ALIGNED(uint8_t tempSrc[16], 64);
239                                 DECLARE_ALIGNED(uint8_t tempDst[16], 64);
240                                 uint64_t *pTempSrc64;
241                                 uint64_t *pTempDst64;
242                                 uint32_t offset = length[i] - numBytesLeftOver;
243                                 const uint64_t num4BRounds =
244                                         ((numBytesLeftOver - 1) / 4) + 1;
245 
246                                 asm_ZucGenKeystream_sse((uint32_t *)&keyStr[0],
247                                                         &singlePktState,
248                                                         num4BRounds);
249                                 /* copy the remaining bytes into temporary
250                                  * buffer and XOR with the 16 bytes of
251                                  * keystream. Then copy on the valid bytes back
252                                  * to the output buffer */
253                                 memcpy(&tempSrc[0], &pTempBufInPtr[offset],
254                                        numBytesLeftOver);
255                                 memset(&tempSrc[numBytesLeftOver], 0,
256                                        16 - numBytesLeftOver);
257 
258                                 pKeyStream64 = (uint64_t *) &keyStr[0][0];
259                                 pTempSrc64 = (uint64_t *) &tempSrc[0];
260                                 pTempDst64 = (uint64_t *) &tempDst[0];
261                                 asm_XorKeyStream16B_sse(pTempSrc64,
262                                                         pTempDst64,
263                                                         pKeyStream64);
264 
265                                 memcpy(&pTempBufOutPtr[offset],
266                                        &tempDst[0], numBytesLeftOver);
267 #ifdef SAFE_DATA
268                                 clear_mem(tempSrc, sizeof(tempSrc));
269                                 clear_mem(tempDst, sizeof(tempDst));
270 #endif
271                         }
272                 }
273         }
274 #ifdef SAFE_DATA
275         /* Clear sensitive data in stack */
276         clear_mem(keyStr, sizeof(keyStr));
277         clear_mem(&singlePktState, sizeof(singlePktState));
278         clear_mem(&state, sizeof(state));
279         clear_mem(&keys, sizeof(keys));
280 #endif
281 }
282 
zuc_eea3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,void * pBufferOut,const uint32_t length)283 void zuc_eea3_1_buffer_sse(const void *pKey,
284                            const void *pIv,
285                            const void *pBufferIn,
286                            void *pBufferOut,
287                            const uint32_t length)
288 {
289 #ifndef LINUX
290         DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
291 
292         SAVE_XMMS(xmm_save);
293 #endif
294 #ifdef SAFE_PARAM
295         /* Check for NULL pointers */
296         if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
297             pBufferOut == NULL)
298                 return;
299 
300         /* Check input data is in range of supported length */
301         if (length < ZUC_MIN_BYTELEN || length > ZUC_MAX_BYTELEN)
302                 return;
303 #endif
304 
305         _zuc_eea3_1_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length);
306 
307 #ifdef SAFE_DATA
308         /* Clear sensitive data in registers */
309         CLEAR_SCRATCH_GPS();
310         CLEAR_SCRATCH_SIMD_REGS();
311 #endif
312 #ifndef LINUX
313         RESTORE_XMMS(xmm_save);
314 #endif
315 }
316 
317 static inline
_zuc_eea3_4_buffer(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],void * pBufferOut[NUM_SSE_BUFS],const uint32_t length[NUM_SSE_BUFS],const unsigned use_gfni)318 void _zuc_eea3_4_buffer(const void * const pKey[NUM_SSE_BUFS],
319                         const void * const pIv[NUM_SSE_BUFS],
320                         const void * const pBufferIn[NUM_SSE_BUFS],
321                         void *pBufferOut[NUM_SSE_BUFS],
322                         const uint32_t length[NUM_SSE_BUFS],
323                         const unsigned use_gfni)
324 {
325 #ifndef LINUX
326         DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
327 
328         SAVE_XMMS(xmm_save);
329 #endif
330 #ifdef SAFE_PARAM
331         unsigned int i;
332 
333         /* Check for NULL pointers */
334         if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
335             pBufferOut == NULL || length == NULL)
336                 return;
337 
338         for (i = 0; i < NUM_SSE_BUFS; i++) {
339                 if (pKey[i] == NULL || pIv[i] == NULL ||
340                     pBufferIn[i] == NULL || pBufferOut[i] == NULL)
341                         return;
342 
343                 /* Check input data is in range of supported length */
344                 if (length[i] < ZUC_MIN_BYTELEN || length[i] > ZUC_MAX_BYTELEN)
345                         return;
346         }
347 #endif
348 
349         _zuc_eea3_4_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length,
350                                use_gfni);
351 
352 #ifdef SAFE_DATA
353         /* Clear sensitive data in registers */
354         CLEAR_SCRATCH_GPS();
355         CLEAR_SCRATCH_SIMD_REGS();
356 #endif
357 #ifndef LINUX
358         RESTORE_XMMS(xmm_save);
359 #endif
360 }
361 
/*
 * Public 4-buffer ZUC-EEA3 entry point, non-GFNI SSE variant.
 * Forwards all arguments to _zuc_eea3_4_buffer() with use_gfni = 0.
 */
void zuc_eea3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
                           const void * const pIv[NUM_SSE_BUFS],
                           const void * const pBufferIn[NUM_SSE_BUFS],
                           void *pBufferOut[NUM_SSE_BUFS],
                           const uint32_t length[NUM_SSE_BUFS])
{
        const unsigned gfni = 0;

        _zuc_eea3_4_buffer(pKey, pIv, pBufferIn, pBufferOut, length, gfni);
}
370 
/*
 * Public 4-buffer ZUC-EEA3 entry point, GFNI SSE variant.
 * Forwards all arguments to _zuc_eea3_4_buffer() with use_gfni = 1.
 */
void zuc_eea3_4_buffer_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                const void * const pIv[NUM_SSE_BUFS],
                                const void * const pBufferIn[NUM_SSE_BUFS],
                                void *pBufferOut[NUM_SSE_BUFS],
                                const uint32_t length[NUM_SSE_BUFS])
{
        const unsigned gfni = 1;

        _zuc_eea3_4_buffer(pKey, pIv, pBufferIn, pBufferOut, length, gfni);
}
379 
380 static inline
_zuc_eea3_n_buffer(const void * const pKey[],const void * const pIv[],const void * const pBufferIn[],void * pBufferOut[],const uint32_t length[],const uint32_t numBuffers,const unsigned use_gfni)381 void _zuc_eea3_n_buffer(const void * const pKey[], const void * const pIv[],
382                         const void * const pBufferIn[], void *pBufferOut[],
383                         const uint32_t length[],
384                         const uint32_t numBuffers,
385                         const unsigned use_gfni)
386 {
387 #ifndef LINUX
388         DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
389 
390         SAVE_XMMS(xmm_save);
391 #endif
392 
393         unsigned int i;
394         unsigned int packetCount = numBuffers;
395 
396 #ifdef SAFE_PARAM
397         /* Check for NULL pointers */
398         if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
399             pBufferOut == NULL || length == NULL)
400                 return;
401 
402         for (i = 0; i < numBuffers; i++) {
403                 if (pKey[i] == NULL || pIv[i] == NULL ||
404                     pBufferIn[i] == NULL || pBufferOut[i] == NULL)
405                         return;
406 
407                 /* Check input data is in range of supported length */
408                 if (length[i] < ZUC_MIN_BYTELEN || length[i] > ZUC_MAX_BYTELEN)
409                         return;
410         }
411 #endif
412         i = 0;
413 
414         while (packetCount >= NUM_SSE_BUFS) {
415                 packetCount -= NUM_SSE_BUFS;
416                 _zuc_eea3_4_buffer(&pKey[i],
417                                    &pIv[i],
418                                    &pBufferIn[i],
419                                    &pBufferOut[i],
420                                    &length[i],
421                                    use_gfni);
422                 i += NUM_SSE_BUFS;
423         }
424 
425         while(packetCount--) {
426                 _zuc_eea3_1_buffer_sse(pKey[i],
427                                        pIv[i],
428                                        pBufferIn[i],
429                                        pBufferOut[i],
430                                        length[i]);
431                 i++;
432         }
433 
434 #ifdef SAFE_DATA
435         /* Clear sensitive data in registers */
436         CLEAR_SCRATCH_GPS();
437         CLEAR_SCRATCH_SIMD_REGS();
438 #endif
439 #ifndef LINUX
440         RESTORE_XMMS(xmm_save);
441 #endif
442 }
443 
/*
 * Public N-buffer ZUC-EEA3 entry point, non-GFNI SSE variant.
 * Forwards all arguments to _zuc_eea3_n_buffer() with use_gfni = 0.
 */
void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[],
                           const void * const pBufferIn[], void *pBufferOut[],
                           const uint32_t length[],
                           const uint32_t numBuffers)
{
        const unsigned gfni = 0;

        _zuc_eea3_n_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           numBuffers, gfni);
}
452 
/*
 * Public N-buffer ZUC-EEA3 entry point, GFNI SSE variant.
 * Forwards all arguments to _zuc_eea3_n_buffer() with use_gfni = 1.
 */
void zuc_eea3_n_buffer_gfni_sse(const void * const pKey[],
                                const void * const pIv[],
                                const void * const pBufferIn[],
                                void *pBufferOut[],
                                const uint32_t length[],
                                const uint32_t numBuffers)
{
        const unsigned gfni = 1;

        _zuc_eea3_n_buffer(pKey, pIv, pBufferIn, pBufferOut, length,
                           numBuffers, gfni);
}
463 
/*
 * Rotate the 64-bit value u left by r bit positions.
 *
 * The count is reduced modulo 64 and both shift amounts are kept in the
 * range 0..63, so r == 0 (and any multiple of 64) returns u unchanged
 * instead of shifting a 64-bit value by 64 bits, which is undefined in C.
 */
static inline uint64_t rotate_left(uint64_t u, size_t r)
{
        const unsigned int s = (unsigned int) (r & 63);

        return (u << s) | (u >> ((64 - s) & 63));
}
468 
/*
 * Read 8 bytes starting at ptr and return them as a uint64_t in host byte
 * order.
 *
 * memcpy() is used instead of dereferencing a cast pointer so the load is
 * well defined for any alignment of ptr and for any declared type of the
 * underlying object (in this file it is called on elements of a uint32_t
 * array, which are only guaranteed 4-byte alignment).
 */
static inline uint64_t load_uint64(const void *ptr)
{
        uint64_t value;

        memcpy(&value, ptr, sizeof(value));
        return value;
}
473 
474 static inline
_zuc_eia3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,const uint32_t lengthInBits,uint32_t * pMacI)475 void _zuc_eia3_1_buffer_sse(const void *pKey,
476                             const void *pIv,
477                             const void *pBufferIn,
478                             const uint32_t lengthInBits,
479                             uint32_t *pMacI)
480 {
481         DECLARE_ALIGNED(ZucState_t zucState, 16);
482         DECLARE_ALIGNED(uint32_t keyStream[4 * 2], 64);
483         const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
484         /* generate a key-stream 2 words longer than the input message */
485         const uint32_t N = lengthInBits + (2 * ZUC_WORD_BITS);
486         uint32_t L = (N + 31) / ZUC_WORD_BITS;
487         uint32_t *pZuc = (uint32_t *) &keyStream[0];
488         uint32_t remainingBits = lengthInBits;
489         uint32_t T = 0;
490         const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
491 
492         asm_ZucInitialization_sse(pKey, pIv, &(zucState));
493         asm_ZucGenKeystream16B_sse(pZuc, &zucState);
494 
495         /* loop over the message bits */
496         while (remainingBits >= keyStreamLengthInBits) {
497                 remainingBits -=  keyStreamLengthInBits;
498                 L -= (keyStreamLengthInBits / 32);
499 
500                 /* Generate the next key stream 8 bytes or 16 bytes */
501                 if (!remainingBits)
502                         asm_ZucGenKeystream8B_sse(&keyStream[4], &zucState);
503                 else
504                         asm_ZucGenKeystream16B_sse(&keyStream[4], &zucState);
505                 T = asm_Eia3Round16BSSE(T, keyStream, pIn8);
506                 /* Copy the last keystream generated
507                  * to the first 16 bytes */
508                 memcpy(&keyStream[0], &keyStream[4], KEYSTR_ROUND_LEN);
509                 pIn8 = &pIn8[KEYSTR_ROUND_LEN];
510         }
511 
512         /*
513          * If remaining bits has more than 2 ZUC WORDS (double words),
514          * keystream needs to have up to another 2 ZUC WORDS (8B)
515          */
516         if (remainingBits > (2 * 32))
517                 asm_ZucGenKeystream8B_sse(&keyStream[4], &zucState);
518         T ^= asm_Eia3RemainderSSE(&keyStream[0], pIn8, remainingBits);
519         T ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]),
520                          remainingBits % 32);
521 
522         /* save the final MAC-I result */
523         uint32_t keyBlock = keyStream[L - 1];
524         *pMacI = bswap4(T ^ keyBlock);
525 
526 #ifdef SAFE_DATA
527         /* Clear sensitive data (in registers and stack) */
528         clear_mem(keyStream, sizeof(keyStream));
529         clear_mem(&zucState, sizeof(zucState));
530 #endif
531 }
532 
533 static inline
_zuc_eia3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],const uint32_t lengthInBits[NUM_SSE_BUFS],uint32_t * pMacI[NUM_SSE_BUFS],const unsigned use_gfni)534 void _zuc_eia3_4_buffer_sse(const void * const pKey[NUM_SSE_BUFS],
535                             const void * const pIv[NUM_SSE_BUFS],
536                             const void * const pBufferIn[NUM_SSE_BUFS],
537                             const uint32_t lengthInBits[NUM_SSE_BUFS],
538                             uint32_t *pMacI[NUM_SSE_BUFS],
539                             const unsigned use_gfni)
540 {
541         unsigned int i;
542         DECLARE_ALIGNED(ZucState4_t state, 64);
543         DECLARE_ALIGNED(ZucState_t singlePktState, 64);
544         DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][2*KEYSTR_ROUND_LEN], 64);
545         /* structure to store the 4 keys */
546         DECLARE_ALIGNED(ZucKey4_t keys, 64);
547         /* structure to store the 4 IV's */
548         DECLARE_ALIGNED(ZucIv4_t ivs, 64);
549         const uint8_t *pIn8[NUM_SSE_BUFS] = {NULL};
550         uint32_t remainCommonBits;
551         uint32_t numKeyStr = 0;
552         uint32_t T[NUM_SSE_BUFS] = {0};
553         const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
554         DECLARE_ALIGNED(uint32_t *pKeyStrArr[NUM_SSE_BUFS], 16) = {NULL};
555         unsigned int allCommonBits;
556 
557         /* Check if all lengths are equal */
558         if ((lengthInBits[0] == lengthInBits[1]) &&
559             (lengthInBits[0] == lengthInBits[2]) &&
560             (lengthInBits[0] == lengthInBits[3])) {
561                 remainCommonBits = lengthInBits[0];
562                 allCommonBits = 1;
563         } else {
564                 /* Calculate the minimum input packet size */
565                 uint32_t bits1 = (lengthInBits[0] < lengthInBits[1] ?
566                                    lengthInBits[0] : lengthInBits[1]);
567                 uint32_t bits2 = (lengthInBits[2] < lengthInBits[3] ?
568                                    lengthInBits[2] : lengthInBits[3]);
569 
570                 remainCommonBits = (bits1 < bits2) ? bits1 : bits2;
571                 allCommonBits = 0;
572         }
573 
574         for (i = 0; i < NUM_SSE_BUFS; i++) {
575                 pIn8[i] = (const uint8_t *) pBufferIn[i];
576                 pKeyStrArr[i] = (uint32_t *) &keyStr[i][0];
577                 keys.pKeys[i] = pKey[i];
578                 ivs.pIvs[i] = pIv[i];
579         }
580 
581         if (use_gfni) {
582                 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
583 
584                 /* Generate 16 bytes at a time */
585                 asm_ZucGenKeystream16B_4_gfni_sse(&state, pKeyStrArr);
586         } else {
587                 asm_ZucInitialization_4_sse(&keys, &ivs, &state);
588 
589                 /* Generate 16 bytes at a time */
590                 asm_ZucGenKeystream16B_4_sse(&state, pKeyStrArr);
591         }
592 
593         /* Point at the next 16 bytes of the key */
594         for (i = 0; i < NUM_SSE_BUFS; i++)
595                 pKeyStrArr[i] = (uint32_t *) &keyStr[i][KEYSTR_ROUND_LEN];
596 
597         /* loop over the message bits */
598         while (remainCommonBits >= keyStreamLengthInBits) {
599                 remainCommonBits -= keyStreamLengthInBits;
600                 numKeyStr++;
601                 /* Generate the next key stream 8 bytes or 16 bytes */
602                 if (use_gfni) {
603                         if (!remainCommonBits && allCommonBits)
604                                 asm_ZucGenKeystream8B_4_gfni_sse(&state,
605                                                                  pKeyStrArr);
606                         else
607                                 asm_ZucGenKeystream16B_4_gfni_sse(&state,
608                                                                   pKeyStrArr);
609                 } else {
610                         if (!remainCommonBits && allCommonBits)
611                                 asm_ZucGenKeystream8B_4_sse(&state, pKeyStrArr);
612                         else
613                                 asm_ZucGenKeystream16B_4_sse(&state,
614                                                              pKeyStrArr);
615                 }
616                 for (i = 0; i < NUM_SSE_BUFS; i++) {
617                         T[i] = asm_Eia3Round16BSSE(T[i], keyStr[i],
618                                                    pIn8[i]);
619                         /* Copy the last keystream generated
620                          * to the first 16 bytes */
621                         memcpy(&keyStr[i][0], &keyStr[i][KEYSTR_ROUND_LEN],
622                                KEYSTR_ROUND_LEN);
623                         pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
624                 }
625         }
626 
627         /* Process each packet separately for the remaining bits */
628         for (i = 0; i < NUM_SSE_BUFS; i++) {
629                 const uint32_t N = lengthInBits[i] + (2 * ZUC_WORD_BITS);
630                 uint32_t L = ((N + 31) / ZUC_WORD_BITS) -
631                              numKeyStr*(keyStreamLengthInBits / 32);
632                 uint32_t remainBits = lengthInBits[i] -
633                                       numKeyStr*keyStreamLengthInBits;
634                 uint32_t *keyStr32 = (uint32_t *) keyStr[i];
635 
636                 /* If remaining bits are more than 8 bytes, we need to generate
637                  * at least 8B more of keystream, so we need to copy
638                  * the zuc state to single packet state first */
639                 if (remainBits > (2*32)) {
640                         singlePktState.lfsrState[0] = state.lfsrState[0][i];
641                         singlePktState.lfsrState[1] = state.lfsrState[1][i];
642                         singlePktState.lfsrState[2] = state.lfsrState[2][i];
643                         singlePktState.lfsrState[3] = state.lfsrState[3][i];
644                         singlePktState.lfsrState[4] = state.lfsrState[4][i];
645                         singlePktState.lfsrState[5] = state.lfsrState[5][i];
646                         singlePktState.lfsrState[6] = state.lfsrState[6][i];
647                         singlePktState.lfsrState[7] = state.lfsrState[7][i];
648                         singlePktState.lfsrState[8] = state.lfsrState[8][i];
649                         singlePktState.lfsrState[9] = state.lfsrState[9][i];
650                         singlePktState.lfsrState[10] = state.lfsrState[10][i];
651                         singlePktState.lfsrState[11] = state.lfsrState[11][i];
652                         singlePktState.lfsrState[12] = state.lfsrState[12][i];
653                         singlePktState.lfsrState[13] = state.lfsrState[13][i];
654                         singlePktState.lfsrState[14] = state.lfsrState[14][i];
655                         singlePktState.lfsrState[15] = state.lfsrState[15][i];
656 
657                         singlePktState.fR1 = state.fR1[i];
658                         singlePktState.fR2 = state.fR2[i];
659                 }
660 
661                 while (remainBits >= keyStreamLengthInBits) {
662                         remainBits -= keyStreamLengthInBits;
663                         L -= (keyStreamLengthInBits / 32);
664 
665                         /* Generate the next key stream 8 bytes or 16 bytes */
666                         if (!remainBits)
667                                 asm_ZucGenKeystream8B_sse(&keyStr32[4],
668                                                           &singlePktState);
669                         else
670                                 asm_ZucGenKeystream16B_sse(&keyStr32[4],
671                                                            &singlePktState);
672                         T[i] = asm_Eia3Round16BSSE(T[i], keyStr32,
673                                                    pIn8[i]);
674                         /* Copy the last keystream generated
675                          * to the first 16 bytes */
676                         memcpy(keyStr32, &keyStr32[4], KEYSTR_ROUND_LEN);
677                         pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
678                 }
679 
680                 /*
681                  * If remaining bits has more than 2 ZUC WORDS (double words),
682                  * keystream needs to have up to another 2 ZUC WORDS (8B)
683                  */
684                 if (remainBits > (2 * 32))
685                         asm_ZucGenKeystream8B_sse(&keyStr32[4],
686                                                   &singlePktState);
687 
688                 uint32_t keyBlock = keyStr32[L - 1];
689 
690                 T[i] ^= asm_Eia3RemainderSSE(keyStr32, pIn8[i], remainBits);
691                 T[i] ^= rotate_left(load_uint64(&keyStr32[remainBits / 32]),
692                                  remainBits % 32);
693 
694                 /* save the final MAC-I result */
695                 *(pMacI[i]) = bswap4(T[i] ^ keyBlock);
696         }
697 
698 #ifdef SAFE_DATA
699         /* Clear sensitive data (in registers and stack) */
700         clear_mem(keyStr, sizeof(keyStr));
701         clear_mem(&singlePktState, sizeof(singlePktState));
702         clear_mem(&state, sizeof(state));
703         clear_mem(&keys, sizeof(keys));
704 #endif
705 }
706 
zuc_eia3_1_buffer_sse(const void * pKey,const void * pIv,const void * pBufferIn,const uint32_t lengthInBits,uint32_t * pMacI)707 void zuc_eia3_1_buffer_sse(const void *pKey,
708                            const void *pIv,
709                            const void *pBufferIn,
710                            const uint32_t lengthInBits,
711                            uint32_t *pMacI)
712 {
713 #ifndef LINUX
714         DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
715 
716         SAVE_XMMS(xmm_save);
717 #endif
718 #ifdef SAFE_PARAM
719         /* Check for NULL pointers */
720         if (pKey == NULL || pIv == NULL || pBufferIn == NULL || pMacI == NULL)
721                 return;
722 
723         /* Check input data is in range of supported length */
724         if (lengthInBits < ZUC_MIN_BITLEN || lengthInBits > ZUC_MAX_BITLEN)
725                 return;
726 #endif
727 
728         _zuc_eia3_1_buffer_sse(pKey, pIv, pBufferIn, lengthInBits, pMacI);
729 
730 #ifdef SAFE_DATA
731         CLEAR_SCRATCH_GPS();
732         CLEAR_SCRATCH_SIMD_REGS();
733 #endif
734 #ifndef LINUX
735         RESTORE_XMMS(xmm_save);
736 #endif
737 }
738 
739 static inline
_zuc_eia3_4_buffer_job(const void * const pKey[NUM_SSE_BUFS],const void * const pIv[NUM_SSE_BUFS],const void * const pBufferIn[NUM_SSE_BUFS],uint32_t * pMacI[NUM_SSE_BUFS],const uint16_t lengthInBits[NUM_SSE_BUFS],const void * const job_in_lane[NUM_SSE_BUFS],const unsigned use_gfni)740 void _zuc_eia3_4_buffer_job(const void * const pKey[NUM_SSE_BUFS],
741                             const void * const pIv[NUM_SSE_BUFS],
742                             const void * const pBufferIn[NUM_SSE_BUFS],
743                             uint32_t *pMacI[NUM_SSE_BUFS],
744                             const uint16_t lengthInBits[NUM_SSE_BUFS],
745                             const void * const job_in_lane[NUM_SSE_BUFS],
746                             const unsigned use_gfni)
747 {
748         unsigned int i;
749         DECLARE_ALIGNED(ZucState4_t state, 64);
750         DECLARE_ALIGNED(ZucState_t singlePktState, 64);
751         DECLARE_ALIGNED(uint8_t keyStr[NUM_SSE_BUFS][2*KEYSTR_ROUND_LEN], 64);
752         /* structure to store the 4 keys */
753         DECLARE_ALIGNED(ZucKey4_t keys, 64);
754         /* structure to store the 4 IV's */
755         DECLARE_ALIGNED(ZucIv4_t ivs, 64);
756         const uint8_t *pIn8[NUM_SSE_BUFS] = {NULL};
757         uint32_t remainCommonBits;
758         uint32_t numKeyStr = 0;
759         uint32_t T[NUM_SSE_BUFS] = {0};
760         const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
761         DECLARE_ALIGNED(uint32_t *pKeyStrArr[NUM_SSE_BUFS], 16) = {NULL};
762         unsigned int allCommonBits;
763 
764         /* Check if all lengths are equal */
765         if ((lengthInBits[0] == lengthInBits[1]) &&
766             (lengthInBits[0] == lengthInBits[2]) &&
767             (lengthInBits[0] == lengthInBits[3])) {
768                 remainCommonBits = lengthInBits[0];
769                 allCommonBits = 1;
770         } else {
771                 /* Calculate the minimum input packet size */
772                 uint32_t bits1 = (lengthInBits[0] < lengthInBits[1] ?
773                                    lengthInBits[0] : lengthInBits[1]);
774                 uint32_t bits2 = (lengthInBits[2] < lengthInBits[3] ?
775                                    lengthInBits[2] : lengthInBits[3]);
776 
777                 remainCommonBits = (bits1 < bits2) ? bits1 : bits2;
778                 allCommonBits = 0;
779         }
780 
781         for (i = 0; i < NUM_SSE_BUFS; i++) {
782                 pIn8[i] = (const uint8_t *) pBufferIn[i];
783                 pKeyStrArr[i] = (uint32_t *) &keyStr[i][0];
784                 keys.pKeys[i] = pKey[i];
785                 ivs.pIvs[i] = pIv[i];
786         }
787 
788         if (use_gfni) {
789                 asm_ZucInitialization_4_gfni_sse(&keys, &ivs, &state);
790 
791                 /* Generate 16 bytes at a time */
792                 asm_ZucGenKeystream16B_4_gfni_sse(&state, pKeyStrArr);
793         } else {
794                 asm_ZucInitialization_4_sse(&keys,  &ivs, &state);
795 
796                 /* Generate 16 bytes at a time */
797                 asm_ZucGenKeystream16B_4_sse(&state, pKeyStrArr);
798         }
799 
800         /* Point at the next 16 bytes of the key */
801         for (i = 0; i < NUM_SSE_BUFS; i++)
802                 pKeyStrArr[i] = (uint32_t *) &keyStr[i][KEYSTR_ROUND_LEN];
803 
804         /* loop over the message bits */
805         while (remainCommonBits >= keyStreamLengthInBits) {
806                 remainCommonBits -= keyStreamLengthInBits;
807                 numKeyStr++;
808                 /* Generate the next key stream 8 bytes or 16 bytes */
809                 if (use_gfni) {
810                         if (!remainCommonBits && allCommonBits)
811                                 asm_ZucGenKeystream8B_4_gfni_sse(&state,
812                                                                  pKeyStrArr);
813                         else
814                                 asm_ZucGenKeystream16B_4_gfni_sse(&state,
815                                                                   pKeyStrArr);
816                 } else {
817                         if (!remainCommonBits && allCommonBits)
818                                 asm_ZucGenKeystream8B_4_sse(&state,
819                                                             pKeyStrArr);
820                         else
821                                 asm_ZucGenKeystream16B_4_sse(&state,
822                                                              pKeyStrArr);
823                 }
824                 for (i = 0; i < NUM_SSE_BUFS; i++) {
825                         if (job_in_lane[i] == NULL)
826                                 continue;
827                         T[i] = asm_Eia3Round16BSSE(T[i], keyStr[i],
828                                                    pIn8[i]);
829                         /* Copy the last keystream generated
830                          * to the first 16 bytes */
831                         memcpy(&keyStr[i][0], &keyStr[i][KEYSTR_ROUND_LEN],
832                                KEYSTR_ROUND_LEN);
833                         pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
834                 }
835         }
836 
837         /* Process each packet separately for the remaining bits */
838         for (i = 0; i < NUM_SSE_BUFS; i++) {
839                 if (job_in_lane[i] == NULL)
840                         continue;
841 
842                 const uint32_t N = lengthInBits[i] + (2 * ZUC_WORD_BITS);
843                 uint32_t L = ((N + 31) / ZUC_WORD_BITS) -
844                              numKeyStr*(keyStreamLengthInBits / 32);
845                 uint32_t remainBits = lengthInBits[i] -
846                                       numKeyStr*keyStreamLengthInBits;
847                 uint32_t *keyStr32 = (uint32_t *) keyStr[i];
848 
849                 /* If remaining bits are more than 8 bytes, we need to generate
850                  * at least 8B more of keystream, so we need to copy
851                  * the zuc state to single packet state first */
852                 if (remainBits > (2*32)) {
853                         singlePktState.lfsrState[0] = state.lfsrState[0][i];
854                         singlePktState.lfsrState[1] = state.lfsrState[1][i];
855                         singlePktState.lfsrState[2] = state.lfsrState[2][i];
856                         singlePktState.lfsrState[3] = state.lfsrState[3][i];
857                         singlePktState.lfsrState[4] = state.lfsrState[4][i];
858                         singlePktState.lfsrState[5] = state.lfsrState[5][i];
859                         singlePktState.lfsrState[6] = state.lfsrState[6][i];
860                         singlePktState.lfsrState[7] = state.lfsrState[7][i];
861                         singlePktState.lfsrState[8] = state.lfsrState[8][i];
862                         singlePktState.lfsrState[9] = state.lfsrState[9][i];
863                         singlePktState.lfsrState[10] = state.lfsrState[10][i];
864                         singlePktState.lfsrState[11] = state.lfsrState[11][i];
865                         singlePktState.lfsrState[12] = state.lfsrState[12][i];
866                         singlePktState.lfsrState[13] = state.lfsrState[13][i];
867                         singlePktState.lfsrState[14] = state.lfsrState[14][i];
868                         singlePktState.lfsrState[15] = state.lfsrState[15][i];
869 
870                         singlePktState.fR1 = state.fR1[i];
871                         singlePktState.fR2 = state.fR2[i];
872                 }
873 
874                 while (remainBits >= keyStreamLengthInBits) {
875                         remainBits -= keyStreamLengthInBits;
876                         L -= (keyStreamLengthInBits / 32);
877 
878                         /* Generate the next key stream 8 bytes or 16 bytes */
879                         if (!remainBits)
880                                 asm_ZucGenKeystream8B_sse(&keyStr32[4],
881                                                           &singlePktState);
882                         else
883                                 asm_ZucGenKeystream16B_sse(&keyStr32[4],
884                                                            &singlePktState);
885                         T[i] = asm_Eia3Round16BSSE(T[i], keyStr32,
886                                                    pIn8[i]);
887                         /* Copy the last keystream generated
888                          * to the first 16 bytes */
889                         memcpy(keyStr32, &keyStr32[4], KEYSTR_ROUND_LEN);
890                         pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
891                 }
892 
893                 /*
894                  * If remaining bits has more than 2 ZUC WORDS (double words),
895                  * keystream needs to have up to another 2 ZUC WORDS (8B)
896                  */
897                 if (remainBits > (2 * 32))
898                         asm_ZucGenKeystream8B_sse(&keyStr32[4],
899                                                   &singlePktState);
900 
901                 uint32_t keyBlock = keyStr32[L - 1];
902 
903                 T[i] ^= asm_Eia3RemainderSSE(keyStr32, pIn8[i], remainBits);
904                 T[i] ^= rotate_left(load_uint64(&keyStr32[remainBits / 32]),
905                                  remainBits % 32);
906 
907                 /* save the final MAC-I result */
908                 *(pMacI[i]) = bswap4(T[i] ^ keyBlock);
909         }
910 
911 #ifdef SAFE_DATA
912         /* Clear sensitive data (in registers and stack) */
913         clear_mem(keyStr, sizeof(keyStr));
914         clear_mem(&singlePktState, sizeof(singlePktState));
915         clear_mem(&state, sizeof(state));
916         clear_mem(&keys, sizeof(keys));
917 #endif
918 }
919 
/*
 * ZUC-EIA3 4-lane job entry point, SSE variant without GFNI.
 *
 * Forwards every argument unchanged to _zuc_eia3_4_buffer_job() with the
 * GFNI selector cleared.
 */
void zuc_eia3_4_buffer_job_no_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                const void * const pIv[NUM_SSE_BUFS],
                                const void * const pBufferIn[NUM_SSE_BUFS],
                                uint32_t *pMacI[NUM_SSE_BUFS],
                                const uint16_t lengthInBits[NUM_SSE_BUFS],
                                const void * const job_in_lane[NUM_SSE_BUFS])
{
        const unsigned use_gfni = 0;

        _zuc_eia3_4_buffer_job(pKey, pIv, pBufferIn, pMacI, lengthInBits,
                               job_in_lane, use_gfni);
}
930 
/*
 * ZUC-EIA3 4-lane job entry point, SSE variant using GFNI.
 *
 * Forwards every argument unchanged to _zuc_eia3_4_buffer_job() with the
 * GFNI selector set.
 */
void zuc_eia3_4_buffer_job_gfni_sse(const void * const pKey[NUM_SSE_BUFS],
                                const void * const pIv[NUM_SSE_BUFS],
                                const void * const pBufferIn[NUM_SSE_BUFS],
                                uint32_t *pMacI[NUM_SSE_BUFS],
                                const uint16_t lengthInBits[NUM_SSE_BUFS],
                                const void * const job_in_lane[NUM_SSE_BUFS])
{
        const unsigned use_gfni = 1;

        _zuc_eia3_4_buffer_job(pKey, pIv, pBufferIn, pMacI, lengthInBits,
                               job_in_lane, use_gfni);
}
941 
942 static inline
_zuc_eia3_n_buffer_sse(const void * const pKey[],const void * const pIv[],const void * const pBufferIn[],const uint32_t lengthInBits[],uint32_t * pMacI[],const uint32_t numBuffers,const unsigned use_gfni)943 void _zuc_eia3_n_buffer_sse(const void * const pKey[],
944                             const void * const pIv[],
945                             const void * const pBufferIn[],
946                             const uint32_t lengthInBits[],
947                             uint32_t *pMacI[],
948                             const uint32_t numBuffers,
949                             const unsigned use_gfni)
950 {
951 #ifndef LINUX
952         DECLARE_ALIGNED(imb_uint128_t xmm_save[10], 16);
953 
954         SAVE_XMMS(xmm_save);
955 #endif
956 
957         unsigned int i;
958         unsigned int packetCount = numBuffers;
959 
960 #ifdef SAFE_PARAM
961         /* Check for NULL pointers */
962         if (pKey == NULL || pIv == NULL || pBufferIn == NULL ||
963             lengthInBits == NULL || pMacI == NULL)
964                 return;
965 
966         for (i = 0; i < numBuffers; i++) {
967                 if (pKey[i] == NULL || pIv[i] == NULL ||
968                     pBufferIn[i] == NULL || pMacI[i] == NULL)
969                         return;
970 
971                 /* Check input data is in range of supported length */
972                 if (lengthInBits[i] < ZUC_MIN_BITLEN ||
973                     lengthInBits[i] > ZUC_MAX_BITLEN)
974                         return;
975         }
976 #endif
977         i = 0;
978 
979         while(packetCount >= 4) {
980                 packetCount -=4;
981                 _zuc_eia3_4_buffer_sse(&pKey[i],
982                                        &pIv[i],
983                                        &pBufferIn[i],
984                                        &lengthInBits[i],
985                                        &pMacI[i],
986                                        use_gfni);
987                 i+=4;
988         }
989 
990         while(packetCount--) {
991                 _zuc_eia3_1_buffer_sse(pKey[i],
992                                        pIv[i],
993                                        pBufferIn[i],
994                                        lengthInBits[i],
995                                        pMacI[i]);
996                 i++;
997         }
998 
999 #ifdef SAFE_DATA
1000         /* Clear sensitive data in registers */
1001         CLEAR_SCRATCH_GPS();
1002         CLEAR_SCRATCH_SIMD_REGS();
1003 #endif
1004 #ifndef LINUX
1005         RESTORE_XMMS(xmm_save);
1006 #endif
1007 }
1008 
/*
 * ZUC-EIA3 authentication of numBuffers buffers, SSE variant without GFNI.
 *
 * Forwards every argument unchanged to _zuc_eia3_n_buffer_sse() with the
 * GFNI selector cleared.
 */
void zuc_eia3_n_buffer_sse(const void * const pKey[],
                           const void * const pIv[],
                           const void * const pBufferIn[],
                           const uint32_t lengthInBits[],
                           uint32_t *pMacI[],
                           const uint32_t numBuffers)
{
        const unsigned use_gfni = 0;

        _zuc_eia3_n_buffer_sse(pKey, pIv, pBufferIn, lengthInBits,
                               pMacI, numBuffers, use_gfni);
}
1019 
/*
 * ZUC-EIA3 authentication of numBuffers buffers, SSE variant using GFNI.
 *
 * Forwards every argument unchanged to _zuc_eia3_n_buffer_sse() with the
 * GFNI selector set.
 */
void zuc_eia3_n_buffer_gfni_sse(const void * const pKey[],
                                const void * const pIv[],
                                const void * const pBufferIn[],
                                const uint32_t lengthInBits[],
                                uint32_t *pMacI[],
                                const uint32_t numBuffers)
{
        const unsigned use_gfni = 1;

        _zuc_eia3_n_buffer_sse(pKey, pIv, pBufferIn, lengthInBits,
                               pMacI, numBuffers, use_gfni);
}
1030