1 /*******************************************************************************
2   Copyright (c) 2009-2020, Intel Corporation
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6 
7       * Redistributions of source code must retain the above copyright notice,
8         this list of conditions and the following disclaimer.
9       * Redistributions in binary form must reproduce the above copyright
10         notice, this list of conditions and the following disclaimer in the
11         documentation and/or other materials provided with the distribution.
12       * Neither the name of Intel Corporation nor the names of its contributors
13         may be used to endorse or promote products derived from this software
14         without specific prior written permission.
15 
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27 
28 /**
29  ******************************************************************************
30  * @file zuc_internal.h
31  *
32  * @description
33  *      This header file defines the internal APIs and data types for the
34  *      3GPP algorithm ZUC.
35  *
36  *****************************************************************************/
37 
38 #ifndef ZUC_INTERNAL_H_
39 #define ZUC_INTERNAL_H_
40 
41 #include <stdio.h>
42 #include <stdint.h>
43 
44 #include "include/ipsec_ooo_mgr.h"
45 #include "intel-ipsec-mb.h"
46 #include "immintrin.h"
47 #include "include/wireless_common.h"
48 
49 /* Length in bytes of one keystream block: 64 bytes of Keystream will be generated */
50 #define ZUC_KEYSTR_LEN                      (64)
/* Same value as the first dimension of the lfsrState arrays declared below */
51 #define NUM_LFSR_STATES                     (16)
/* Width of one ZUC word in bits, and the same width in bytes (32 / 8 = 4) */
52 #define ZUC_WORD_BITS                       (32)
53 #define ZUC_WORD_BYTES                      (ZUC_WORD_BITS / 8)
54 
55 /* Range of input data for ZUC is from 1 to 65504 bits */
56 #define ZUC_MIN_BITLEN     1
57 #define ZUC_MAX_BITLEN     65504
/* Byte-length limits; the maximum is derived from the bit limit (65504 / 8 = 8188) */
58 #define ZUC_MIN_BYTELEN    1
59 #define ZUC_MAX_BYTELEN    (ZUC_MAX_BITLEN / 8)
60 
/*
 * DEBUG_PRINT(_fmt, ...)
 *
 * When DEBUG is defined, expands to an fprintf() on stderr whose format is
 * the prefix "%s()::%d " followed by _fmt; the prefix is filled with the
 * enclosing function name and the current line number. _fmt has to be a
 * string literal, because it is joined to the prefix by adjacent string
 * literal concatenation. __VA_ARGS__ follows a comma in the expansion, so at
 * least one argument after _fmt is needed for a use of the macro to compile.
 *
 * The _WIN32 branch spells the function name __FUNCTION__, the other branch
 * uses __func__; the two definitions are otherwise the same.
 *
 * When DEBUG is not defined the macro expands to nothing, so its arguments
 * are not evaluated.
 */
61 #ifdef DEBUG
62 #ifdef _WIN32
63 #define DEBUG_PRINT(_fmt, ...) \
64         fprintf(stderr, "%s()::%d " _fmt , __FUNCTION__, __LINE__, __VA_ARGS__)
65 #else
66 #define DEBUG_PRINT(_fmt, ...) \
67         fprintf(stderr, "%s()::%d " _fmt , __func__, __LINE__, __VA_ARGS__)
68 #endif
69 #else
70 #define DEBUG_PRINT(_fmt, ...)
71 #endif
72 
73 /**
74  ******************************************************************************
75  * @description
76  *      Macro will loop through keystream of length 64 bytes and xor it with
77  *      the input buffer, placing the result in the output buffer. The loop
 *      runs ZUC_KEYSTR_LEN/8 times and handles 64 bits per iteration.
78  *      Before the xor, each 32-bit half of the 64-bit keystream word is
 *      passed through bswap4() (declared outside this header; presumably a
 *      32-bit byte swap - confirm), so the macro does the swap on a 32-bit
 *      boundary itself.
79  *
 * @param pIn64          Pointer to 64-bit input words; read and
 *                       post-incremented once per iteration.
 * @param pOut64         Pointer to 64-bit output words; written and
 *                       post-incremented once per iteration.
 * @param pKeyStream64   Pointer to 64-bit keystream words; read and
 *                       post-incremented once per iteration.
 *
 * @note
 *      All three arguments must be modifiable pointer lvalues: the macro
 *      applies ++ to them directly, so after the expansion each one has
 *      advanced by ZUC_KEYSTR_LEN/8 elements.
 *      The expansion is a bare brace block (not do/while(0)) that declares
 *      the locals "i" and "swapBytes"; an argument expression using either
 *      of those names would refer to the macro's local instead.
 *
80  *****************************************************************************/
81 #define ZUC_XOR_KEYSTREAM(pIn64, pOut64, pKeyStream64)		\
82 {									\
83 	int i =0;							\
84 	union SwapBytes_t {						\
85 		uint64_t l64;						\
86 		uint32_t w32[2];					\
87 	}swapBytes;							\
88 	/* loop through the key stream and xor 64 bits at a time */	\
89 	for(i =0; i < ZUC_KEYSTR_LEN/8; i++) {				\
90 		swapBytes.l64 = *pKeyStream64++;			\
91 		swapBytes.w32[0] = bswap4(swapBytes.w32[0]); \
92 		swapBytes.w32[1] = bswap4(swapBytes.w32[1]); \
93 		*pOut64++ = *pIn64++ ^ swapBytes.l64;			\
94 	}								\
95 }
96 
97 /**
98  *****************************************************************************
99  * @description
100  *      Packed structure to store the ZUC state for 4 packets.
 *      Every member is an array of uint32_t whose last dimension ([4]) holds
 *      one entry per packet.
101  *****************************************************************************/
102 typedef struct zuc_state_4_s {
103     uint32_t lfsrState[16][4];
104     /**< State registers of the LFSR: 16 words, each with 4 per-packet entries */
105     uint32_t fR1[4];
106     /**< register R1 of F, one entry per packet */
107     uint32_t fR2[4];
108     /**< register R2 of F, one entry per packet */
109     uint32_t bX0[4];
110     /**< Output X0 of the bit reorganization for 4 packets */
111     uint32_t bX1[4];
112     /**< Output X1 of the bit reorganization for 4 packets */
113     uint32_t bX2[4];
114     /**< Output X2 of the bit reorganization for 4 packets */
115     uint32_t bX3[4];
116     /**< Output X3 of the bit reorganization for 4 packets */
117 } ZucState4_t;
118 
119 /**
120  *****************************************************************************
121  * @description
122  *      Packed structure to store the ZUC state for 8 packets.
 *      Every member is an array of uint32_t whose last dimension ([8]) holds
 *      one entry per packet.
123  *****************************************************************************/
124 typedef struct zuc_state_8_s {
125     uint32_t lfsrState[16][8];
126     /**< State registers of the LFSR: 16 words, each with 8 per-packet entries */
127     uint32_t fR1[8];
128     /**< register R1 of F, one entry per packet */
129     uint32_t fR2[8];
130     /**< register R2 of F, one entry per packet */
131     uint32_t bX0[8];
132     /**< Output X0 of the bit reorganization for 8 packets */
133     uint32_t bX1[8];
134     /**< Output X1 of the bit reorganization for 8 packets */
135     uint32_t bX2[8];
136     /**< Output X2 of the bit reorganization for 8 packets */
137     uint32_t bX3[8];
138     /**< Output X3 of the bit reorganization for 8 packets */
139 } ZucState8_t;
140 
141 /**
142  *****************************************************************************
143  * @description
144  *      Packed structure to store the ZUC state for a single packet.
 *      It has the same members as the multi-packet state structures in this
 *      header, without the per-packet dimension.
145  *****************************************************************************/
146 typedef struct zuc_state_s {
147     uint32_t lfsrState[16];
148     /**< State registers of the LFSR (16 words) */
149     uint32_t fR1;
150     /**< register R1 of F */
151     uint32_t fR2;
152     /**< register R2 of F */
153     uint32_t bX0;
154     /**< Output X0 of the bit reorganization */
155     uint32_t bX1;
156     /**< Output X1 of the bit reorganization */
157     uint32_t bX2;
158     /**< Output X2 of the bit reorganization */
159     uint32_t bX3;
160     /**< Output X3 of the bit reorganization */
161 } ZucState_t;
162 
163 /**
164  *****************************************************************************
165  * @description
166  *      Structure to store pointers to the 4 keys to be used as input to
167  *      @ref asm_ZucInitialization_4 and @ref asm_ZucGenKeystream64B_4
 *      (for example, it is the type of the pKeys parameter of
 *      asm_ZucInitialization_4_sse). The structure holds only the pointers;
 *      the key bytes themselves are const and live elsewhere.
168  *****************************************************************************/
169 typedef struct zuc_key_4_s {
170     const uint8_t *pKeys[4];
171     /**< Array of pointers to 128-bit keys for the 4 packets */
172 } ZucKey4_t;
173 
174 /**
175  *****************************************************************************
176  * @description
177  *      Structure to store pointers to the 4 IVs to be used as input to
178  *      @ref asm_ZucInitialization_4 and @ref asm_ZucGenKeystream64B_4
 *      (for example, it is the type of the pIvs parameter of
 *      asm_ZucInitialization_4_sse). The structure holds only the pointers;
 *      the IV bytes themselves are const and live elsewhere.
179  *****************************************************************************/
180 typedef struct zuc_iv_4_s {
181     const uint8_t *pIvs[4];
182     /**< Array of pointers to 128-bit IVs for the 4 packets */
183 } ZucIv4_t;
184 
185 /**
186  *****************************************************************************
187  * @description
188  *      Structure to store pointers to the 8 keys to be used as input to
189  *      @ref asm_ZucInitialization_8 and @ref asm_ZucGenKeystream64B_8
 *      (for example, it is the type of the pKeys parameter of
 *      asm_ZucInitialization_8_avx2). The structure holds only the pointers;
 *      the key bytes themselves are const and live elsewhere.
190  *****************************************************************************/
191 typedef struct zuc_key_8_s {
192     const uint8_t *pKeys[8];
193     /**< Array of pointers to 128-bit keys for the 8 packets */
194 } ZucKey8_t;
195 
196 /**
197  *****************************************************************************
198  * @description
199  *      Structure to store pointers to the 8 IVs to be used as input to
200  *      @ref asm_ZucInitialization_8 and @ref asm_ZucGenKeystream64B_8
 *      (for example, it is the type of the pIvs parameter of
 *      asm_ZucInitialization_8_avx2). The structure holds only the pointers;
 *      the IV bytes themselves are const and live elsewhere.
201  *****************************************************************************/
202 typedef struct zuc_iv_8_s {
203     const uint8_t *pIvs[8];
204     /**< Array of pointers to 128-bit IVs for the 8 packets */
205 } ZucIv8_t;
206 
207 /**
208  *****************************************************************************
209  * @description
210  *      Structure to store pointers to the 16 keys to be used as input to
211  *      @ref asm_ZucInitialization_16 and @ref asm_ZucGenKeystream64B_16
 *      (for example, it is the type of the pKeys parameter of
 *      asm_ZucInitialization_16_avx512). The structure holds only the
 *      pointers; the key bytes themselves are const and live elsewhere.
212  *****************************************************************************/
213 typedef struct zuc_key_16_s {
214     const uint8_t *pKeys[16];
215     /**< Array of pointers to 128-bit keys for the 16 packets */
216 } ZucKey16_t;
217 
218 /**
219  *****************************************************************************
220  * @description
221  *      Structure to store pointers to the 16 IVs to be used as input to
222  *      @ref asm_ZucInitialization_16 and @ref asm_ZucGenKeystream64B_16
 *      (for example, it is the type of the pIvs parameter of
 *      asm_ZucInitialization_16_avx512). The structure holds only the
 *      pointers; the IV bytes themselves are const and live elsewhere.
223  *****************************************************************************/
224 typedef struct zuc_iv_16_s {
225     const uint8_t *pIvs[16];
226     /**< Array of pointers to 128-bit IVs for the 16 packets */
227 } ZucIv16_t;
228 
229 /**
230  ******************************************************************************
231  *
232  * @description
233  *      Declaration of the external functions that implement the initialization
234  *      stage of the ZUC algorithm. Each function will initialize the state
235  *      for a single packet operation.
 *      The three declarations below share one signature and differ only in
 *      the name suffix (_sse, _sse_no_aesni, _avx), which presumably names
 *      the instruction set each implementation targets.
236  *
237  * @param[in] pKey                  Pointer to the 128-bit initial key that
238  *                                  will be used when initializing the ZUC
239  *                                  state.
240  * @param[in] pIv                   Pointer to the 128-bit initial vector that
241  *                                  will be used when initializing the ZUC
242  *                                  state.
243  * @param[in,out] pState            Pointer to a ZUC state structure of type
244  *                                  @ref ZucState_t that will be populated
245  *                                  with the initialized ZUC state.
246  *
247  * @pre
248  *      None
249  *
250  *****************************************************************************/
251 IMB_DLL_LOCAL void asm_ZucInitialization_sse(const void *pKey,
252                                              const void *pIv,
253                                              ZucState_t *pState);
254 
255 IMB_DLL_LOCAL void asm_ZucInitialization_sse_no_aesni(const void *pKey,
256                                                       const void *pIv,
257                                                       ZucState_t *pState);
258 
259 IMB_DLL_LOCAL void asm_ZucInitialization_avx(const void *pKey,
260                                              const void *pIv,
261                                              ZucState_t *pState);
262 
263 /**
264  ******************************************************************************
265  * @description
266  *      Declaration of the external functions that implement the initialization
267  *      stage of the ZUC algorithm for 4 packets. Each function will initialize
268  *      the state for 4 individual packets.
 *      The declarations below share one signature and differ only in the
 *      name suffix (_sse, _sse_no_aesni, _gfni_sse, _avx).
269  *
270  * @param[in] pKeys                 Pointer to a @ref ZucKey4_t structure
271  *                                  holding pointers to the 4 128-bit initial
272  *                                  keys used when initializing the ZUC state.
273  * @param[in] pIvs                  Pointer to a @ref ZucIv4_t structure
274  *                                  holding pointers to the 4 128-bit initial
275  *                                  vectors used when initializing the state.
276  * @param[in,out] pState            Pointer to a ZUC state structure of type
277  *                                  @ref ZucState4_t that will be populated
278  *                                  with the initialized ZUC state.
279  *
280  * @pre
281  *      None
282  *
283  *****************************************************************************/
284 IMB_DLL_LOCAL void asm_ZucInitialization_4_sse(ZucKey4_t *pKeys,
285                                                ZucIv4_t *pIvs,
286                                                ZucState4_t *pState);
287 
288 IMB_DLL_LOCAL void asm_ZucInitialization_4_sse_no_aesni(ZucKey4_t *pKeys,
289                                                         ZucIv4_t *pIvs,
290                                                         ZucState4_t *pState);
291 
292 IMB_DLL_LOCAL void asm_ZucInitialization_4_gfni_sse(ZucKey4_t *pKeys,
293                                                     ZucIv4_t *pIvs,
294                                                     ZucState4_t *pState);
295 
296 IMB_DLL_LOCAL void asm_ZucInitialization_4_avx(ZucKey4_t *pKeys,
297                                                ZucIv4_t *pIvs,
298                                                ZucState4_t *pState);
299 
300 /**
301  ******************************************************************************
302  * @description
303  *      Declaration of the external function that implements the initialization
304  *      stage of the ZUC algorithm for 8 packets. The function will initialize
305  *      the state for 8 individual packets.
306  *
307  * @param[in] pKeys                 Pointer to a @ref ZucKey8_t structure
308  *                                  holding pointers to the 8 128-bit initial
309  *                                  keys used when initializing the ZUC state.
310  * @param[in] pIvs                  Pointer to a @ref ZucIv8_t structure
311  *                                  holding pointers to the 8 128-bit initial
312  *                                  vectors used when initializing the state.
313  * @param[in,out] pState            Pointer to a ZUC state structure of type
314  *                                  @ref ZucState8_t that will be populated
315  *                                  with the initialized ZUC state.
316  *
317  * @pre
318  *      None
319  *
320  *****************************************************************************/
321 IMB_DLL_LOCAL void asm_ZucInitialization_8_avx2(ZucKey8_t *pKeys,
322                                                 ZucIv8_t *pIvs,
323                                                 ZucState8_t *pState);
324 
325 /**
326  ******************************************************************************
327  * @description
328  *      Declaration of the external functions that implement the initialization
329  *      stage of the ZUC algorithm for 16 packets. Each function will initialize
330  *      the state for 16 individual packets.
 *      The two declarations below share one signature and differ only in the
 *      name suffix (_avx512, _gfni_avx512).
331  *
332  * @param[in] pKeys                 Pointer to a @ref ZucKey16_t structure
333  *                                  holding pointers to the 16 128-bit initial
334  *                                  keys used when initializing the ZUC state.
335  * @param[in] pIvs                  Pointer to a @ref ZucIv16_t structure
336  *                                  holding pointers to the 16 128-bit initial
337  *                                  vectors used when initializing the state.
338  * @param[in,out] pState            Pointer to a ZUC state structure of type
339  *                                  @ref ZucState16_t that will be populated
340  *                                  with the initialized ZUC state.
 * @param[in] lane_mask             16-bit mask. NOTE(review): presumably one
 *                                  bit per packet lane, selecting the lanes
 *                                  to initialize - confirm against the
 *                                  implementation.
341  *
342  * @pre
343  *      None
344  *
345  *****************************************************************************/
346 IMB_DLL_LOCAL void asm_ZucInitialization_16_avx512(ZucKey16_t *pKeys,
347                                                    ZucIv16_t *pIvs,
348                                                    ZucState16_t *pState,
349                                                    const uint16_t lane_mask);
350 
351 IMB_DLL_LOCAL void asm_ZucInitialization_16_gfni_avx512(ZucKey16_t *pKeys,
352                                                       ZucIv16_t *pIvs,
353                                                       ZucState16_t *pState,
354                                                       const uint16_t lane_mask);
355 
356 /**
357  ******************************************************************************
358  *
359  * @description
360  *      Declaration of the external function that implements the working
361  *      stage of the ZUC algorithm. The function will generate 64 bytes of
362  *      keystream.
363  *
364  * @param[in,out] pKeystream        Pointer to the buffer that will
365  *                                  contain the generated keystream.
366  *
367  * @param[in] pState                Pointer to a ZUC state structure of type
368  *                                  @ref ZucState_t
 *                                  NOTE(review): passed as a non-const
 *                                  pointer; presumably the state is advanced
 *                                  by the call - confirm.
369  *
370  * @pre
371  *      A successful call to one of the single-packet asm_ZucInitialization_*
372  *      functions to initialize the ZUC state.
373  *
374  *****************************************************************************/
375 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_avx(uint32_t *pKeystream,
376                                               ZucState_t *pState);
377 
378 /**
379  ******************************************************************************
380  *
381  * @description
382  *      Declaration of the external function that implements the working
383  *      stage of the ZUC algorithm. The function will generate 32 bytes of
384  *      keystream.
385  *
386  * @param[in,out] pKeystream        Pointer to the buffer that will
387  *                                  contain the generated keystream.
388  *
389  * @param[in] pState                Pointer to a ZUC state structure of type
390  *                                  @ref ZucState_t
391  *
392  * @pre
393  *      A successful call to one of the single-packet asm_ZucInitialization_*
394  *      functions to initialize the ZUC state.
395  *
396  *****************************************************************************/
397 IMB_DLL_LOCAL void asm_ZucGenKeystream32B_avx(uint32_t *pKeystream,
398                                               ZucState_t *pState);
399 
400 /**
401  ******************************************************************************
402  *
403  * @description
404  *      Declaration of the external functions that implement the working
405  *      stage of the ZUC algorithm. Each function will generate 16 bytes of
406  *      keystream.
 *      The three declarations below share one signature and differ only in
 *      the name suffix (_avx, _sse, _sse_no_aesni).
407  *
408  * @param[in,out] pKeystream        Pointer to the buffer that will
409  *                                  contain the generated keystream.
410  *
411  * @param[in] pState                Pointer to a ZUC state structure of type
412  *                                  @ref ZucState_t
413  *
414  * @pre
415  *      A successful call to one of the single-packet asm_ZucInitialization_*
416  *      functions to initialize the ZUC state.
417  *
418  *****************************************************************************/
419 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_avx(uint32_t *pKeystream,
420                                               ZucState_t *pState);
421 
422 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_sse(uint32_t *pKeystream,
423                                               ZucState_t *pState);
424 
425 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_sse_no_aesni(uint32_t *pKeystream,
426                                                        ZucState_t *pState);
427 
428 /**
429  ******************************************************************************
430  *
431  * @description
432  *      Declaration of the external functions that implement the working
433  *      stage of the ZUC algorithm. Each function will generate 8 bytes of
434  *      keystream.
 *      The three declarations below share one signature and differ only in
 *      the name suffix (_sse, _sse_no_aesni, _avx). Unlike the 16/32/64-byte
 *      single-packet generators, the buffer is taken as void *.
435  *
436  * @param[in,out] pKeystream        Pointer to the buffer that will
437  *                                  contain the generated keystream.
438  *
439  * @param[in] pState                Pointer to a ZUC state structure of type
440  *                                  @ref ZucState_t
441  *
442  * @pre
443  *      A successful call to one of the single-packet asm_ZucInitialization_*
444  *      functions to initialize the ZUC state.
445  *
446  *****************************************************************************/
447 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_sse(void *pKeystream,
448                                              ZucState_t *pState);
449 
450 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_sse_no_aesni(void *pKeystream,
451                                                       ZucState_t *pState);
452 
453 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_avx(void *pKeystream,
454                                              ZucState_t *pState);
455 
456 /**
457  ******************************************************************************
458  *
459  * @description
460  *      Declaration of the external functions that implement the working
461  *      stage of the ZUC algorithm. Each function will generate N*4 bytes of
462  *      keystream, where N is the number of rounds specified
463  *      in the numRounds parameter (from 1 to 16 rounds,
464  *      i.e. from 4 to 64 bytes).
 *      The three declarations below share one signature and differ only in
 *      the name suffix (_sse, _sse_no_aesni, _avx).
465  *
466  * @param[in,out] pKeystream        Pointer to the buffer that will
467  *                                  contain the generated keystream.
468  *
469  * @param[in] pState                Pointer to a ZUC state structure of type
470  *                                  @ref ZucState_t
471  *
472  * @param[in] numRounds             Number of 4-byte rounds (1 to 16 rounds)
473  *
474  * @pre
475  *      A successful call to one of the single-packet asm_ZucInitialization_*
476  *      functions to initialize the ZUC state.
477  *
478  *****************************************************************************/
479 IMB_DLL_LOCAL void asm_ZucGenKeystream_sse(void *pKeystream,
480                                            ZucState_t *pState,
481                                            uint64_t numRounds);
482 
483 IMB_DLL_LOCAL void asm_ZucGenKeystream_sse_no_aesni(void *pKeystream,
484                                                     ZucState_t *pState,
485                                                     uint64_t numRounds);
486 
487 IMB_DLL_LOCAL void asm_ZucGenKeystream_avx(void *pKeystream,
488                                            ZucState_t *pState,
489                                            uint64_t numRounds);
490 
491 /**
492  ******************************************************************************
493  *
494  * @description
495  *      Declaration of the external functions that implement the working
496  *      stage of the ZUC algorithm. Each function will generate 16 bytes of
497  *      keystream for four packets in parallel.
 *      The declarations below share one signature and differ only in the
 *      name suffix (_sse, _sse_no_aesni, _gfni_sse, _avx).
498  *
499  * @param[in] pState                Pointer to a ZUC state structure of type
500  *                                  @ref ZucState4_t
501  *
502  * @param[in,out] pKeyStr           Array of pointers to 4 buffers that
503  *                                  will contain the generated keystream for
504  *                                  these 4 packets.
505  *
506  * @pre
507  *      A successful call to one of the asm_ZucInitialization_4_* functions
508  *      to initialize the ZUC state.
509  *
510  *****************************************************************************/
511 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_sse(ZucState4_t *pState,
512                                                 uint32_t *pKeyStr[4]);
513 
514 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_sse_no_aesni(ZucState4_t *pState,
515                                                          uint32_t *pKeyStr[4]);
516 
517 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_gfni_sse(ZucState4_t *pState,
518                                                      uint32_t *pKeyStr[4]);
519 
520 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_avx(ZucState4_t *pState,
521                                                 uint32_t *pKeyStr[4]);
522 
523 /**
524  ******************************************************************************
525  *
526  * @description
527  *      Declaration of the external function that implements the working
528  *      stage of the ZUC algorithm. The function will generate 32 bytes of
529  *      keystream for eight packets in parallel.
530  *
531  * @param[in] pState                Pointer to a ZUC state structure of type
532  *                                  @ref ZucState8_t
533  *
534  * @param[in,out] pKeyStr           Array of pointers to 8 buffers that
535  *                                  will contain the generated keystream for
536  *                                  these 8 packets.
537  *
538  * @pre
539  *      A successful call to @ref asm_ZucInitialization_8_avx2 to initialize
540  *      the ZUC state.
541  *
542  *****************************************************************************/
543 IMB_DLL_LOCAL void asm_ZucGenKeystream32B_8_avx2(ZucState8_t *pState,
544                                                  uint32_t *pKeyStr[8]);
545 
546 /**
547  ******************************************************************************
548  *
549  * @description
550  *      Declaration of the external functions that implement the working
551  *      stage of the ZUC algorithm. Each function will generate 64 bytes of
552  *      keystream for sixteen packets in parallel.
 *      The two declarations below share one signature and differ only in the
 *      name suffix (_avx512, _gfni_avx512).
553  *
554  * @param[in] pState                Pointer to a ZUC state structure of type
555  *                                  @ref ZucState16_t
556  *
557  * @param[in,out] pKeyStr           Array of pointers to 16 buffers
558  *                                  that will contain the generated keystream
559  *                                  for these 16 packets.
560  *
561  * @pre
562  *      A successful call to one of the asm_ZucInitialization_16_* functions
563  *      to initialize the ZUC state.
564  *
565  *****************************************************************************/
566 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_16_avx512(ZucState16_t *pState,
567                                                     uint32_t *pKeyStr[16]);
568 
569 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_16_gfni_avx512(ZucState16_t *pState,
570                                                          uint32_t *pKeyStr[16]);
571 /**
572  ******************************************************************************
573  *
574  * @description
575  *      Declaration of the external functions that implement the working
576  *      stage of the ZUC algorithm. Each function will generate 8 bytes of
577  *      keystream for four packets in parallel.
 *      The declarations below share one signature and differ only in the
 *      name suffix (_sse, _sse_no_aesni, _gfni_sse, _avx).
578  *
579  * @param[in] pState                Pointer to a ZUC state structure of type
580  *                                  @ref ZucState4_t
581  *
582  * @param[in,out] pKeyStr           Array of pointers to 4 buffers that
583  *                                  will contain the generated keystream for
584  *                                  these 4 packets.
585  *
586  * @pre
587  *      A successful call to one of the asm_ZucInitialization_4_* functions
588  *      to initialize the ZUC state.
589  *
590  *****************************************************************************/
591 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_sse(ZucState4_t *pState,
592                                                uint32_t *pKeyStr[4]);
593 
594 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_sse_no_aesni(ZucState4_t *pState,
595                                                         uint32_t *pKeyStr[4]);
596 
597 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_gfni_sse(ZucState4_t *pState,
598                                                     uint32_t *pKeyStr[4]);
599 
600 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_avx(ZucState4_t *pState,
601                                                uint32_t *pKeyStr[4]);
602 
603 /**
604  ******************************************************************************
605  *
606  * @description
607  *      Declaration of the external function that implements the working
608  *      stage of the ZUC algorithm. The function will generate 8 bytes of
609  *      keystream for eight packets in parallel.
610  *
611  * @param[in] pState                Pointer to a ZUC state structure of type
612  *                                  @ref ZucState8_t
613  *
614  * @param[in,out] pKeyStr           Array of pointers to 8 buffers that
615  *                                  will contain the generated keystream for
616  *                                  these 8 packets.
617  *
618  * @pre
619  *      A successful call to @ref asm_ZucInitialization_8_avx2 to initialize
620  *      the ZUC state.
621  *
622  *****************************************************************************/
623 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_8_avx2(ZucState8_t *pState,
624                                                 uint32_t *pKeyStr[8]);
625 
626 /**
627  ******************************************************************************
628  *
629  * @description
630  *      Declaration of the external functions that implement the working
631  *      stage of the ZUC algorithm. Each function will generate 8 bytes of
632  *      keystream for sixteen packets in parallel.
 *      The two declarations below share one signature and differ only in the
 *      name suffix (_avx512, _gfni_avx512).
633  *
634  * @param[in] pState                Pointer to a ZUC state structure of type
635  *                                  @ref ZucState16_t
636  *
637  * @param[in,out] pKeyStr           Array of pointers to 16 buffers
638  *                                  that will contain the generated keystream
639  *                                  for these 16 packets.
640  *
 * @param[in] lane_mask             32-bit mask. NOTE(review): presumably one
 *                                  bit per packet lane - confirm against the
 *                                  implementation. The type here is uint32_t,
 *                                  while the asm_ZucInitialization_16_*
 *                                  prototypes declare lane_mask as uint16_t.
 *
641  * @pre
642  *      A successful call to one of the asm_ZucInitialization_16_* functions
643  *      to initialize the ZUC state.
644  *
645  *****************************************************************************/
646 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_16_avx512(ZucState16_t *pState,
647                                                    uint32_t *pKeyStr[16],
648                                                    const uint32_t lane_mask);
649 
650 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_16_gfni_avx512(ZucState16_t *pState,
651                                                       uint32_t *pKeyStr[16],
652                                                       const uint32_t lane_mask);
653 
654 /**
655  ******************************************************************************
656  *
657  * @description
658  *      Declaration of the external functions that implement the working
659  *      stage of the ZUC algorithm. Each function will generate N*4 bytes of
660  *      keystream for sixteen packets in parallel, where N is numRounds.
 *      The two declarations below share one signature and differ only in the
 *      name suffix (_avx512, _gfni_avx512).
661  *
662  * @param[in] pState                Pointer to a ZUC state structure of type
663  *                                  @ref ZucState16_t
664  *
665  * @param[in,out] pKstr             Array of pointers to 16 buffers
666  *                                  that will contain the generated keystream
667  *                                  for these 16 packets.
668  *
669  *
670  * @param[in] numRounds             Number of 4-byte rounds (1 to 16 rounds)
671  *
672  * @pre
673  *      A successful call to one of the asm_ZucInitialization_16_* functions
674  *      to initialize the ZUC state.
675  *
676  *****************************************************************************/
677 IMB_DLL_LOCAL void asm_ZucGenKeystream_16_avx512(ZucState16_t *pState,
678                                                  uint32_t *pKstr[16],
679                                                  const uint64_t numRounds);
680 
681 IMB_DLL_LOCAL void asm_ZucGenKeystream_16_gfni_avx512(ZucState16_t *pState,
682                                                       uint32_t *pKstr[16],
683                                                       const uint64_t numRounds);
684 /**
685  ******************************************************************************
686  *
687  * @description
688  *      Definition of the external function that implements the working
689  *      stage of the ZUC algorithm. The function will generate a multiple of
690  *      4 bytes of keystream for 4 packets in parallel and will XOR this
691  *      keystream with the input text, producing output of up to the minimum
692  *      length of all buffers, rounded up to the nearest multiple of 4 bytes.
693  *      "lengths" array is updated after the function call, with the remaining
694  *      bytes to encrypt.
695  *
696  * @param[in] pState                Pointer to a ZUC state structure of type
697  *                                  @ref ZucState4_t
698  *
699  * @param[in] pIn                   Array of pointers to 4 input buffers.
700  * @param[out] pOut                 Array of pointers to 4 output buffers.
701  * @param[in,out] lengths           Remaining length of buffers to encrypt
702  * @param[in] minLength             Common length for all buffers to encrypt
703  *
704  * @pre
705  *      A successful call to @ref asm_ZucInitialization_4 to initialize the ZUC
706  *      state.
707  *
708  *****************************************************************************/
/*
 * The four prototypes below share one parameter list and differ only in the
 * name suffix (sse, sse_no_aesni, gfni_sse, avx).
 */
709 IMB_DLL_LOCAL void asm_ZucCipher_4_sse(ZucState4_t *pState,
710                                       const uint64_t *pIn[4],
711                                       uint64_t *pOut[4],
712                                       uint16_t lengths[4],
713                                       const uint64_t minLength);
714 
715 IMB_DLL_LOCAL void asm_ZucCipher_4_sse_no_aesni(ZucState4_t *pState,
716                                                 const uint64_t *pIn[4],
717                                                 uint64_t *pOut[4],
718                                                 uint16_t lengths[4],
719                                                 const uint64_t minLength);
720 
721 IMB_DLL_LOCAL void asm_ZucCipher_4_gfni_sse(ZucState4_t *pState,
722                                             const uint64_t *pIn[4],
723                                             uint64_t *pOut[4],
724                                             uint16_t lengths[4],
725                                             const uint64_t minLength);
726 
727 IMB_DLL_LOCAL void asm_ZucCipher_4_avx(ZucState4_t *pState,
728                                        const uint64_t *pIn[4],
729                                        uint64_t *pOut[4],
730                                        uint16_t lengths[4],
731                                        const uint64_t minLength);
732 
733 /**
734  ******************************************************************************
735  *
736  * @description
737  *      Definition of the external function that implements the working
738  *      stage of the ZUC algorithm. The function will generate a multiple of
739  *      4 bytes of keystream for 8 packets in parallel and will XOR this
740  *      keystream with the input text, producing output of up to the minimum
741  *      length of all buffers, rounded up to the nearest multiple of 4 bytes.
742  *      "lengths" array is updated after the function call, with the remaining
743  *      bytes to encrypt.
744  *
745  * @param[in] pState                Pointer to a ZUC state structure of type
746  *                                  @ref ZucState8_t
747  *
748  * @param[in] pIn                   Array of pointers to 8 input buffers.
749  * @param[out] pOut                 Array of pointers to 8 output buffers.
750  * @param[in,out] lengths           Remaining length of buffers to encrypt
751  * @param[in] minLength             Common length for all buffers to encrypt
752  *
753  * @pre
754  *      A successful call to @ref asm_ZucInitialization_8 to initialize the ZUC
755  *      state.
756  *
757  *****************************************************************************/
/*
 * NOTE(review): "lengths" is declared const here, whereas the description
 * above says the array is updated by the call and the 4-lane prototypes
 * (asm_ZucCipher_4_*) take it as a non-const array. Confirm against the
 * implementation which of the two is correct.
 */
758 IMB_DLL_LOCAL void asm_ZucCipher_8_avx2(ZucState8_t *pState,
759                                         const uint64_t *pIn[8],
760                                         uint64_t *pOut[8],
761                                         const uint16_t lengths[8],
762                                         const uint64_t minLength);
763 
764 /**
765  ******************************************************************************
766  *
767  * @description
768  *      Definition of the external function that implements the working
769  *      stage of the ZUC algorithm. The function will generate a multiple of
770  *      4 bytes of keystream for sixteen packets in parallel and will XOR this
771  *      keystream with the input text, producing output of up to the minimum
772  *      length of all buffers, rounded up to the nearest multiple of 4 bytes.
773  *      "lengths" array is updated after the function call, with the remaining
774  *      bytes to encrypt.
775  *
776  * @param[in] pState                Pointer to a ZUC state structure of type
777  *                                  @ref ZucState16_t
778  * @param[in] pIn                   Array of pointers to 16 input buffers.
779  * @param[out] pOut                 Array of pointers to 16 output buffers.
780  * @param[in,out] lengths           Remaining length of buffers to encrypt
781  * @param[in] minLength             Common length for all buffers to encrypt
782  *
783  * @pre
784  *      A successful call to @ref asm_ZucInitialization_16 to initialize the ZUC
785  *      state.
786  *
787  *****************************************************************************/
/*
 * NOTE(review): both prototypes below declare "lengths" as const, whereas the
 * description above says the array is updated by the call and the 4-lane
 * prototypes (asm_ZucCipher_4_*) take it as a non-const array. Confirm
 * against the implementation which of the two is correct.
 */
788 IMB_DLL_LOCAL void asm_ZucCipher_16_avx512(ZucState16_t *pState,
789                                            const uint64_t *pIn[16],
790                                            uint64_t *pOut[16],
791                                            const uint16_t lengths[16],
792                                            const uint64_t minLength);
793 
/* Declared with the same parameter list as asm_ZucCipher_16_avx512. */
794 IMB_DLL_LOCAL void asm_ZucCipher_16_gfni_avx512(ZucState16_t *pState,
795                                                 const uint64_t *pIn[16],
796                                                 uint64_t *pOut[16],
797                                                 const uint16_t lengths[16],
798                                                 const uint64_t minLength);
799 
800 /**
801  ******************************************************************************
802  * @description
803  *      Definition of the external function to update the authentication tag
804  *      based on keystream and data (SSE variant)
805  *
806  * @param[in] T                     Authentication tag
807  *
808  * @param[in] ks                    Pointer to key stream
809  *
810  * @param[in] data                  Pointer to the data
811  *
812  * @pre
813  *      None
814  *
815  *****************************************************************************/
/*
 * NOTE(review): T is passed by value and the function returns uint32_t, so
 * the updated tag is presumably delivered through the return value - confirm.
 * NOTE(review): the "16B" in the name suggests 16 bytes of data are consumed
 * per call - confirm against the assembly source.
 */
816 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BSSE(uint32_t T, const void *ks,
817                                            const void *data);
818 
/* Declared with the same parameter list as asm_Eia3Round16BSSE. */
819 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BSSE_no_aesni(uint32_t T, const void *ks,
820                                                     const void *data);
821 
822 /**
823  ******************************************************************************
824  * @description
825  *      Definition of the external function to return the authentication
826  *      update value to be XOR'ed with current authentication tag (SSE variant)
827  *
828  * @param[in] ks                    Pointer to key stream
829  *
830  * @param[in] data                  Pointer to the data
831  *
832  * @param[in] n_words               Number of data bits to be processed
833  *
834  * @return
835  *      Authentication update value to be XOR'ed with the current tag
836  *
837  *****************************************************************************/
/*
 * NOTE(review): the parameter is named n_words but is documented as a number
 * of bits; confirm the actual unit against the implementation and callers.
 */
838 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderSSE(const void *ks, const void *data,
839                                             const uint64_t n_words);
840 
/* Declared with the same parameter list as asm_Eia3RemainderSSE. */
841 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderSSE_no_aesni(const void *ks,
842                                                      const void *data,
843                                                      const uint64_t n_words);
844 
845 /**
846  ******************************************************************************
847  * @description
848  *      Definition of the external function to update the authentication tag
849  *      based on keystream and data (AVX variant)
850  *
851  * @param[in] T                     Authentication tag
852  *
853  * @param[in] ks                    Pointer to key stream
854  *
855  * @param[in] data                  Pointer to the data
856  *
857  * @pre
858  *      None
859  *
860  *****************************************************************************/
/*
 * NOTE(review): in the three by-value prototypes (asm_Eia3Round64BAVX,
 * asm_Eia3Round32BAVX, asm_Eia3Round16BAVX) T is passed by value and the
 * return type is uint32_t, so the updated tag is presumably the return
 * value - confirm.
 */
861 IMB_DLL_LOCAL uint32_t asm_Eia3Round64BAVX(uint32_t T, const void *ks,
862                                            const void *data);
863 
/*
 * The two 16-lane prototypes below return void and take T as a non-const
 * uint32_t pointer, ks as a pointer to const pointers, data as a pointer to
 * pointers and len as a non-const uint16_t pointer.
 * NOTE(review): presumably each of these holds one entry per lane (16) and
 * T/len are updated in place - confirm against the assembly source.
 */
864 IMB_DLL_LOCAL void asm_Eia3Round64BAVX512_16(uint32_t *T,
865                                              const void * const *ks,
866                                              const void **data,
867                                              uint16_t *len);
868 
869 IMB_DLL_LOCAL void asm_Eia3Round64B_16_VPCLMUL(uint32_t *T,
870                                                const void * const *ks,
871                                                const void **data,
872                                                uint16_t *len);
873 
874 IMB_DLL_LOCAL uint32_t asm_Eia3Round32BAVX(uint32_t T, const void *ks,
875                                            const void *data);
876 
877 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BAVX(uint32_t T, const void *ks,
878                                            const void *data);
879 
/*
 * Single-buffer AVX512 form: returns void and takes the tag through a
 * non-const pointer (presumably updated in place - confirm).
 */
880 IMB_DLL_LOCAL void asm_Eia3Round64BAVX512(uint32_t *T, const void *ks,
881                                           const void *data);
882 
883 /**
884  ******************************************************************************
885  * @description
886  *      Definition of the external function to return the authentication
887  *      update value to be XOR'ed with current authentication tag (AVX variant)
888  *
889  * @param[in] ks                    Pointer to key stream
890  *
891  * @param[in] data                  Pointer to the data
892  *
893  * @param[in] n_words               Number of data bits to be processed
894  *
895  * @return
896  *      Authentication update value to be XOR'ed with the current tag
897  *
898  *****************************************************************************/
/*
 * NOTE(review): the parameter is named n_words but is documented as a number
 * of bits; confirm the actual unit against the implementation and callers.
 */
899 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderAVX(const void *ks, const void *data,
900                                             const uint64_t n_words);
901 
902 /**
903  ******************************************************************************
904  * @description
905  *      Definition of the external function to return the final authentication
906  *      tag of the message.
907  *
908  * @param[in,out] T                 Pointer to authentication tag to be updated
909  *
910  * @param[in] ks                    Pointer to key stream
911  *
912  * @param[in] data                  Pointer to the data
913  *
914  * @param[in] n_bits                Number of data bits to be processed
915  *
916  * @pre
917  *      None
918  *
919  *****************************************************************************/
920 IMB_DLL_LOCAL void asm_Eia3RemainderAVX512(uint32_t *T, const void *ks,
921                                            const void *data,
922                                            const uint64_t n_bits);
923 
/*
 * 16-lane form. Unlike asm_Eia3RemainderAVX512 it returns uint32_t, takes
 * data as a pointer to pointers, and adds a non-const uint16_t pointer (lens)
 * and a 64-bit commonBits value.
 * NOTE(review): ks is a single "const void * const" here, whereas
 * asm_Eia3Round64BAVX512_16 takes "const void * const *ks"; confirm that the
 * difference is intentional. The meaning of the return value is not
 * documented in this header.
 */
924 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderAVX512_16(uint32_t *T,
925                                                   const void * const ks,
926                                                   const void **data,
927                                                   uint16_t *lens,
928                                                   const uint64_t commonBits);
929 
/*
 * Multi-buffer EIA3 job entry points, one prototype per implementation
 * flavour (name suffix). The 4- and 8-lane prototypes all take per-lane
 * arrays: const pointers for key, IV and input buffer, non-const uint32_t
 * pointers for pMacI, 16-bit lengths expressed in bits (lengthInBits) and one
 * const pointer per lane in job_in_lane.
 * NOTE(review): pMacI presumably receives the computed MAC of each lane and
 * job_in_lane presumably marks which lanes hold a valid job - confirm
 * against the implementations.
 */
930 IMB_DLL_LOCAL
931 void zuc_eia3_4_buffer_job_gfni_sse(const void * const pKey[4],
932                                     const void * const pIv[4],
933                                     const void * const pBufferIn[4],
934                                     uint32_t *pMacI[4],
935                                     const uint16_t lengthInBits[4],
936                                     const void * const job_in_lane[4]);
937 
938 IMB_DLL_LOCAL
939 void zuc_eia3_4_buffer_job_no_gfni_sse(const void * const pKey[4],
940                                        const void * const pIv[4],
941                                        const void * const pBufferIn[4],
942                                        uint32_t *pMacI[4],
943                                        const uint16_t lengthInBits[4],
944                                        const void * const job_in_lane[4]);
945 
946 IMB_DLL_LOCAL
947 void zuc_eia3_4_buffer_job_sse_no_aesni(const void * const pKey[4],
948                                         const void * const pIv[4],
949                                         const void * const pBufferIn[4],
950                                         uint32_t *pMacI[4],
951                                         const uint16_t lengthInBits[4],
952                                         const void * const job_in_lane[4]);
953 
954 IMB_DLL_LOCAL
955 void zuc_eia3_4_buffer_job_avx(const void * const pKey[4],
956                                const void * const pIv[4],
957                                const void * const pBufferIn[4],
958                                uint32_t *pMacI[4],
959                                const uint16_t lengthInBits[4],
960                                const void * const job_in_lane[4]);
961 
962 IMB_DLL_LOCAL
963 void zuc_eia3_8_buffer_job_avx2(const void * const pKey[8],
964                                 const void * const pIv[8],
965                                 const void * const pBufferIn[8],
966                                 uint32_t *pMacI[8],
967                                 const uint16_t lengthInBits[8],
968                                 const void * const job_in_lane[8]);
969 
/*
 * The 16-lane AVX512 flavours take a single MB_MGR_ZUC_OOO pointer instead of
 * per-lane arrays.
 */
970 IMB_DLL_LOCAL
971 void zuc_eia3_16_buffer_job_no_gfni_avx512(MB_MGR_ZUC_OOO *ooo);
972 
973 IMB_DLL_LOCAL
974 void zuc_eia3_16_buffer_job_gfni_avx512(MB_MGR_ZUC_OOO *ooo);
975 
976 /* The ZUC S-boxes S0 and S1: 256-entry byte tables, declared here and defined elsewhere */
977 extern const uint8_t S0[256];
978 extern const uint8_t S1[256];
979 
/*
 * SSE flavour of the EEA3/EIA3 buffer API. These prototypes carry no
 * IMB_DLL_LOCAL marker.
 *  - zuc_eea3_1_buffer_*: one key, IV, input and output buffer; length given
 *    in bytes.
 *  - zuc_eea3_4_buffer_*: same parameters as arrays of 4 entries.
 *  - zuc_eea3_n_buffer_*: unsized arrays plus a numBuffers count.
 *  - zuc_eia3_1_buffer_*: one key, IV and input buffer; length given in bits;
 *    pMacI is a non-const uint32_t pointer.
 *  - zuc_eia3_n_buffer_*: unsized arrays plus a numBuffers count.
 * NOTE(review): presumably each array holds numBuffers entries and the MAC is
 * written through pMacI - confirm against the implementations.
 */
980 void zuc_eea3_1_buffer_sse(const void *pKey, const void *pIv,
981                            const void *pBufferIn, void *pBufferOut,
982                            const uint32_t lengthInBytes);
983 
984 void zuc_eea3_4_buffer_sse(const void * const pKey[4],
985                            const void * const pIv[4],
986                            const void * const pBufferIn[4],
987                            void *pBufferOut[4],
988                            const uint32_t lengthInBytes[4]);
989 
990 void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[],
991                            const void * const pBufferIn[], void *pBufferOut[],
992                            const uint32_t lengthInBytes[],
993                            const uint32_t numBuffers);
994 
995 void zuc_eia3_1_buffer_sse(const void *pKey, const void *pIv,
996                            const void *pBufferIn, const uint32_t lengthInBits,
997                            uint32_t *pMacI);
998 
999 void zuc_eia3_n_buffer_sse(const void * const pKey[],
1000                            const void * const pIv[],
1001                            const void * const pBufferIn[],
1002                            const uint32_t lengthInBits[],
1003                            uint32_t *pMacI[],
1004                            const uint32_t numBuffers);
1005 
/* Declared with the same parameter list as zuc_eia3_n_buffer_sse. */
1006 void zuc_eia3_n_buffer_gfni_sse(const void * const pKey[],
1007                                 const void * const pIv[],
1008                                 const void * const pBufferIn[],
1009                                 const uint32_t lengthInBits[],
1010                                 uint32_t *pMacI[],
1011                                 const uint32_t numBuffers);
1012 
/*
 * "sse_no_aesni" and "gfni_sse" flavours of the EEA3/EIA3 buffer API.
 * EEA3 prototypes take input and output buffers with lengths in bytes; EIA3
 * prototypes take input buffers with lengths in bits plus non-const uint32_t
 * pointers (pMacI). The *_n_buffer_* forms add a numBuffers count.
 * NOTE(review): no zuc_eea3_1_buffer_gfni_sse or zuc_eia3_1_buffer_gfni_sse
 * prototype is declared in this part of the header - confirm that is
 * intentional.
 */
1013 void zuc_eea3_1_buffer_sse_no_aesni(const void *pKey, const void *pIv,
1014                                     const void *pBufferIn, void *pBufferOut,
1015                                     const uint32_t lengthInBytes);
1016 
1017 void zuc_eea3_4_buffer_sse_no_aesni(const void * const pKey[4],
1018                                     const void * const pIv[4],
1019                                     const void * const pBufferIn[4],
1020                                     void *pBufferOut[4],
1021                                     const uint32_t lengthInBytes[4]);
1022 
1023 void zuc_eea3_n_buffer_sse_no_aesni(const void * const pKey[],
1024                                     const void * const pIv[],
1025                                     const void * const pBufferIn[],
1026                                     void *pBufferOut[],
1027                                     const uint32_t lengthInBytes[],
1028                                     const uint32_t numBuffers);
1029 
1030 void zuc_eea3_4_buffer_gfni_sse(const void * const pKey[4],
1031                                 const void * const pIv[4],
1032                                 const void * const pBufferIn[4],
1033                                 void *pBufferOut[4],
1034                                 const uint32_t lengthInBytes[4]);
1035 
1036 void zuc_eea3_n_buffer_gfni_sse(const void * const pKey[],
1037                                 const void * const pIv[],
1038                                 const void * const pBufferIn[],
1039                                 void *pBufferOut[],
1040                                 const uint32_t lengthInBytes[],
1041                                 const uint32_t numBuffers);
1042 
1043 void zuc_eia3_1_buffer_sse_no_aesni(const void *pKey, const void *pIv,
1044                                     const void *pBufferIn,
1045                                     const uint32_t lengthInBits,
1046                                     uint32_t *pMacI);
1047 
1048 void zuc_eia3_n_buffer_sse_no_aesni(const void * const pKey[],
1049                                     const void * const pIv[],
1050                                     const void * const pBufferIn[],
1051                                     const uint32_t lengthInBits[],
1052                                     uint32_t *pMacI[],
1053                                     const uint32_t numBuffers);
1054 
/*
 * AVX flavour of the EEA3/EIA3 buffer API: single-buffer, 4-buffer (EEA3
 * only) and n-buffer forms. EEA3 lengths are given in bytes, EIA3 lengths in
 * bits; EIA3 prototypes take non-const uint32_t pointers (pMacI).
 */
1055 void zuc_eea3_1_buffer_avx(const void *pKey, const void *pIv,
1056                            const void *pBufferIn, void *pBufferOut,
1057                            const uint32_t lengthInBytes);
1058 
1059 void zuc_eea3_4_buffer_avx(const void * const pKey[4],
1060                            const void * const pIv[4],
1061                            const void * const pBufferIn[4],
1062                            void *pBufferOut[4],
1063                            const uint32_t lengthInBytes[4]);
1064 
1065 void zuc_eea3_n_buffer_avx(const void * const pKey[], const void * const pIv[],
1066                            const void * const pBufferIn[], void *pBufferOut[],
1067                            const uint32_t lengthInBytes[],
1068                            const uint32_t numBuffers);
1069 
1070 void zuc_eia3_1_buffer_avx(const void *pKey, const void *pIv,
1071                            const void *pBufferIn, const uint32_t lengthInBits,
1072                            uint32_t *pMacI);
1073 
1074 void zuc_eia3_n_buffer_avx(const void * const pKey[],
1075                            const void * const pIv[],
1076                            const void * const pBufferIn[],
1077                            const uint32_t lengthInBits[],
1078                            uint32_t *pMacI[],
1079                            const uint32_t numBuffers);
1080 
1081 
/*
 * AVX2 flavour of the EEA3/EIA3 buffer API: single-buffer and n-buffer forms
 * only (no fixed-size multi-buffer prototype is declared here). EEA3 lengths
 * are given in bytes, EIA3 lengths in bits; EIA3 prototypes take non-const
 * uint32_t pointers (pMacI).
 */
1082 void zuc_eea3_1_buffer_avx2(const void *pKey, const void *pIv,
1083                             const void *pBufferIn, void *pBufferOut,
1084                             const uint32_t lengthInBytes);
1085 
1086 void zuc_eea3_n_buffer_avx2(const void * const pKey[], const void * const pIv[],
1087                             const void * const pBufferIn[], void *pBufferOut[],
1088                             const uint32_t lengthInBytes[],
1089                             const uint32_t numBuffers);
1090 
1091 void zuc_eia3_1_buffer_avx2(const void *pKey, const void *pIv,
1092                             const void *pBufferIn, const uint32_t lengthInBits,
1093                             uint32_t *pMacI);
1094 
1095 void zuc_eia3_n_buffer_avx2(const void * const pKey[],
1096                             const void * const pIv[],
1097                             const void * const pBufferIn[],
1098                             const uint32_t lengthInBits[],
1099                             uint32_t *pMacI[],
1100                             const uint32_t numBuffers);
1101 
/*
 * AVX512 flavour of the EEA3/EIA3 buffer API: single-buffer and n-buffer
 * forms, with additional "gfni_avx512" n-buffer prototypes that share the
 * parameter lists of their "avx512" counterparts. EEA3 lengths are given in
 * bytes, EIA3 lengths in bits; EIA3 prototypes take non-const uint32_t
 * pointers (pMacI).
 */
1102 void zuc_eea3_1_buffer_avx512(const void *pKey, const void *pIv,
1103                               const void *pBufferIn, void *pBufferOut,
1104                               const uint32_t lengthInBytes);
1105 
1106 void zuc_eea3_n_buffer_avx512(const void * const pKey[],
1107                               const void * const pIv[],
1108                               const void * const pBufferIn[],
1109                               void *pBufferOut[],
1110                               const uint32_t lengthInBytes[],
1111                               const uint32_t numBuffers);
1112 
1113 void zuc_eea3_n_buffer_gfni_avx512(const void * const pKey[],
1114                                    const void * const pIv[],
1115                                    const void * const pBufferIn[],
1116                                    void *pBufferOut[],
1117                                    const uint32_t lengthInBytes[],
1118                                    const uint32_t numBuffers);
1119 
1120 void zuc_eia3_1_buffer_avx512(const void *pKey, const void *pIv,
1121                               const void *pBufferIn,
1122                               const uint32_t lengthInBits,
1123                               uint32_t *pMacI);
1124 
1125 void zuc_eia3_n_buffer_avx512(const void * const pKey[],
1126                               const void * const pIv[],
1127                               const void * const pBufferIn[],
1128                               const uint32_t lengthInBits[],
1129                               uint32_t *pMacI[],
1130                               const uint32_t numBuffers);
1131 
1132 void zuc_eia3_n_buffer_gfni_avx512(const void * const pKey[],
1133                                    const void * const pIv[],
1134                                    const void * const pBufferIn[],
1135                                    const uint32_t lengthInBits[],
1136                                    uint32_t *pMacI[],
1137                                    const uint32_t numBuffers);
1138 
1139 /* Internal API: IMB_DLL_LOCAL fixed-size (4-lane AVX, 8-lane AVX2) helpers */
/*
 * NOTE(review): the EEA3 helpers name their length parameter "length" without
 * a unit, whereas zuc_eea3_4_buffer_avx uses "lengthInBytes"; presumably
 * bytes here too - confirm against the implementations.
 */
1140 IMB_DLL_LOCAL
1141 void _zuc_eea3_4_buffer_avx(const void * const pKey[4],
1142                             const void * const pIv[4],
1143                             const void * const pBufferIn[4],
1144                             void *pBufferOut[4],
1145                             const uint32_t length[4]);
1146 
1147 IMB_DLL_LOCAL
1148 void _zuc_eia3_4_buffer_avx(const void * const pKey[4],
1149                             const void * const pIv[4],
1150                             const void * const pBufferIn[4],
1151                             const uint32_t lengthInBits[4],
1152                             uint32_t *pMacI[4]);
1153 
1154 IMB_DLL_LOCAL
1155 void _zuc_eea3_8_buffer_avx2(const void * const pKey[8],
1156                              const void * const pIv[8],
1157                              const void * const pBufferIn[8],
1158                              void *pBufferOut[8],
1159                              const uint32_t length[8]);
1160 
1161 IMB_DLL_LOCAL
1162 void _zuc_eia3_8_buffer_avx2(const void * const pKey[8],
1163                              const void * const pIv[8],
1164                              const void * const pBufferIn[8],
1165                              const uint32_t lengthInBits[8],
1166                              uint32_t *pMacI[8]);
1167 
1168 #endif /* ZUC_INTERNAL_H_ */
1169 
1170