1 /*******************************************************************************
2   Copyright (c) 2012-2020, Intel Corporation
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6 
7       * Redistributions of source code must retain the above copyright notice,
8         this list of conditions and the following disclaimer.
9       * Redistributions in binary form must reproduce the above copyright
10         notice, this list of conditions and the following disclaimer in the
11         documentation and/or other materials provided with the distribution.
12       * Neither the name of Intel Corporation nor the names of its contributors
13         may be used to endorse or promote products derived from this software
14         without specific prior written permission.
15 
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27 
28 #ifndef IMB_IPSEC_MB_INTERNAL_H
29 #define IMB_IPSEC_MB_INTERNAL_H
30 
31 #include "intel-ipsec-mb.h"
32 
/* Digest lengths expressed in words; word width per algorithm is given by the
 * *_DIGEST_WORD_SIZE macros below */
#define NUM_MD5_DIGEST_WORDS     4
#define NUM_SHA_DIGEST_WORDS     5
#define NUM_SHA_256_DIGEST_WORDS 8
#define NUM_SHA_224_DIGEST_WORDS 7
#define NUM_SHA_512_DIGEST_WORDS 8
#define NUM_SHA_384_DIGEST_WORDS 6

/* Size of one digest word in bytes: 32-bit words for SHA-1/224/256,
 * 64-bit words for SHA-384/512 */
#define SHA_DIGEST_WORD_SIZE      4
#define SHA224_DIGEST_WORD_SIZE   4
#define SHA256_DIGEST_WORD_SIZE   4
#define SHA384_DIGEST_WORD_SIZE   8
#define SHA512_DIGEST_WORD_SIZE   8
45 
/* Number of parallel lanes (multi-buffer width) per architecture:
 * AVX512, AVX2, AVX and SSE */
#define AVX512_NUM_SHA1_LANES   16
#define AVX512_NUM_SHA256_LANES 16
#define AVX512_NUM_SHA512_LANES 8
#define AVX512_NUM_MD5_LANES    32
#define AVX512_NUM_DES_LANES    16

#define AVX2_NUM_SHA1_LANES     8
#define AVX2_NUM_SHA256_LANES   8
#define AVX2_NUM_SHA512_LANES   4
#define AVX2_NUM_MD5_LANES      16

#define AVX_NUM_SHA1_LANES      4
#define AVX_NUM_SHA256_LANES    4
#define AVX_NUM_SHA512_LANES    2
#define AVX_NUM_MD5_LANES       8

/* SSE uses the same lane counts as AVX (both are 128-bit wide paths) */
#define SSE_NUM_SHA1_LANES   AVX_NUM_SHA1_LANES
#define SSE_NUM_SHA256_LANES AVX_NUM_SHA256_LANES
#define SSE_NUM_SHA512_LANES AVX_NUM_SHA512_LANES
#define SSE_NUM_MD5_LANES    AVX_NUM_MD5_LANES
67 
68 /*
69  * Each row is sized to hold enough lanes for AVX2, AVX1 and SSE use a subset
70  * of each row. Thus one row is not adjacent in memory to its neighboring rows
71  * in the case of SSE and AVX1.
72  */
73 #define MD5_DIGEST_SZ    (NUM_MD5_DIGEST_WORDS * AVX512_NUM_MD5_LANES)
74 #define SHA1_DIGEST_SZ   (NUM_SHA_DIGEST_WORDS * AVX512_NUM_SHA1_LANES)
75 #define SHA256_DIGEST_SZ (NUM_SHA_256_DIGEST_WORDS * AVX512_NUM_SHA256_LANES)
76 #define SHA512_DIGEST_SZ (NUM_SHA_512_DIGEST_WORDS * AVX512_NUM_SHA512_LANES)
77 
/* Maximum size of the ZUC state in double words
 * (LFSR (16) + X0-X3 (4) + R1-R2 (2)).
 * For AVX512, each takes 16 double words, defining the maximum required size.
 * Replacement list is fully parenthesized so the macro expands safely inside
 * larger expressions (e.g. division or a wider multiplication). */
#define MAX_ZUC_STATE_SZ (16 * (16 + 4 + 2))
81 
82 /**
83  *****************************************************************************
84  * @description
85  *      Packed structure to store the ZUC state for 16 packets. *
86  *****************************************************************************/
87 typedef struct zuc_state_16_s {
88     uint32_t lfsrState[16][16];
89     /**< State registers of the LFSR */
90     uint32_t fR1[16];
91     /**< register of F */
92     uint32_t fR2[16];
93     /**< register of F */
94     uint32_t bX0[16];
95     /**< Output X0 of the bit reorganization for 16 packets */
96     uint32_t bX1[16];
97     /**< Output X1 of the bit reorganization for 16 packets */
98     uint32_t bX2[16];
99     /**< Output X2 of the bit reorganization for 16 packets */
100     uint32_t bX3[16];
101     /**< Output X3 of the bit reorganization for 16 packets */
102 } ZucState16_t;
103 
104 /*
105  * Argument structures for various algorithms
106  */
/* Per-lane AES cipher arguments for up to 16 parallel lanes */
typedef struct {
        const uint8_t *in[16];    /**< input buffer pointer, one per lane */
        uint8_t *out[16];         /**< output buffer pointer, one per lane */
        const uint32_t *keys[16]; /**< expanded key schedule, one per lane */
        DECLARE_ALIGNED(imb_uint128_t IV[16], 64); /**< per-lane IV */
        /* 15 round keys (AES-256 worst case) x 16 lanes, lane-interleaved */
        DECLARE_ALIGNED(imb_uint128_t key_tab[15][16], 64);
} AES_ARGS;
114 
/* Multi-lane SHA-1 arguments: interleaved digest rows + per-lane data ptrs */
typedef struct {
        /* 5 digest words x 16 (AVX512) lanes; narrower archs use a subset */
        DECLARE_ALIGNED(uint32_t digest[SHA1_DIGEST_SZ], 32);
        uint8_t *data_ptr[AVX512_NUM_SHA1_LANES]; /**< input ptr per lane */
} SHA1_ARGS;
119 
/* Multi-lane SHA-256/224 arguments */
typedef struct {
        /* 8 digest words x 16 (AVX512) lanes; narrower archs use a subset */
        DECLARE_ALIGNED(uint32_t digest[SHA256_DIGEST_SZ], 32);
        uint8_t *data_ptr[AVX512_NUM_SHA256_LANES]; /**< input ptr per lane */
} SHA256_ARGS;
124 
/* Multi-lane SHA-512/384 arguments (64-bit digest words) */
typedef struct {
        /* 8 digest words x 8 (AVX512) lanes; narrower archs use a subset */
        DECLARE_ALIGNED(uint64_t digest[SHA512_DIGEST_SZ], 32);
        uint8_t *data_ptr[AVX512_NUM_SHA512_LANES]; /**< input ptr per lane */
}  SHA512_ARGS;
129 
/* Multi-lane MD5 arguments */
typedef struct {
        /* 4 digest words x 32 (AVX512) lanes; narrower archs use a subset */
        DECLARE_ALIGNED(uint32_t digest[MD5_DIGEST_SZ], 32);
        uint8_t *data_ptr[AVX512_NUM_MD5_LANES]; /**< input ptr per lane */
} MD5_ARGS;
134 
/* Per-lane AES-XCBC arguments for 16 parallel lanes */
typedef struct {
        const uint8_t *in[16];    /**< input buffer pointer, one per lane */
        const uint32_t *keys[16]; /**< expanded key schedule, one per lane */
        DECLARE_ALIGNED(imb_uint128_t ICV[16], 32); /**< per-lane ICV state */
        /* 11 round keys (AES-128) x 16 lanes, lane-interleaved */
        DECLARE_ALIGNED(imb_uint128_t key_tab[11][16], 64);
} AES_XCBC_ARGS_x16;
141 
/* Per-lane DES arguments for up to 16 parallel lanes */
typedef struct {
        const uint8_t *in[AVX512_NUM_DES_LANES];  /**< input ptr per lane */
        uint8_t *out[AVX512_NUM_DES_LANES];       /**< output ptr per lane */
        const uint8_t *keys[AVX512_NUM_DES_LANES]; /**< key schedule per lane */
        /* 2 x 32-bit words per lane hold one 64-bit DES IV */
        uint32_t IV[AVX512_NUM_DES_LANES * 2]; /* uint32_t is more handy here */
        uint32_t partial_len[AVX512_NUM_DES_LANES]; /**< partial block length */
        uint32_t block_len[AVX512_NUM_DES_LANES];   /**< block length per lane */
        const uint8_t *last_in[AVX512_NUM_DES_LANES]; /**< last input block */
        uint8_t *last_out[AVX512_NUM_DES_LANES];      /**< last output block */
} DES_ARGS_x16;
152 
/* Per-lane ZUC arguments for 16 parallel lanes */
typedef struct {
        const uint8_t *in[16];   /**< input buffer pointer, one per lane */
        uint8_t *out[16];        /**< output buffer pointer, one per lane */
        const uint8_t *keys[16]; /**< key pointer, one per lane */
        const uint8_t *iv[16];   /**< IV pointer, one per lane */
        DECLARE_ALIGNED(uint32_t digest[16], 64);   /**< digest per lane */
        /* previous keystream word per lane (exact use is in the ZUC code) */
        DECLARE_ALIGNED(uint64_t prev_ks[16], 64);
} ZUC_ARGS_x16;
161 
/* AES out-of-order scheduler fields */
typedef struct {
        AES_ARGS args;         /**< per-lane cipher arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 16); /**< per-lane length field */
        /* each nibble is index (0...15) of an unused lane,
         * the last nibble is set to F as a flag
         */
        uint64_t unused_lanes;
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        DECLARE_ALIGNED(uint64_t lens64[16], 64); /**< 64-bit per-lane lengths */
        /* NOTE(review): purpose not visible in this header — presumably an
         * overwrite-detection sentinel; confirm in the manager code */
        uint64_t road_block;
} MB_MGR_AES_OOO;
175 
/* DOCSIS AES out-of-order scheduler fields (cipher + CRC32 per lane) */
typedef struct {
        AES_ARGS args;         /**< per-lane cipher arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 16); /**< per-lane length field */
        /* each nibble is index (0...15) of an unused lane,
         * the last nibble is set to F as a flag
         */
        uint64_t unused_lanes;
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        DECLARE_ALIGNED(imb_uint128_t crc_init[16], 64); /**< CRC state/lane */
        DECLARE_ALIGNED(uint16_t crc_len[16], 16);  /**< CRC length per lane */
        DECLARE_ALIGNED(uint8_t crc_done[16], 16);  /**< CRC done flag/lane */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_DOCSIS_AES_OOO;
191 
/* AES XCBC out-of-order scheduler fields — per-lane bookkeeping */
typedef struct {
        /* up to two 16-byte trailing blocks for this lane */
        DECLARE_ALIGNED(uint8_t final_block[2 * 16], 32);
        IMB_JOB *job_in_lane;  /**< job assigned to this lane */
        uint64_t final_done;   /**< flag: final block processed */
} XCBC_LANE_DATA;
198 
/* AES XCBC out-of-order scheduler */
typedef struct {
        AES_XCBC_ARGS_x16 args; /**< per-lane cipher arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 32); /**< per-lane length field */
        /* each byte is index (0...3) of unused lanes
         * byte 4 is set to FF as a flag
         * NOTE(review): this comment looks stale for a 16-lane structure —
         * the packing likely differs per arch; confirm in the manager code
         */
        uint64_t unused_lanes;
        XCBC_LANE_DATA ldata[16]; /**< per-lane state (see XCBC_LANE_DATA) */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_AES_XCBC_OOO;
210 
/* AES-CCM out-of-order scheduler structure */
typedef struct {
        AES_ARGS args; /* need to re-use AES arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 32);      /**< per-lane length */
        DECLARE_ALIGNED(uint16_t init_done[16], 32); /**< init phase done flag */
        /* each byte is index (0...3) of unused lanes
         * byte 4 is set to FF as a flag
         * NOTE(review): this comment looks stale for a 16-lane structure —
         * confirm the actual packing in the manager code
         */
        uint64_t unused_lanes;
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* 4 x 16-byte initial blocks per lane, 16 lanes */
        DECLARE_ALIGNED(uint8_t init_blocks[16 * (4 * 16)], 64);
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_CCM_OOO;
225 
226 
/* AES-CMAC out-of-order scheduler structure */
typedef struct {
        AES_ARGS args; /* need to re-use AES arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 32);      /**< per-lane length */
        DECLARE_ALIGNED(uint16_t init_done[16], 32); /**< init phase done flag */
        /* each byte is index (0...3) of unused lanes
         * byte 4 is set to FF as a flag
         * NOTE(review): this comment looks stale for a 16-lane structure —
         * confirm the actual packing in the manager code
         */
        uint64_t unused_lanes;
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* 16 bytes of scratch space per lane, 16 lanes */
        DECLARE_ALIGNED(uint8_t scratch[16 * 16], 32);
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_CMAC_OOO;
241 
242 
/* DES out-of-order scheduler fields */
typedef struct {
        DES_ARGS_x16 args;     /**< per-lane cipher arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 16); /**< per-lane length field */
        /* each nibble is index (0...7) of unused lanes
         * nibble 8 is set to F as a flag
         */
        uint64_t unused_lanes;
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_DES_OOO;
255 
/* ZUC out-of-order scheduler fields */
typedef struct {
        ZUC_ARGS_x16 args;     /**< per-lane cipher arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 16); /**< per-lane length field */
        uint64_t unused_lanes; /**< packed list of free lane indices */
        IMB_JOB *job_in_lane[16]; /**< job assigned to each lane */
        uint64_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* interleaved ZUC state, sized for the AVX512 worst case */
        DECLARE_ALIGNED(uint32_t state[MAX_ZUC_STATE_SZ], 64);
        uint16_t init_not_done;       /**< per-lane bit: init still pending */
        uint16_t unused_lane_bitmask; /**< per-lane bit: lane is free */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_ZUC_OOO;
268 
/* HMAC-SHA1 and HMAC-SHA256/224 per-lane bookkeeping */
typedef struct {
        /* YMM aligned access to extra_block;
         * holds up to 2 trailing blocks plus padding/size area */
        DECLARE_ALIGNED(uint8_t extra_block[2 * SHA1_BLOCK_SIZE+8], 32);
        IMB_JOB *job_in_lane;    /**< job assigned to this lane */
        uint8_t outer_block[64]; /**< buffer for the outer hash block */
        uint32_t outer_done;     /**< flag: outer hash stage completed */
        uint32_t extra_blocks; /* num extra blocks (1 or 2) */
        uint32_t size_offset;  /* offset in extra_block to start of
                                * size field */
        uint32_t start_offset; /* offset to start of data */
} HMAC_SHA1_LANE_DATA;
281 
/* HMAC-SHA512/384 per-lane bookkeeping */
typedef struct {
        /* up to 2 trailing blocks plus padding/size area */
        DECLARE_ALIGNED(uint8_t extra_block[2 * SHA_512_BLOCK_SIZE + 16], 32);
        uint8_t outer_block[SHA_512_BLOCK_SIZE]; /**< outer hash block buffer */
        IMB_JOB *job_in_lane;  /**< job assigned to this lane */
        uint32_t outer_done;   /**< flag: outer hash stage completed */
        uint32_t extra_blocks; /* num extra blocks (1 or 2) */
        uint32_t size_offset;  /* offset in extra_block to start of
                                * size field */
        uint32_t start_offset; /* offset to start of data */
} HMAC_SHA512_LANE_DATA;
293 
294 /*
295  * unused_lanes contains a list of unused lanes stored as bytes or as
296  * nibbles depending on the arch. The end of list is either FF or F.
297  */
298 typedef struct {
299         SHA1_ARGS args;
300         DECLARE_ALIGNED(uint16_t lens[16], 32);
301         uint64_t unused_lanes;
302         HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA1_LANES];
303         uint32_t num_lanes_inuse;
304         uint64_t road_block;
305 } MB_MGR_HMAC_SHA_1_OOO;
306 
/* HMAC-SHA256/224 out-of-order scheduler */
typedef struct {
        SHA256_ARGS args;      /**< per-lane hash arguments */
        DECLARE_ALIGNED(uint16_t lens[16], 16); /**< per-lane length field */
        uint64_t unused_lanes; /**< packed free-lane list */
        /* SHA256 lanes reuse the SHA1 per-lane structure */
        HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA256_LANES];
        uint32_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_HMAC_SHA_256_OOO;
315 
/* HMAC-SHA512/384 out-of-order scheduler */
typedef struct {
        SHA512_ARGS args;      /**< per-lane hash arguments */
        DECLARE_ALIGNED(uint16_t lens[8], 16); /**< per-lane length (8 lanes) */
        uint64_t unused_lanes; /**< packed free-lane list */
        HMAC_SHA512_LANE_DATA ldata[AVX512_NUM_SHA512_LANES]; /**< lane state */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_HMAC_SHA_512_OOO;
323 
/* MD5-HMAC out-of-order scheduler fields */
typedef struct {
        MD5_ARGS args;         /**< per-lane hash arguments */
        DECLARE_ALIGNED(uint16_t lens[AVX512_NUM_MD5_LANES], 16);
        /*
         * In the avx2 case, all 16 nibbles of unused lanes are used.
         * In that case num_lanes_inuse is used to detect the end of the list
         */
        uint64_t unused_lanes;
        /* MD5 lanes reuse the SHA1 per-lane structure */
        HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_MD5_LANES];
        uint32_t num_lanes_inuse; /**< number of lanes currently occupied */
        /* NOTE(review): presumably an overwrite-detection sentinel; confirm */
        uint64_t road_block;
} MB_MGR_HMAC_MD5_OOO;
337 
338 #endif /* IMB_IPSEC_MB_INTERNAL_H */
339