1 /*******************************************************************************
2   Copyright (c) 2017-2020, Intel Corporation
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6 
7       * Redistributions of source code must retain the above copyright notice,
8         this list of conditions and the following disclaimer.
9       * Redistributions in binary form must reproduce the above copyright
10         notice, this list of conditions and the following disclaimer in the
11         documentation and/or other materials provided with the distribution.
12       * Neither the name of Intel Corporation nor the names of its contributors
13         may be used to endorse or promote products derived from this software
14         without specific prior written permission.
15 
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27 
28 /* basic DES implementation */
29 
30 #include <stdint.h>
31 #include <string.h>
32 
33 #include "intel-ipsec-mb.h"
34 #include "include/des.h"
35 #include "include/des_utils.h"
36 #include "include/clear_regs_mem.h"
37 #include "include/constant_lookup.h"
38 
/*
 * Exchange groups of bits between two 32-bit words, in place.
 *
 * For every bit set in mask m, the bit of *pb at that position is exchanged
 * with the bit of *pa located n positions higher (assuming m << n does not
 * run past bit 31). All remaining bits of both words are left unchanged.
 *
 * pa - word whose selected bits live at the shifted-up positions
 * pb - word whose selected bits live at the mask positions
 * n  - distance, in bits, between the two bit groups
 * m  - mask selecting the bits of *pb taking part in the exchange
 */
__forceinline
void permute_operation(uint32_t *pa, uint32_t *pb,
                       const uint32_t n, const uint32_t m)
{
        /* set where the two groups differ, expressed in *pb bit positions */
        const uint32_t diff = ((*pa >> n) ^ *pb) & m;

        *pa ^= (diff << n);
        *pb ^= diff;
}
48 
49 /* initial permutation */
50 __forceinline
ip_z(uint32_t * pl,uint32_t * pr)51 void ip_z(uint32_t *pl, uint32_t *pr)
52 {
53         permute_operation(pr, pl, 4, 0x0f0f0f0f);
54         permute_operation(pl, pr, 16, 0x0000ffff);
55         permute_operation(pr, pl, 2, 0x33333333);
56         permute_operation(pl, pr, 8, 0x00ff00ff);
57         permute_operation(pr, pl, 1, 0x55555555);
58 }
59 
60 /* final permutation */
61 __forceinline
fp_z(uint32_t * pl,uint32_t * pr)62 void fp_z(uint32_t *pl, uint32_t *pr)
63 {
64         permute_operation(pl, pr, 1, 0x55555555);
65         permute_operation(pr, pl, 8, 0x00ff00ff);
66         permute_operation(pl, pr, 2, 0x33333333);
67         permute_operation(pr, pl, 16, 0x0000ffff);
68         permute_operation(pl, pr, 4, 0x0f0f0f0f);
69 }
70 
/* 1st part of DES round
 * - permutes and expands R (32 bits) into 48 bits, stored as 8 groups of
 *   6 bits with one group per byte of the 64-bit result
 */
__forceinline
uint64_t e_phase(const uint64_t R)
{
        /* E phase as in FIPS46-3 and also 8x6 to 8x8 expansion.
         *
         * Bit selection table for this operation looks as follows:
         *         32, 1,  2,  3,  4,  5,  X, X,
         *         4,  5,  6,  7,  8,  9,  X, X,
         *         8,  9,  10, 11, 12, 13, X, X,
         *         12, 13, 14, 15, 16, 17, X, X,
         *         16, 17, 18, 19, 20, 21, X, X,
         *         20, 21, 22, 23, 24, 25, X, X,
         *         24, 25, 26, 27, 28, 29, X, X,
         *         28, 29, 30, 31, 32,  1, X, X
         * where 'X' is bit value 0.
         *
         * Each step below extracts the source bits from R first and then
         * places them into their destination byte. Only the low 32 bits of
         * R contribute to the result.
         */
        uint64_t x;

        /* byte 0: bit 31 of R wraps around in front of bits 0..4 */
        x = (R >> 31) & UINT64_C(1);
        x |= (R & UINT64_C(0x1f)) << 1;

        /* bytes 1..6: overlapping 6-bit windows, each starting 4 bits on */
        x |= ((R >> 3) & UINT64_C(0x3f)) << 8;
        x |= ((R >> 7) & UINT64_C(0x3f)) << 16;
        x |= ((R >> 11) & UINT64_C(0x3f)) << 24;
        x |= ((R >> 15) & UINT64_C(0x3f)) << 32;
        x |= ((R >> 19) & UINT64_C(0x3f)) << 40;
        x |= ((R >> 23) & UINT64_C(0x3f)) << 48;

        /* byte 7: bits 27..31 of R followed by bit 0 wrapping around */
        x |= ((R >> 27) & UINT64_C(0x1f)) << 56;
        x |= (R & UINT64_C(1)) << 61;

        return x;
}
100 
/*
 * Lookup table 0 used by fRK(): indexed with the 6-bit group held in byte 0
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox0p[64] = {
        /* 0x00 */ 0x00410100U, 0x00010000U, 0x40400000U, 0x40410100U,
        /* 0x04 */ 0x00400000U, 0x40010100U, 0x40010000U, 0x40400000U,
        /* 0x08 */ 0x40010100U, 0x00410100U, 0x00410000U, 0x40000100U,
        /* 0x0c */ 0x40400100U, 0x00400000U, 0x00000000U, 0x40010000U,
        /* 0x10 */ 0x00010000U, 0x40000000U, 0x00400100U, 0x00010100U,
        /* 0x14 */ 0x40410100U, 0x00410000U, 0x40000100U, 0x00400100U,
        /* 0x18 */ 0x40000000U, 0x00000100U, 0x00010100U, 0x40410000U,
        /* 0x1c */ 0x00000100U, 0x40400100U, 0x40410000U, 0x00000000U,
        /* 0x20 */ 0x00000000U, 0x40410100U, 0x00400100U, 0x40010000U,
        /* 0x24 */ 0x00410100U, 0x00010000U, 0x40000100U, 0x00400100U,
        /* 0x28 */ 0x40410000U, 0x00000100U, 0x00010100U, 0x40400000U,
        /* 0x2c */ 0x40010100U, 0x40000000U, 0x40400000U, 0x00410000U,
        /* 0x30 */ 0x40410100U, 0x00010100U, 0x00410000U, 0x40400100U,
        /* 0x34 */ 0x00400000U, 0x40000100U, 0x40010000U, 0x00000000U,
        /* 0x38 */ 0x00010000U, 0x00400000U, 0x40400100U, 0x00410100U,
        /* 0x3c */ 0x40000000U, 0x40410000U, 0x00000100U, 0x40010100U
};
135 
/*
 * Lookup table 1 used by fRK(): indexed with the 6-bit group held in byte 1
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox1p[64] = {
        /* 0x00 */ 0x08021002U, 0x00000000U, 0x00021000U, 0x08020000U,
        /* 0x04 */ 0x08000002U, 0x00001002U, 0x08001000U, 0x00021000U,
        /* 0x08 */ 0x00001000U, 0x08020002U, 0x00000002U, 0x08001000U,
        /* 0x0c */ 0x00020002U, 0x08021000U, 0x08020000U, 0x00000002U,
        /* 0x10 */ 0x00020000U, 0x08001002U, 0x08020002U, 0x00001000U,
        /* 0x14 */ 0x00021002U, 0x08000000U, 0x00000000U, 0x00020002U,
        /* 0x18 */ 0x08001002U, 0x00021002U, 0x08021000U, 0x08000002U,
        /* 0x1c */ 0x08000000U, 0x00020000U, 0x00001002U, 0x08021002U,
        /* 0x20 */ 0x00020002U, 0x08021000U, 0x08001000U, 0x00021002U,
        /* 0x24 */ 0x08021002U, 0x00020002U, 0x08000002U, 0x00000000U,
        /* 0x28 */ 0x08000000U, 0x00001002U, 0x00020000U, 0x08020002U,
        /* 0x2c */ 0x00001000U, 0x08000000U, 0x00021002U, 0x08001002U,
        /* 0x30 */ 0x08021000U, 0x00001000U, 0x00000000U, 0x08000002U,
        /* 0x34 */ 0x00000002U, 0x08021002U, 0x00021000U, 0x08020000U,
        /* 0x38 */ 0x08020002U, 0x00020000U, 0x00001002U, 0x08001000U,
        /* 0x3c */ 0x08001002U, 0x00000002U, 0x08020000U, 0x00021000U
};
170 
/*
 * Lookup table 2 used by fRK(): indexed with the 6-bit group held in byte 2
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox2p[64] = {
        /* 0x00 */ 0x20800000U, 0x00808020U, 0x00000020U, 0x20800020U,
        /* 0x04 */ 0x20008000U, 0x00800000U, 0x20800020U, 0x00008020U,
        /* 0x08 */ 0x00800020U, 0x00008000U, 0x00808000U, 0x20000000U,
        /* 0x0c */ 0x20808020U, 0x20000020U, 0x20000000U, 0x20808000U,
        /* 0x10 */ 0x00000000U, 0x20008000U, 0x00808020U, 0x00000020U,
        /* 0x14 */ 0x20000020U, 0x20808020U, 0x00008000U, 0x20800000U,
        /* 0x18 */ 0x20808000U, 0x00800020U, 0x20008020U, 0x00808000U,
        /* 0x1c */ 0x00008020U, 0x00000000U, 0x00800000U, 0x20008020U,
        /* 0x20 */ 0x00808020U, 0x00000020U, 0x20000000U, 0x00008000U,
        /* 0x24 */ 0x20000020U, 0x20008000U, 0x00808000U, 0x20800020U,
        /* 0x28 */ 0x00000000U, 0x00808020U, 0x00008020U, 0x20808000U,
        /* 0x2c */ 0x20008000U, 0x00800000U, 0x20808020U, 0x20000000U,
        /* 0x30 */ 0x20008020U, 0x20800000U, 0x00800000U, 0x20808020U,
        /* 0x34 */ 0x00008000U, 0x00800020U, 0x20800020U, 0x00008020U,
        /* 0x38 */ 0x00800020U, 0x00000000U, 0x20808000U, 0x20000020U,
        /* 0x3c */ 0x20800000U, 0x20008020U, 0x00000020U, 0x00808000U
};
205 
/*
 * Lookup table 3 used by fRK(): indexed with the 6-bit group held in byte 3
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox3p[64] = {
        /* 0x00 */ 0x00080201U, 0x02000200U, 0x00000001U, 0x02080201U,
        /* 0x04 */ 0x00000000U, 0x02080000U, 0x02000201U, 0x00080001U,
        /* 0x08 */ 0x02080200U, 0x02000001U, 0x02000000U, 0x00000201U,
        /* 0x0c */ 0x02000001U, 0x00080201U, 0x00080000U, 0x02000000U,
        /* 0x10 */ 0x02080001U, 0x00080200U, 0x00000200U, 0x00000001U,
        /* 0x14 */ 0x00080200U, 0x02000201U, 0x02080000U, 0x00000200U,
        /* 0x18 */ 0x00000201U, 0x00000000U, 0x00080001U, 0x02080200U,
        /* 0x1c */ 0x02000200U, 0x02080001U, 0x02080201U, 0x00080000U,
        /* 0x20 */ 0x02080001U, 0x00000201U, 0x00080000U, 0x02000001U,
        /* 0x24 */ 0x00080200U, 0x02000200U, 0x00000001U, 0x02080000U,
        /* 0x28 */ 0x02000201U, 0x00000000U, 0x00000200U, 0x00080001U,
        /* 0x2c */ 0x00000000U, 0x02080001U, 0x02080200U, 0x00000200U,
        /* 0x30 */ 0x02000000U, 0x02080201U, 0x00080201U, 0x00080000U,
        /* 0x34 */ 0x02080201U, 0x00000001U, 0x02000200U, 0x00080201U,
        /* 0x38 */ 0x00080001U, 0x00080200U, 0x02080000U, 0x02000201U,
        /* 0x3c */ 0x00000201U, 0x02000000U, 0x02000001U, 0x02080200U
};
240 
/*
 * Lookup table 4 used by fRK(): indexed with the 6-bit group held in byte 4
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox4p[64] = {
        /* 0x00 */ 0x01000000U, 0x00002000U, 0x00000080U, 0x01002084U,
        /* 0x04 */ 0x01002004U, 0x01000080U, 0x00002084U, 0x01002000U,
        /* 0x08 */ 0x00002000U, 0x00000004U, 0x01000004U, 0x00002080U,
        /* 0x0c */ 0x01000084U, 0x01002004U, 0x01002080U, 0x00000000U,
        /* 0x10 */ 0x00002080U, 0x01000000U, 0x00002004U, 0x00000084U,
        /* 0x14 */ 0x01000080U, 0x00002084U, 0x00000000U, 0x01000004U,
        /* 0x18 */ 0x00000004U, 0x01000084U, 0x01002084U, 0x00002004U,
        /* 0x1c */ 0x01002000U, 0x00000080U, 0x00000084U, 0x01002080U,
        /* 0x20 */ 0x01002080U, 0x01000084U, 0x00002004U, 0x01002000U,
        /* 0x24 */ 0x00002000U, 0x00000004U, 0x01000004U, 0x01000080U,
        /* 0x28 */ 0x01000000U, 0x00002080U, 0x01002084U, 0x00000000U,
        /* 0x2c */ 0x00002084U, 0x01000000U, 0x00000080U, 0x00002004U,
        /* 0x30 */ 0x01000084U, 0x00000080U, 0x00000000U, 0x01002084U,
        /* 0x34 */ 0x01002004U, 0x01002080U, 0x00000084U, 0x00002000U,
        /* 0x38 */ 0x00002080U, 0x01002004U, 0x01000080U, 0x00000084U,
        /* 0x3c */ 0x00000004U, 0x00002084U, 0x01002000U, 0x01000004U
};
275 
/*
 * Lookup table 5 used by fRK(): indexed with the 6-bit group held in byte 5
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 *
 * Declared static like the other seven tables: it is private to this
 * translation unit and must not be exported as a global symbol.
 */
static const uint32_t sbox5p[64] = {
        /* 0x00 */ 0x10000008U, 0x00040008U, 0x00000000U, 0x10040400U,
        /* 0x04 */ 0x00040008U, 0x00000400U, 0x10000408U, 0x00040000U,
        /* 0x08 */ 0x00000408U, 0x10040408U, 0x00040400U, 0x10000000U,
        /* 0x0c */ 0x10000400U, 0x10000008U, 0x10040000U, 0x00040408U,
        /* 0x10 */ 0x00040000U, 0x10000408U, 0x10040008U, 0x00000000U,
        /* 0x14 */ 0x00000400U, 0x00000008U, 0x10040400U, 0x10040008U,
        /* 0x18 */ 0x10040408U, 0x10040000U, 0x10000000U, 0x00000408U,
        /* 0x1c */ 0x00000008U, 0x00040400U, 0x00040408U, 0x10000400U,
        /* 0x20 */ 0x00000408U, 0x10000000U, 0x10000400U, 0x00040408U,
        /* 0x24 */ 0x10040400U, 0x00040008U, 0x00000000U, 0x10000400U,
        /* 0x28 */ 0x10000000U, 0x00000400U, 0x10040008U, 0x00040000U,
        /* 0x2c */ 0x00040008U, 0x10040408U, 0x00040400U, 0x00000008U,
        /* 0x30 */ 0x10040408U, 0x00040400U, 0x00040000U, 0x10000408U,
        /* 0x34 */ 0x10000008U, 0x10040000U, 0x00040408U, 0x00000000U,
        /* 0x38 */ 0x00000400U, 0x10000008U, 0x10000408U, 0x10040400U,
        /* 0x3c */ 0x10040000U, 0x00000408U, 0x00000008U, 0x10040008U
};
310 
/*
 * Lookup table 6 used by fRK(): indexed with the 6-bit group held in byte 6
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox6p[64] = {
        /* 0x00 */ 0x00000800U, 0x00000040U, 0x00200040U, 0x80200000U,
        /* 0x04 */ 0x80200840U, 0x80000800U, 0x00000840U, 0x00000000U,
        /* 0x08 */ 0x00200000U, 0x80200040U, 0x80000040U, 0x00200800U,
        /* 0x0c */ 0x80000000U, 0x00200840U, 0x00200800U, 0x80000040U,
        /* 0x10 */ 0x80200040U, 0x00000800U, 0x80000800U, 0x80200840U,
        /* 0x14 */ 0x00000000U, 0x00200040U, 0x80200000U, 0x00000840U,
        /* 0x18 */ 0x80200800U, 0x80000840U, 0x00200840U, 0x80000000U,
        /* 0x1c */ 0x80000840U, 0x80200800U, 0x00000040U, 0x00200000U,
        /* 0x20 */ 0x80000840U, 0x00200800U, 0x80200800U, 0x80000040U,
        /* 0x24 */ 0x00000800U, 0x00000040U, 0x00200000U, 0x80200800U,
        /* 0x28 */ 0x80200040U, 0x80000840U, 0x00000840U, 0x00000000U,
        /* 0x2c */ 0x00000040U, 0x80200000U, 0x80000000U, 0x00200040U,
        /* 0x30 */ 0x00000000U, 0x80200040U, 0x00200040U, 0x00000840U,
        /* 0x34 */ 0x80000040U, 0x00000800U, 0x80200840U, 0x00200000U,
        /* 0x38 */ 0x00200840U, 0x80000000U, 0x80000800U, 0x80200840U,
        /* 0x3c */ 0x80200000U, 0x00200840U, 0x00200800U, 0x80000800U
};
345 
/*
 * Lookup table 7 used by fRK(): indexed with the 6-bit group held in byte 7
 * of the key-mixed expanded word. Per the comment in fRK(), each entry is
 * the S-box output with the P permutation already applied.
 * The leading comment on each row is the index of its first entry.
 */
static const uint32_t sbox7p[64] = {
        /* 0x00 */ 0x04100010U, 0x04104000U, 0x00004010U, 0x00000000U,
        /* 0x04 */ 0x04004000U, 0x00100010U, 0x04100000U, 0x04104010U,
        /* 0x08 */ 0x00000010U, 0x04000000U, 0x00104000U, 0x00004010U,
        /* 0x0c */ 0x00104010U, 0x04004010U, 0x04000010U, 0x04100000U,
        /* 0x10 */ 0x00004000U, 0x00104010U, 0x00100010U, 0x04004000U,
        /* 0x14 */ 0x04104010U, 0x04000010U, 0x00000000U, 0x00104000U,
        /* 0x18 */ 0x04000000U, 0x00100000U, 0x04004010U, 0x04100010U,
        /* 0x1c */ 0x00100000U, 0x00004000U, 0x04104000U, 0x00000010U,
        /* 0x20 */ 0x00100000U, 0x00004000U, 0x04000010U, 0x04104010U,
        /* 0x24 */ 0x00004010U, 0x04000000U, 0x00000000U, 0x00104000U,
        /* 0x28 */ 0x04100010U, 0x04004010U, 0x04004000U, 0x00100010U,
        /* 0x2c */ 0x04104000U, 0x00000010U, 0x00100010U, 0x04004000U,
        /* 0x30 */ 0x04104010U, 0x00100000U, 0x04100000U, 0x04000010U,
        /* 0x34 */ 0x00104000U, 0x00004010U, 0x04004010U, 0x04100000U,
        /* 0x38 */ 0x00000010U, 0x04104000U, 0x00104010U, 0x00000000U,
        /* 0x3c */ 0x04000000U, 0x04100010U, 0x00004000U, 0x00104010U
};
380 
381 __forceinline
fRK(const uint32_t R,const uint64_t K)382 uint32_t fRK(const uint32_t R, const uint64_t K)
383 {
384         uint64_t x;
385 
386         /* Combined e-phase and 8x6bits to 8x8bits expansion.
387          * 32 bits -> 48 bits permutation
388          */
389         x = e_phase((uint64_t) R) ^ K;
390 
391         /* Combined s-box and p-phase.
392          *   s-box: 48 bits -> 32 bits
393          *   p-phase: 32 bits -> 32 bites permutation
394          */
395         return ((LOOKUP32_SSE(sbox0p, ((x >> (8 * 0)) & 0x3f),
396                               sizeof(sbox0p))) |
397                 (LOOKUP32_SSE(sbox1p, ((x >> (8 * 1)) & 0x3f),
398                               sizeof(sbox1p))) |
399                 (LOOKUP32_SSE(sbox2p, ((x >> (8 * 2)) & 0x3f),
400                               sizeof(sbox2p))) |
401                 (LOOKUP32_SSE(sbox3p, ((x >> (8 * 3)) & 0x3f),
402                               sizeof(sbox3p))) |
403                 (LOOKUP32_SSE(sbox4p, ((x >> (8 * 4)) & 0x3f),
404                               sizeof(sbox4p))) |
405                 (LOOKUP32_SSE(sbox5p, ((x >> (8 * 5)) & 0x3f),
406                               sizeof(sbox5p))) |
407                 (LOOKUP32_SSE(sbox6p, ((x >> (8 * 6)) & 0x3f),
408                               sizeof(sbox6p))) |
409                 (LOOKUP32_SSE(sbox7p, ((x >> (8 * 7)) & 0x3f),
410                               sizeof(sbox7p))));
411 }
412 
413 __forceinline
enc_dec_1(const uint64_t data,const uint64_t * ks,const int enc)414 uint64_t enc_dec_1(const uint64_t data, const uint64_t *ks, const int enc)
415 {
416         uint32_t l, r;
417 
418         r = (uint32_t) (data);
419         l = (uint32_t) (data >> 32);
420         ip_z(&r, &l);
421 
422         if (enc) {
423                 l ^= fRK(r, ks[0]);
424                 r ^= fRK(l, ks[1]);
425                 l ^= fRK(r, ks[2]);
426                 r ^= fRK(l, ks[3]);
427                 l ^= fRK(r, ks[4]);
428                 r ^= fRK(l, ks[5]);
429                 l ^= fRK(r, ks[6]);
430                 r ^= fRK(l, ks[7]);
431                 l ^= fRK(r, ks[8]);
432                 r ^= fRK(l, ks[9]);
433                 l ^= fRK(r, ks[10]);
434                 r ^= fRK(l, ks[11]);
435                 l ^= fRK(r, ks[12]);
436                 r ^= fRK(l, ks[13]);
437                 l ^= fRK(r, ks[14]);
438                 r ^= fRK(l, ks[15]);
439         } else {
440                 l ^= fRK(r, ks[15]);     /* l: l0 -> r1/l2 */
441                 r ^= fRK(l, ks[14]);     /* r: r0 -> r2 */
442                 l ^= fRK(r, ks[13]);
443                 r ^= fRK(l, ks[12]);
444                 l ^= fRK(r, ks[11]);
445                 r ^= fRK(l, ks[10]);
446                 l ^= fRK(r, ks[9]);
447                 r ^= fRK(l, ks[8]);
448                 l ^= fRK(r, ks[7]);
449                 r ^= fRK(l, ks[6]);
450                 l ^= fRK(r, ks[5]);
451                 r ^= fRK(l, ks[4]);
452                 l ^= fRK(r, ks[3]);
453                 r ^= fRK(l, ks[2]);
454                 l ^= fRK(r, ks[1]);
455                 r ^= fRK(l, ks[0]);
456         }
457 
458         fp_z(&r, &l);
459         return ((uint64_t) l) | (((uint64_t) r) << 32);
460 }
461 
462 IMB_DLL_LOCAL
463 void
des_enc_cbc_basic(const void * input,void * output,const int size,const uint64_t * ks,const uint64_t * ivec)464 des_enc_cbc_basic(const void *input, void *output, const int size,
465                   const uint64_t *ks, const uint64_t *ivec)
466 {
467 #ifdef SAFE_PARAM
468         if ((input == NULL) || (output == NULL) ||
469             (ks == NULL) || (ivec == NULL) || (size < 0))
470                 return;
471 #endif
472         const uint64_t *in = input;
473         uint64_t *out = output;
474         const int nblocks = size / 8;
475         int n;
476         uint64_t iv = *ivec;
477 
478         IMB_ASSERT(size >= 0);
479         IMB_ASSERT(input != NULL);
480         IMB_ASSERT(output != NULL);
481         IMB_ASSERT(ks != NULL);
482         IMB_ASSERT(ivec != NULL);
483 
484         for (n = 0; n < nblocks; n++)
485                 out[n] = iv = enc_dec_1(in[n] ^ iv, ks, 1 /* encrypt */);
486 
487 
488 #ifdef SAFE_DATA
489         /* *ivec = iv; */
490         clear_var(&iv, sizeof(iv));
491 #endif
492 }
493 
494 IMB_DLL_LOCAL
495 void
des_dec_cbc_basic(const void * input,void * output,const int size,const uint64_t * ks,const uint64_t * ivec)496 des_dec_cbc_basic(const void *input, void *output, const int size,
497                   const uint64_t *ks, const uint64_t *ivec)
498 {
499 #ifdef SAFE_PARAM
500         if ((input == NULL) || (output == NULL) ||
501             (ks == NULL) || (ivec == NULL) || (size < 0))
502                 return;
503 #endif
504         const uint64_t *in = input;
505         uint64_t *out = output;
506         const int nblocks = size / 8;
507         int n;
508         uint64_t iv = *ivec;
509 
510         IMB_ASSERT(size >= 0);
511         IMB_ASSERT(input != NULL);
512         IMB_ASSERT(output != NULL);
513         IMB_ASSERT(ks != NULL);
514         IMB_ASSERT(ivec != NULL);
515 
516         for (n = 0; n < nblocks; n++) {
517                 uint64_t in_block = in[n];
518 
519                 out[n] = enc_dec_1(in_block, ks, 0 /* decrypt */) ^ iv;
520                 iv = in_block;
521         }
522 
523 #ifdef SAFE_DATA
524         /* *ivec = iv; */
525         clear_var(&iv, sizeof(iv));
526 #endif
527 }
528 
529 IMB_DLL_LOCAL
530 void
des3_enc_cbc_basic(const void * input,void * output,const int size,const uint64_t * ks1,const uint64_t * ks2,const uint64_t * ks3,const uint64_t * ivec)531 des3_enc_cbc_basic(const void *input, void *output, const int size,
532                    const uint64_t *ks1, const uint64_t *ks2,
533                    const uint64_t *ks3, const uint64_t *ivec)
534 {
535 #ifdef SAFE_PARAM
536         if ((input == NULL) || (output == NULL) ||
537             (ks1 == NULL) || (ks2 == NULL) || (ks3 == NULL) ||
538             (ivec == NULL) || (size < 0))
539                 return;
540 #endif
541         const uint64_t *in = input;
542         uint64_t *out = output;
543         const int nblocks = size / 8;
544         int n;
545         uint64_t iv = *ivec;
546 
547         IMB_ASSERT(size >= 0);
548         IMB_ASSERT(input != NULL);
549         IMB_ASSERT(output != NULL);
550         IMB_ASSERT(ks1 != NULL);
551         IMB_ASSERT(ks2 != NULL);
552         IMB_ASSERT(ks3 != NULL);
553         IMB_ASSERT(ivec != NULL);
554 
555         for (n = 0; n < nblocks; n++) {
556                 uint64_t t = in[n] ^ iv;
557 
558                 t = enc_dec_1(t, ks1, 1 /* encrypt */);
559                 t = enc_dec_1(t, ks2, 0 /* decrypt */);
560                 t = enc_dec_1(t, ks3, 1 /* encrypt */);
561                 out[n] = iv = t;
562         }
563 
564 #ifdef SAFE_DATA
565         /* *ivec = iv; */
566         clear_var(&iv, sizeof(iv));
567 #endif
568 }
569 
570 IMB_DLL_LOCAL
571 void
des3_dec_cbc_basic(const void * input,void * output,const int size,const uint64_t * ks1,const uint64_t * ks2,const uint64_t * ks3,const uint64_t * ivec)572 des3_dec_cbc_basic(const void *input, void *output, const int size,
573                    const uint64_t *ks1, const uint64_t *ks2,
574                    const uint64_t *ks3, const uint64_t *ivec)
575 {
576 #ifdef SAFE_PARAM
577         if ((input == NULL) || (output == NULL) ||
578             (ks1 == NULL) || (ks2 == NULL) || (ks3 == NULL) ||
579             (ivec == NULL) || (size < 0))
580                 return;
581 #endif
582         const uint64_t *in = input;
583         uint64_t *out = output;
584         const int nblocks = size / 8;
585         int n;
586         uint64_t iv = *ivec;
587 
588         IMB_ASSERT(size >= 0);
589         IMB_ASSERT(input != NULL);
590         IMB_ASSERT(output != NULL);
591         IMB_ASSERT(ks1 != NULL);
592         IMB_ASSERT(ks2 != NULL);
593         IMB_ASSERT(ks3 != NULL);
594         IMB_ASSERT(ivec != NULL);
595 
596         for (n = 0; n < nblocks; n++) {
597                 uint64_t t;
598                 const uint64_t next_iv = in[n];
599 
600                 t = enc_dec_1(next_iv, ks3, 0 /* decrypt */);
601                 t = enc_dec_1(t, ks2, 1 /* encrypt */);
602                 t = enc_dec_1(t, ks1, 0 /* decrypt */);
603                 out[n] = t ^ iv;
604 
605                 iv = next_iv;
606         }
607 
608 #ifdef SAFE_DATA
609         /* *ivec = iv; */
610         clear_var(&iv, sizeof(iv));
611 #endif
612 }
613 
614 __forceinline
615 void
cfb_one_basic(const void * input,void * output,const int size,const uint64_t * ks,const uint64_t * ivec)616 cfb_one_basic(const void *input, void *output, const int size,
617               const uint64_t *ks, const uint64_t *ivec)
618 {
619 #ifdef SAFE_PARAM
620         if ((input == NULL) || (output == NULL) ||
621             (ks == NULL) || (ivec == NULL) || (size < 0))
622                 return;
623 #endif
624         uint8_t *out = (uint8_t *) output;
625         const uint8_t *in = (const uint8_t *) input;
626         uint64_t t;
627 
628         IMB_ASSERT(size <= 8 && size >= 0);
629         IMB_ASSERT(input != NULL);
630         IMB_ASSERT(output != NULL);
631         IMB_ASSERT(ks != NULL);
632         IMB_ASSERT(ivec != NULL);
633 
634         t = enc_dec_1(*ivec, ks, 1 /* encrypt */);
635 
636         /* XOR and copy in one go */
637         if (size & 1) {
638                 *out++ = *in++ ^ ((uint8_t) t);
639                 t >>= 8;
640         }
641 
642         if (size & 2) {
643                 uint16_t *out2 = (uint16_t *) out;
644                 const uint16_t *in2 = (const uint16_t *) in;
645 
646                 *out2 = *in2 ^ ((uint16_t) t);
647                 t >>= 16;
648                 out += 2;
649                 in += 2;
650         }
651 
652         if (size & 4) {
653                 uint32_t *out4 = (uint32_t *) out;
654                 const uint32_t *in4 = (const uint32_t *) in;
655 
656                 *out4 = *in4 ^ ((uint32_t) t);
657         }
658 
659 #ifdef SAFE_DATA
660         clear_var(&t, sizeof(t));
661 #endif
662 }
663 
664 IMB_DLL_LOCAL
665 void
docsis_des_enc_basic(const void * input,void * output,const int size,const uint64_t * ks,const uint64_t * ivec)666 docsis_des_enc_basic(const void *input, void *output, const int size,
667                      const uint64_t *ks, const uint64_t *ivec)
668 {
669 #ifdef SAFE_PARAM
670         if ((input == NULL) || (output == NULL) ||
671             (ks == NULL) || (ivec == NULL) || (size < 0))
672                 return;
673 #endif
674         const uint64_t *in = input;
675         uint64_t *out = output;
676         const int nblocks = size / DES_BLOCK_SIZE;
677         const int partial = size & 7;
678         int n;
679         uint64_t iv = *ivec;
680 
681         IMB_ASSERT(size >= 0);
682         IMB_ASSERT(input != NULL);
683         IMB_ASSERT(output != NULL);
684         IMB_ASSERT(ks != NULL);
685         IMB_ASSERT(ivec != NULL);
686 
687         for (n = 0; n < nblocks; n++)
688                 out[n] = iv = enc_dec_1(in[n] ^ iv, ks, 1 /* encrypt */);
689 
690         if (partial) {
691                 if (nblocks)
692                         cfb_one_basic(&in[nblocks], &out[nblocks], partial,
693                                       ks, &out[nblocks - 1]);
694                 else
695                         cfb_one_basic(input, output, partial, ks, ivec);
696         }
697 
698 #ifdef SAFE_DATA
699         /* *ivec = iv; */
700         clear_var(&iv, sizeof(iv));
701 #endif
702 }
703 
704 IMB_DLL_LOCAL
705 void
docsis_des_dec_basic(const void * input,void * output,const int size,const uint64_t * ks,const uint64_t * ivec)706 docsis_des_dec_basic(const void *input, void *output, const int size,
707                      const uint64_t *ks, const uint64_t *ivec)
708 {
709 #ifdef SAFE_PARAM
710         if ((input == NULL) || (output == NULL) ||
711             (ks == NULL) || (ivec == NULL) || (size < 0))
712                 return;
713 #endif
714         const uint64_t *in = input;
715         uint64_t *out = output;
716         const int nblocks = size / DES_BLOCK_SIZE;
717         const int partial = size & 7;
718         int n;
719         uint64_t iv = *ivec;
720 
721         IMB_ASSERT(size >= 0);
722         IMB_ASSERT(input != NULL);
723         IMB_ASSERT(output != NULL);
724         IMB_ASSERT(ks != NULL);
725         IMB_ASSERT(ivec != NULL);
726 
727         if (partial) {
728                 if (!nblocks) {
729                         /* first block is the partial one */
730                         cfb_one_basic(input, output, partial, ks, ivec);
731                         iv = 0;
732                         return;
733                 }
734                 /* last block is partial */
735                 cfb_one_basic(&in[nblocks], &out[nblocks], partial,
736                               ks, &in[nblocks - 1]);
737         }
738 
739         for (n = 0; n < nblocks; n++) {
740                 uint64_t in_block = in[n];
741 
742                 out[n] = enc_dec_1(in_block, ks, 0 /* decrypt */) ^ iv;
743                 iv = in_block;
744         }
745 
746 #ifdef SAFE_DATA
747         /* *ivec = iv; */
748         clear_var(&iv, sizeof(iv));
749 #endif
750 }
751