1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; see the file COPYING. If not, write to the
15    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
16    MA  02110-1301  USA.
17 */
18 
19 /* C++ part based on Wei Dai's des.cpp from CryptoPP */
20 /* x86 asm is original */
21 
22 
23 #if defined(TAOCRYPT_KERNEL_MODE)
24     #define DO_TAOCRYPT_KERNEL_MODE
25 #endif                                  // only some modules now support this
26 
27 
28 #include "runtime.hpp"
29 #include "des.hpp"
30 #ifdef USE_SYS_STL
31     #include <algorithm>
32 #else
33     #include "algorithm.hpp"
34 #endif
35 
36 
37 namespace STL = STL_NAMESPACE;
38 
39 
40 
41 namespace TaoCrypt {
42 
43 
44 /* permuted choice table (key) */
45 static const byte pc1[] = {
46        57, 49, 41, 33, 25, 17,  9,
47         1, 58, 50, 42, 34, 26, 18,
48        10,  2, 59, 51, 43, 35, 27,
49        19, 11,  3, 60, 52, 44, 36,
50 
51        63, 55, 47, 39, 31, 23, 15,
52         7, 62, 54, 46, 38, 30, 22,
53        14,  6, 61, 53, 45, 37, 29,
54        21, 13,  5, 28, 20, 12,  4
55 };
56 
57 /* number left rotations of pc1 */
58 static const byte totrot[] = {
59        1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
60 };
61 
62 /* permuted choice key (table) */
63 static const byte pc2[] = {
64        14, 17, 11, 24,  1,  5,
65         3, 28, 15,  6, 21, 10,
66        23, 19, 12,  4, 26,  8,
67        16,  7, 27, 20, 13,  2,
68        41, 52, 31, 37, 47, 55,
69        30, 40, 51, 45, 33, 48,
70        44, 49, 39, 56, 34, 53,
71        46, 42, 50, 36, 29, 32
72 };
73 
74 /* End of DES-defined tables */
75 
/* Single-bit masks for one byte; index 0 is the left-most (most
   significant) bit, index 7 the right-most. */
static const int bytebit[] = {
    0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
};
80 
81 const word32 Spbox[8][64] = {
82 {
83 0x01010400,0x00000000,0x00010000,0x01010404,
84 0x01010004,0x00010404,0x00000004,0x00010000,
85 0x00000400,0x01010400,0x01010404,0x00000400,
86 0x01000404,0x01010004,0x01000000,0x00000004,
87 0x00000404,0x01000400,0x01000400,0x00010400,
88 0x00010400,0x01010000,0x01010000,0x01000404,
89 0x00010004,0x01000004,0x01000004,0x00010004,
90 0x00000000,0x00000404,0x00010404,0x01000000,
91 0x00010000,0x01010404,0x00000004,0x01010000,
92 0x01010400,0x01000000,0x01000000,0x00000400,
93 0x01010004,0x00010000,0x00010400,0x01000004,
94 0x00000400,0x00000004,0x01000404,0x00010404,
95 0x01010404,0x00010004,0x01010000,0x01000404,
96 0x01000004,0x00000404,0x00010404,0x01010400,
97 0x00000404,0x01000400,0x01000400,0x00000000,
98 0x00010004,0x00010400,0x00000000,0x01010004},
99 {
100 0x80108020,0x80008000,0x00008000,0x00108020,
101 0x00100000,0x00000020,0x80100020,0x80008020,
102 0x80000020,0x80108020,0x80108000,0x80000000,
103 0x80008000,0x00100000,0x00000020,0x80100020,
104 0x00108000,0x00100020,0x80008020,0x00000000,
105 0x80000000,0x00008000,0x00108020,0x80100000,
106 0x00100020,0x80000020,0x00000000,0x00108000,
107 0x00008020,0x80108000,0x80100000,0x00008020,
108 0x00000000,0x00108020,0x80100020,0x00100000,
109 0x80008020,0x80100000,0x80108000,0x00008000,
110 0x80100000,0x80008000,0x00000020,0x80108020,
111 0x00108020,0x00000020,0x00008000,0x80000000,
112 0x00008020,0x80108000,0x00100000,0x80000020,
113 0x00100020,0x80008020,0x80000020,0x00100020,
114 0x00108000,0x00000000,0x80008000,0x00008020,
115 0x80000000,0x80100020,0x80108020,0x00108000},
116 {
117 0x00000208,0x08020200,0x00000000,0x08020008,
118 0x08000200,0x00000000,0x00020208,0x08000200,
119 0x00020008,0x08000008,0x08000008,0x00020000,
120 0x08020208,0x00020008,0x08020000,0x00000208,
121 0x08000000,0x00000008,0x08020200,0x00000200,
122 0x00020200,0x08020000,0x08020008,0x00020208,
123 0x08000208,0x00020200,0x00020000,0x08000208,
124 0x00000008,0x08020208,0x00000200,0x08000000,
125 0x08020200,0x08000000,0x00020008,0x00000208,
126 0x00020000,0x08020200,0x08000200,0x00000000,
127 0x00000200,0x00020008,0x08020208,0x08000200,
128 0x08000008,0x00000200,0x00000000,0x08020008,
129 0x08000208,0x00020000,0x08000000,0x08020208,
130 0x00000008,0x00020208,0x00020200,0x08000008,
131 0x08020000,0x08000208,0x00000208,0x08020000,
132 0x00020208,0x00000008,0x08020008,0x00020200},
133 {
134 0x00802001,0x00002081,0x00002081,0x00000080,
135 0x00802080,0x00800081,0x00800001,0x00002001,
136 0x00000000,0x00802000,0x00802000,0x00802081,
137 0x00000081,0x00000000,0x00800080,0x00800001,
138 0x00000001,0x00002000,0x00800000,0x00802001,
139 0x00000080,0x00800000,0x00002001,0x00002080,
140 0x00800081,0x00000001,0x00002080,0x00800080,
141 0x00002000,0x00802080,0x00802081,0x00000081,
142 0x00800080,0x00800001,0x00802000,0x00802081,
143 0x00000081,0x00000000,0x00000000,0x00802000,
144 0x00002080,0x00800080,0x00800081,0x00000001,
145 0x00802001,0x00002081,0x00002081,0x00000080,
146 0x00802081,0x00000081,0x00000001,0x00002000,
147 0x00800001,0x00002001,0x00802080,0x00800081,
148 0x00002001,0x00002080,0x00800000,0x00802001,
149 0x00000080,0x00800000,0x00002000,0x00802080},
150 {
151 0x00000100,0x02080100,0x02080000,0x42000100,
152 0x00080000,0x00000100,0x40000000,0x02080000,
153 0x40080100,0x00080000,0x02000100,0x40080100,
154 0x42000100,0x42080000,0x00080100,0x40000000,
155 0x02000000,0x40080000,0x40080000,0x00000000,
156 0x40000100,0x42080100,0x42080100,0x02000100,
157 0x42080000,0x40000100,0x00000000,0x42000000,
158 0x02080100,0x02000000,0x42000000,0x00080100,
159 0x00080000,0x42000100,0x00000100,0x02000000,
160 0x40000000,0x02080000,0x42000100,0x40080100,
161 0x02000100,0x40000000,0x42080000,0x02080100,
162 0x40080100,0x00000100,0x02000000,0x42080000,
163 0x42080100,0x00080100,0x42000000,0x42080100,
164 0x02080000,0x00000000,0x40080000,0x42000000,
165 0x00080100,0x02000100,0x40000100,0x00080000,
166 0x00000000,0x40080000,0x02080100,0x40000100},
167 {
168 0x20000010,0x20400000,0x00004000,0x20404010,
169 0x20400000,0x00000010,0x20404010,0x00400000,
170 0x20004000,0x00404010,0x00400000,0x20000010,
171 0x00400010,0x20004000,0x20000000,0x00004010,
172 0x00000000,0x00400010,0x20004010,0x00004000,
173 0x00404000,0x20004010,0x00000010,0x20400010,
174 0x20400010,0x00000000,0x00404010,0x20404000,
175 0x00004010,0x00404000,0x20404000,0x20000000,
176 0x20004000,0x00000010,0x20400010,0x00404000,
177 0x20404010,0x00400000,0x00004010,0x20000010,
178 0x00400000,0x20004000,0x20000000,0x00004010,
179 0x20000010,0x20404010,0x00404000,0x20400000,
180 0x00404010,0x20404000,0x00000000,0x20400010,
181 0x00000010,0x00004000,0x20400000,0x00404010,
182 0x00004000,0x00400010,0x20004010,0x00000000,
183 0x20404000,0x20000000,0x00400010,0x20004010},
184 {
185 0x00200000,0x04200002,0x04000802,0x00000000,
186 0x00000800,0x04000802,0x00200802,0x04200800,
187 0x04200802,0x00200000,0x00000000,0x04000002,
188 0x00000002,0x04000000,0x04200002,0x00000802,
189 0x04000800,0x00200802,0x00200002,0x04000800,
190 0x04000002,0x04200000,0x04200800,0x00200002,
191 0x04200000,0x00000800,0x00000802,0x04200802,
192 0x00200800,0x00000002,0x04000000,0x00200800,
193 0x04000000,0x00200800,0x00200000,0x04000802,
194 0x04000802,0x04200002,0x04200002,0x00000002,
195 0x00200002,0x04000000,0x04000800,0x00200000,
196 0x04200800,0x00000802,0x00200802,0x04200800,
197 0x00000802,0x04000002,0x04200802,0x04200000,
198 0x00200800,0x00000000,0x00000002,0x04200802,
199 0x00000000,0x00200802,0x04200000,0x00000800,
200 0x04000002,0x04000800,0x00000800,0x00200002},
201 {
202 0x10001040,0x00001000,0x00040000,0x10041040,
203 0x10000000,0x10001040,0x00000040,0x10000000,
204 0x00040040,0x10040000,0x10041040,0x00041000,
205 0x10041000,0x00041040,0x00001000,0x00000040,
206 0x10040000,0x10000040,0x10001000,0x00001040,
207 0x00041000,0x00040040,0x10040040,0x10041000,
208 0x00001040,0x00000000,0x00000000,0x10040040,
209 0x10000040,0x10001000,0x00041040,0x00040000,
210 0x00041040,0x00040000,0x10041000,0x00001000,
211 0x00000040,0x10040040,0x00001000,0x00041040,
212 0x10001000,0x00000040,0x10000040,0x10040000,
213 0x10040040,0x10000000,0x00040000,0x10001040,
214 0x00000000,0x10041040,0x00040040,0x10000040,
215 0x10040000,0x10001000,0x10001040,0x00000000,
216 0x10041040,0x00041000,0x00041000,0x00001040,
217 0x00001040,0x00040040,0x10000000,0x10041000}
218 };
219 
220 
SetKey(const byte * key,word32,CipherDir dir)221 void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir)
222 {
223     byte buffer[56+56+8];
224     byte *const pc1m = buffer;                 /* place to modify pc1 into */
225     byte *const pcr = pc1m + 56;               /* place to rotate pc1 into */
226     byte *const ks = pcr + 56;
227     register int i,j,l;
228     int m;
229 
230     for (j = 0; j < 56; j++) {          /* convert pc1 to bits of key */
231         l = pc1[j] - 1;                 /* integer bit location  */
232         m = l & 07;                     /* find bit              */
233         pc1m[j] = (key[l >> 3] &        /* find which key byte l is in */
234             bytebit[m])                 /* and which bit of that byte */
235             ? 1 : 0;                    /* and store 1-bit result */
236     }
237     for (i = 0; i < 16; i++) {          /* key chunk for each iteration */
238         memset(ks, 0, 8);               /* Clear key schedule */
239         for (j = 0; j < 56; j++)        /* rotate pc1 the right amount */
240             pcr[j] = pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l: l-28];
241         /* rotate left and right halves independently */
242         for (j = 0; j < 48; j++){   /* select bits individually */
243             /* check bit that goes to ks[j] */
244             if (pcr[pc2[j] - 1]){
245                 /* mask it in if it's there */
246                 l= j % 6;
247                 ks[j/6] |= bytebit[l] >> 2;
248             }
249         }
250         /* Now convert to odd/even interleaved form for use in F */
251         k_[2*i] = ((word32)ks[0] << 24)
252             | ((word32)ks[2] << 16)
253             | ((word32)ks[4] << 8)
254             | ((word32)ks[6]);
255         k_[2*i + 1] = ((word32)ks[1] << 24)
256             | ((word32)ks[3] << 16)
257             | ((word32)ks[5] << 8)
258             | ((word32)ks[7]);
259     }
260 
261     // reverse key schedule order
262     if (dir == DECRYPTION)
263         for (i = 0; i < 16; i += 2) {
264             STL::swap(k_[i],   k_[32 - 2 - i]);
265             STL::swap(k_[i+1], k_[32 - 1 - i]);
266         }
267 
268 }
269 
IPERM(word32 & left,word32 & right)270 static inline void IPERM(word32& left, word32& right)
271 {
272     word32 work;
273 
274     right = rotlFixed(right, 4U);
275     work = (left ^ right) & 0xf0f0f0f0;
276     left ^= work;
277 
278     right = rotrFixed(right^work, 20U);
279     work = (left ^ right) & 0xffff0000;
280     left ^= work;
281 
282     right = rotrFixed(right^work, 18U);
283     work = (left ^ right) & 0x33333333;
284     left ^= work;
285 
286     right = rotrFixed(right^work, 6U);
287     work = (left ^ right) & 0x00ff00ff;
288     left ^= work;
289 
290     right = rotlFixed(right^work, 9U);
291     work = (left ^ right) & 0xaaaaaaaa;
292     left = rotlFixed(left^work, 1U);
293     right ^= work;
294 }
295 
FPERM(word32 & left,word32 & right)296 static inline void FPERM(word32& left, word32& right)
297 {
298     word32 work;
299 
300     right = rotrFixed(right, 1U);
301     work = (left ^ right) & 0xaaaaaaaa;
302     right ^= work;
303     left = rotrFixed(left^work, 9U);
304     work = (left ^ right) & 0x00ff00ff;
305     right ^= work;
306     left = rotlFixed(left^work, 6U);
307     work = (left ^ right) & 0x33333333;
308     right ^= work;
309     left = rotlFixed(left^work, 18U);
310     work = (left ^ right) & 0xffff0000;
311     right ^= work;
312     left = rotlFixed(left^work, 20U);
313     work = (left ^ right) & 0xf0f0f0f0;
314     right ^= work;
315     left = rotrFixed(left^work, 4U);
316 }
317 
318 
RawProcessBlock(word32 & lIn,word32 & rIn) const319 void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const
320 {
321     word32 l = lIn, r = rIn;
322     const word32* kptr = k_;
323 
324     for (unsigned i=0; i<8; i++)
325     {
326         word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
327         l ^= Spbox[6][(work) & 0x3f]
328           ^  Spbox[4][(work >> 8) & 0x3f]
329           ^  Spbox[2][(work >> 16) & 0x3f]
330           ^  Spbox[0][(work >> 24) & 0x3f];
331         work = r ^ kptr[4*i+1];
332         l ^= Spbox[7][(work) & 0x3f]
333           ^  Spbox[5][(work >> 8) & 0x3f]
334           ^  Spbox[3][(work >> 16) & 0x3f]
335           ^  Spbox[1][(work >> 24) & 0x3f];
336 
337         work = rotrFixed(l, 4U) ^ kptr[4*i+2];
338         r ^= Spbox[6][(work) & 0x3f]
339           ^  Spbox[4][(work >> 8) & 0x3f]
340           ^  Spbox[2][(work >> 16) & 0x3f]
341           ^  Spbox[0][(work >> 24) & 0x3f];
342         work = l ^ kptr[4*i+3];
343         r ^= Spbox[7][(work) & 0x3f]
344           ^  Spbox[5][(work >> 8) & 0x3f]
345           ^  Spbox[3][(work >> 16) & 0x3f]
346           ^  Spbox[1][(work >> 24) & 0x3f];
347     }
348 
349     lIn = l; rIn = r;
350 }
351 
352 
353 
354 typedef BlockGetAndPut<word32, BigEndian> Block;
355 
356 
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const357 void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const
358 {
359     word32 l,r;
360     Block::Get(in)(l)(r);
361     IPERM(l,r);
362 
363     RawProcessBlock(l, r);
364 
365     FPERM(l,r);
366     Block::Put(xOr, out)(r)(l);
367 }
368 
369 
SetKey(const byte * key,word32 sz,CipherDir dir)370 void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir)
371 {
372     des1_.SetKey(key, sz, dir);
373     des2_.SetKey(key + 8, sz, ReverseDir(dir));
374 }
375 
376 
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const377 void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr,
378                                   byte* out) const
379 {
380     word32 l,r;
381     Block::Get(in)(l)(r);
382     IPERM(l,r);
383 
384     des1_.RawProcessBlock(l, r);
385     des2_.RawProcessBlock(r, l);
386     des1_.RawProcessBlock(l, r);
387 
388     FPERM(l,r);
389     Block::Put(xOr, out)(r)(l);
390 }
391 
392 
SetKey(const byte * key,word32 sz,CipherDir dir)393 void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir)
394 {
395     des1_.SetKey(key+(dir==ENCRYPTION?0:2*8), sz, dir);
396     des2_.SetKey(key+8, sz, ReverseDir(dir));
397     des3_.SetKey(key+(dir==DECRYPTION?0:2*8), sz, dir);
398 }
399 
400 
401 
402 #if defined(DO_DES_ASM)
403 
404 // ia32 optimized version
Process(byte * out,const byte * in,word32 sz)405 void DES_EDE3::Process(byte* out, const byte* in, word32 sz)
406 {
407     if (!isMMX) {
408         Mode_BASE::Process(out, in, sz);
409         return;
410     }
411 
412     word32 blocks = sz / DES_BLOCK_SIZE;
413 
414     if (mode_ == CBC)
415         if (dir_ == ENCRYPTION)
416             while (blocks--) {
417                 r_[0] ^= *(word32*)in;
418                 r_[1] ^= *(word32*)(in + 4);
419 
420                 AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox);
421 
422                 memcpy(out, r_, DES_BLOCK_SIZE);
423 
424                 in  += DES_BLOCK_SIZE;
425                 out += DES_BLOCK_SIZE;
426             }
427         else
428             while (blocks--) {
429                 AsmProcess(in, out, (void*)Spbox);
430 
431                 *(word32*)out       ^= r_[0];
432                 *(word32*)(out + 4) ^= r_[1];
433 
434                 memcpy(r_, in, DES_BLOCK_SIZE);
435 
436                 out += DES_BLOCK_SIZE;
437                 in  += DES_BLOCK_SIZE;
438             }
439     else
440         while (blocks--) {
441             AsmProcess(in, out, (void*)Spbox);
442 
443             out += DES_BLOCK_SIZE;
444             in  += DES_BLOCK_SIZE;
445         }
446 }
447 
448 #endif // DO_DES_ASM
449 
450 
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const451 void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr,
452                                   byte* out) const
453 {
454     word32 l,r;
455     Block::Get(in)(l)(r);
456     IPERM(l,r);
457 
458     des1_.RawProcessBlock(l, r);
459     des2_.RawProcessBlock(r, l);
460     des3_.RawProcessBlock(l, r);
461 
462     FPERM(l,r);
463     Block::Put(xOr, out)(r)(l);
464 }
465 
466 
467 #if defined(DO_DES_ASM)
468 
469 /* Uses IPERM algorithm from above
470 
471    left  is in eax
472    right is in ebx
473 
474    uses ecx
475 */
#define AsmIPERM() \
    /* right = rotl(right, 4); exchange bits under 0xf0f0f0f0 */ \
    AS2(    rol   ebx, 4                )   \
    AS2(    mov   ecx, eax              )   \
    AS2(    xor   ecx, ebx              )   \
    AS2(    and   ecx, 0xf0f0f0f0       )   \
    AS2(    xor   ebx, ecx              )   \
    AS2(    xor   eax, ecx              )   \
    /* right = rotr(right, 20); exchange bits under 0xffff0000 */ \
    AS2(    ror   ebx, 20               )   \
    AS2(    mov   ecx, eax              )   \
    AS2(    xor   ecx, ebx              )   \
    AS2(    and   ecx, 0xffff0000       )   \
    AS2(    xor   ebx, ecx              )   \
    AS2(    xor   eax, ecx              )   \
    /* right = rotr(right, 18); exchange bits under 0x33333333 */ \
    AS2(    ror   ebx, 18               )   \
    AS2(    mov   ecx, eax              )   \
    AS2(    xor   ecx, ebx              )   \
    AS2(    and   ecx, 0x33333333       )   \
    AS2(    xor   ebx, ecx              )   \
    AS2(    xor   eax, ecx              )   \
    /* right = rotr(right, 6); exchange bits under 0x00ff00ff */ \
    AS2(    ror   ebx, 6                )   \
    AS2(    mov   ecx, eax              )   \
    AS2(    xor   ecx, ebx              )   \
    AS2(    and   ecx, 0x00ff00ff       )   \
    AS2(    xor   ebx, ecx              )   \
    AS2(    xor   eax, ecx              )   \
    /* right = rotl(right, 9); exchange under 0xaaaaaaaa; left = rotl(left, 1) */ \
    AS2(    rol   ebx, 9                )   \
    AS2(    mov   ecx, eax              )   \
    AS2(    xor   ecx, ebx              )   \
    AS2(    and   ecx, 0xaaaaaaaa       )   \
    AS2(    xor   eax, ecx              )   \
    AS2(    rol   eax, 1                )   \
    AS2(    xor   ebx, ecx              )
508 
509 
510 /* Uses FPERM algorithm from above
511 
512    left  is in eax
513    right is in ebx
514 
515    uses ecx
516 */
#define AsmFPERM()    \
    /* right = rotr(right, 1); exchange under 0xaaaaaaaa; left = rotr(left, 9) */ \
    AS2(    ror  ebx, 1                 )    \
    AS2(    mov  ecx, eax               )    \
    AS2(    xor  ecx, ebx               )    \
    AS2(    and  ecx, 0xaaaaaaaa        )    \
    AS2(    xor  eax, ecx               )    \
    AS2(    xor  ebx, ecx               )    \
    AS2(    ror  eax, 9                 )    \
    /* exchange under 0x00ff00ff; left = rotl(left, 6) */ \
    AS2(    mov  ecx, ebx               )    \
    AS2(    xor  ecx, eax               )    \
    AS2(    and  ecx, 0x00ff00ff        )    \
    AS2(    xor  eax, ecx               )    \
    AS2(    xor  ebx, ecx               )    \
    AS2(    rol  eax, 6                 )    \
    /* exchange under 0x33333333; left = rotl(left, 18) */ \
    AS2(    mov  ecx, ebx               )    \
    AS2(    xor  ecx, eax               )    \
    AS2(    and  ecx, 0x33333333        )    \
    AS2(    xor  eax, ecx               )    \
    AS2(    xor  ebx, ecx               )    \
    AS2(    rol  eax, 18                )    \
    /* exchange under 0xffff0000; left = rotl(left, 20) */ \
    AS2(    mov  ecx, ebx               )    \
    AS2(    xor  ecx, eax               )    \
    AS2(    and  ecx, 0xffff0000        )    \
    AS2(    xor  eax, ecx               )    \
    AS2(    xor  ebx, ecx               )    \
    AS2(    rol  eax, 20                )    \
    /* exchange under 0xf0f0f0f0; left = rotr(left, 4) */ \
    AS2(    mov  ecx, ebx               )    \
    AS2(    xor  ecx, eax               )    \
    AS2(    and  ecx, 0xf0f0f0f0        )    \
    AS2(    xor  eax, ecx               )    \
    AS2(    xor  ebx, ecx               )    \
    AS2(    ror  eax, 4                 )
549 
550 
551 
552 
553 /* DesRound implements this algorithm:
554 
555         word32 work = rotrFixed(r, 4U) ^ key[0];
556         l ^= Spbox[6][(work) & 0x3f]
557           ^  Spbox[4][(work >> 8) & 0x3f]
558           ^  Spbox[2][(work >> 16) & 0x3f]
559           ^  Spbox[0][(work >> 24) & 0x3f];
560         work = r ^ key[1];
561         l ^= Spbox[7][(work) & 0x3f]
562           ^  Spbox[5][(work >> 8) & 0x3f]
563           ^  Spbox[3][(work >> 16) & 0x3f]
564           ^  Spbox[1][(work >> 24) & 0x3f];
565 
566         work = rotrFixed(l, 4U) ^ key[2];
567         r ^= Spbox[6][(work) & 0x3f]
568           ^  Spbox[4][(work >> 8) & 0x3f]
569           ^  Spbox[2][(work >> 16) & 0x3f]
570           ^  Spbox[0][(work >> 24) & 0x3f];
571         work = l ^ key[3];
572         r ^= Spbox[7][(work) & 0x3f]
573           ^  Spbox[5][(work >> 8) & 0x3f]
574           ^  Spbox[3][(work >> 16) & 0x3f]
575           ^  Spbox[1][(work >> 24) & 0x3f];
576 
577    left  is in eax
578    right is in ebx
579    key   is in edx
580 
581    advances key for next round
582 
583    uses ecx, esi, and edi
584 */
#define DesRound() \
    /* eax ^= rows 6,4,2,0 indexed by rotr(ebx, 4) ^ key[0] */ \
    AS2(    mov   ecx,  ebx                     )\
    AS2(    mov   esi,  DWORD PTR [edx]         )\
    AS2(    ror   ecx,  4                       )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 6*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   eax,  [ebp + edi*4 + 4*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 2*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 4]     )\
    AS2(    xor   eax,  [ebp + edi*4]           )\
    /* eax ^= rows 7,5,3,1 indexed by ebx ^ key[1] */ \
    AS2(    mov   ecx,  ebx                     )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 7*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   eax,  [ebp + edi*4 + 5*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 3*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 8]     )\
    AS2(    xor   eax,  [ebp + edi*4 + 1*256]   )\
    /* ebx ^= rows 6,4,2,0 indexed by rotr(eax, 4) ^ key[2] */ \
    AS2(    mov   ecx,  eax                     )\
    AS2(    ror   ecx,  4                       )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 6*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 4*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 2*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 12]    )\
    AS2(    xor   ebx,  [ebp + edi*4]           )\
    /* ebx ^= rows 7,5,3,1 indexed by eax ^ key[3]; then key += 4 words */ \
    AS2(    mov   ecx,  eax                     )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 7*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 5*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 3*256]   )\
    AS2(    add   edx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 1*256]   )
641 
642 
643 #ifdef _MSC_VER
644     __declspec(naked)
645 #else
646     __attribute__ ((noinline))
647 #endif
AsmProcess(const byte * in,byte * out,void * box) const648 void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const
649 {
650 #ifdef __GNUC__
651     #define AS1(x)    #x ";"
652     #define AS2(x, y) #x ", " #y ";"
653 
654     #define PROLOG()  \
655     __asm__ __volatile__ \
656     ( \
657         ".intel_syntax noprefix;" \
658         "push ebx;" \
659         "push ebp;" \
660         "movd mm6, ebp;" \
661         "movd mm7, ecx;" \
662         "mov  ebp, eax;"
663     #define EPILOG()  \
664         "pop ebp;" \
665         "pop ebx;" \
666        	"emms;" \
667        	".att_syntax;" \
668             :  \
669             : "d" (this), "S" (in), "a" (box), "c" (out) \
670             : "%edi", "memory", "cc" \
671     );
672 
673 #else
674     #define AS1(x)      __asm x
675     #define AS2(x, y)   __asm x, y
676 
677     #define PROLOG()  \
678         AS1(    push  ebp                           )   \
679         AS2(    mov   ebp, esp                      )   \
680         AS2(    movd  mm3, edi                      )   \
681         AS2(    movd  mm4, ebx                      )   \
682         AS2(    movd  mm5, esi                      )   \
683         AS2(    movd  mm6, ebp                      )   \
684         AS2(    mov   esi, DWORD PTR [ebp +  8]     )   \
685         AS2(    mov   edx, ecx                      )   \
686         AS2(    mov   ebp, DWORD PTR [ebp + 16]     )
687 
688     // ebp restored at end
689     #define EPILOG() \
690         AS2(    movd  edi, mm3                      )   \
691         AS2(    movd  ebx, mm4                      )   \
692         AS2(    movd  esi, mm5                      )   \
693         AS2(    mov   esp, ebp                      )   \
694         AS1(    pop   ebp                           )   \
695         AS1(    emms                                )   \
696         AS1(    ret 12                              )
697 
698 #endif
699 
700 
701     PROLOG()
702 
703     AS2(    movd  mm2, edx                      )
704 
705     #ifdef OLD_GCC_OFFSET
706         AS2(    add   edx, 60                       )   // des1 = des1 key
707     #else
708         AS2(    add   edx, 56                       )   // des1 = des1 key
709     #endif
710 
711     AS2(    mov   eax, DWORD PTR [esi]          )
712     AS2(    mov   ebx, DWORD PTR [esi + 4]      )
713     AS1(    bswap eax                           )    // left
714     AS1(    bswap ebx                           )    // right
715 
716     AsmIPERM()
717 
718     DesRound() // 1
719     DesRound() // 2
720     DesRound() // 3
721     DesRound() // 4
722     DesRound() // 5
723     DesRound() // 6
724     DesRound() // 7
725     DesRound() // 8
726 
727     // swap left and right
728     AS2(    xchg  eax, ebx                      )
729 
730     DesRound() // 1
731     DesRound() // 2
732     DesRound() // 3
733     DesRound() // 4
734     DesRound() // 5
735     DesRound() // 6
736     DesRound() // 7
737     DesRound() // 8
738 
739     // swap left and right
740     AS2(    xchg  eax, ebx                      )
741 
742     DesRound() // 1
743     DesRound() // 2
744     DesRound() // 3
745     DesRound() // 4
746     DesRound() // 5
747     DesRound() // 6
748     DesRound() // 7
749     DesRound() // 8
750 
751     AsmFPERM()
752 
753     //end
754     AS2(    movd  ebp, mm6                      )
755 
756     // swap and write out
757     AS1(    bswap ebx                           )
758     AS1(    bswap eax                           )
759 
760 #ifdef __GNUC__
761     AS2(    movd  esi, mm7   )   // outBlock
762 #else
763     AS2(    mov   esi, DWORD PTR [ebp +  12]    )   // outBlock
764 #endif
765 
766     AS2(    mov   DWORD PTR [esi],     ebx      )   // right first
767     AS2(    mov   DWORD PTR [esi + 4], eax      )
768 
769 
770     EPILOG()
771 }
772 
773 
774 
775 #endif // defined(DO_DES_ASM)
776 
777 
778 }  // namespace
779