1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; see the file COPYING. If not, write to the
15 Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
16 MA 02110-1301 USA.
17 */
18
19 /* C++ part based on Wei Dai's des.cpp from CryptoPP */
20 /* x86 asm is original */
21
22
23 #if defined(TAOCRYPT_KERNEL_MODE)
24 #define DO_TAOCRYPT_KERNEL_MODE
25 #endif // only some modules now support this
26
27
28 #include "runtime.hpp"
29 #include "des.hpp"
30 #ifdef USE_SYS_STL
31 #include <algorithm>
32 #else
33 #include "algorithm.hpp"
34 #endif
35
36
37 namespace STL = STL_NAMESPACE;
38
39
40
41 namespace TaoCrypt {
42
43
/* permuted choice table (key) */
/*
   56 entries, each a 1-based bit position inside the 8-byte key.
   BasicDES::SetKey() subtracts 1 and splits the result into a byte index
   (>> 3) and a bit index (& 07).  No multiple of 8 appears in the table,
   so the last bit of every key byte is never selected.
*/
static const byte pc1[] = {
       57, 49, 41, 33, 25, 17,  9,
        1, 58, 50, 42, 34, 26, 18,
       10,  2, 59, 51, 43, 35, 27,
       19, 11,  3, 60, 52, 44, 36,

       63, 55, 47, 39, 31, 23, 15,
        7, 62, 54, 46, 38, 30, 22,
       14,  6, 61, 53, 45, 37, 29,
       21, 13,  5, 28, 20, 12,  4
};
56
/* number left rotations of pc1 */
/*
   One entry per key-schedule iteration (16 in total).  SetKey() adds
   totrot[i] to a bit index and wraps the sum inside its 28-entry half,
   so each value is the total rotation applied for iteration i rather than
   a per-iteration increment.
*/
static const byte totrot[] = {
       1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
};
61
/* permuted choice key (table) */
/*
   48 entries, each a 1-based index into the 56-entry rotated bit array
   built by SetKey().  Entry j decides whether bit (j % 6) of subkey chunk
   (j / 6) is set.
*/
static const byte pc2[] = {
       14, 17, 11, 24,  1,  5,
        3, 28, 15,  6, 21, 10,
       23, 19, 12,  4, 26,  8,
       16,  7, 27, 20, 13,  2,
       41, 52, 31, 37, 47, 55,
       30, 40, 51, 45, 33, 48,
       44, 49, 39, 56, 34, 53,
       46, 42, 50, 36, 29, 32
};
73
74 /* End of DES-defined tables */
75
/* bit 0 is left-most in byte */
/*
   Single-bit masks written in octal: index 0 is 0200 (the most significant
   bit of a byte) and index 7 is 01 (the least significant bit).
*/
static const int bytebit[] = {
       0200,0100,040,020,010,04,02,01
};
80
/*
   Eight lookup tables of 64 word32 entries each.

   BasicDES::RawProcessBlock() indexes table n with a 6-bit field
   ((work >> shift) & 0x3f) and XORs the selected words into one half of
   the block.  The assembler path receives this array as its `box` pointer
   and addresses table n at byte offset n*256 (64 entries * 4 bytes).
*/
const word32 Spbox[8][64] = {
{
0x01010400,0x00000000,0x00010000,0x01010404,
0x01010004,0x00010404,0x00000004,0x00010000,
0x00000400,0x01010400,0x01010404,0x00000400,
0x01000404,0x01010004,0x01000000,0x00000004,
0x00000404,0x01000400,0x01000400,0x00010400,
0x00010400,0x01010000,0x01010000,0x01000404,
0x00010004,0x01000004,0x01000004,0x00010004,
0x00000000,0x00000404,0x00010404,0x01000000,
0x00010000,0x01010404,0x00000004,0x01010000,
0x01010400,0x01000000,0x01000000,0x00000400,
0x01010004,0x00010000,0x00010400,0x01000004,
0x00000400,0x00000004,0x01000404,0x00010404,
0x01010404,0x00010004,0x01010000,0x01000404,
0x01000004,0x00000404,0x00010404,0x01010400,
0x00000404,0x01000400,0x01000400,0x00000000,
0x00010004,0x00010400,0x00000000,0x01010004},
{
0x80108020,0x80008000,0x00008000,0x00108020,
0x00100000,0x00000020,0x80100020,0x80008020,
0x80000020,0x80108020,0x80108000,0x80000000,
0x80008000,0x00100000,0x00000020,0x80100020,
0x00108000,0x00100020,0x80008020,0x00000000,
0x80000000,0x00008000,0x00108020,0x80100000,
0x00100020,0x80000020,0x00000000,0x00108000,
0x00008020,0x80108000,0x80100000,0x00008020,
0x00000000,0x00108020,0x80100020,0x00100000,
0x80008020,0x80100000,0x80108000,0x00008000,
0x80100000,0x80008000,0x00000020,0x80108020,
0x00108020,0x00000020,0x00008000,0x80000000,
0x00008020,0x80108000,0x00100000,0x80000020,
0x00100020,0x80008020,0x80000020,0x00100020,
0x00108000,0x00000000,0x80008000,0x00008020,
0x80000000,0x80100020,0x80108020,0x00108000},
{
0x00000208,0x08020200,0x00000000,0x08020008,
0x08000200,0x00000000,0x00020208,0x08000200,
0x00020008,0x08000008,0x08000008,0x00020000,
0x08020208,0x00020008,0x08020000,0x00000208,
0x08000000,0x00000008,0x08020200,0x00000200,
0x00020200,0x08020000,0x08020008,0x00020208,
0x08000208,0x00020200,0x00020000,0x08000208,
0x00000008,0x08020208,0x00000200,0x08000000,
0x08020200,0x08000000,0x00020008,0x00000208,
0x00020000,0x08020200,0x08000200,0x00000000,
0x00000200,0x00020008,0x08020208,0x08000200,
0x08000008,0x00000200,0x00000000,0x08020008,
0x08000208,0x00020000,0x08000000,0x08020208,
0x00000008,0x00020208,0x00020200,0x08000008,
0x08020000,0x08000208,0x00000208,0x08020000,
0x00020208,0x00000008,0x08020008,0x00020200},
{
0x00802001,0x00002081,0x00002081,0x00000080,
0x00802080,0x00800081,0x00800001,0x00002001,
0x00000000,0x00802000,0x00802000,0x00802081,
0x00000081,0x00000000,0x00800080,0x00800001,
0x00000001,0x00002000,0x00800000,0x00802001,
0x00000080,0x00800000,0x00002001,0x00002080,
0x00800081,0x00000001,0x00002080,0x00800080,
0x00002000,0x00802080,0x00802081,0x00000081,
0x00800080,0x00800001,0x00802000,0x00802081,
0x00000081,0x00000000,0x00000000,0x00802000,
0x00002080,0x00800080,0x00800081,0x00000001,
0x00802001,0x00002081,0x00002081,0x00000080,
0x00802081,0x00000081,0x00000001,0x00002000,
0x00800001,0x00002001,0x00802080,0x00800081,
0x00002001,0x00002080,0x00800000,0x00802001,
0x00000080,0x00800000,0x00002000,0x00802080},
{
0x00000100,0x02080100,0x02080000,0x42000100,
0x00080000,0x00000100,0x40000000,0x02080000,
0x40080100,0x00080000,0x02000100,0x40080100,
0x42000100,0x42080000,0x00080100,0x40000000,
0x02000000,0x40080000,0x40080000,0x00000000,
0x40000100,0x42080100,0x42080100,0x02000100,
0x42080000,0x40000100,0x00000000,0x42000000,
0x02080100,0x02000000,0x42000000,0x00080100,
0x00080000,0x42000100,0x00000100,0x02000000,
0x40000000,0x02080000,0x42000100,0x40080100,
0x02000100,0x40000000,0x42080000,0x02080100,
0x40080100,0x00000100,0x02000000,0x42080000,
0x42080100,0x00080100,0x42000000,0x42080100,
0x02080000,0x00000000,0x40080000,0x42000000,
0x00080100,0x02000100,0x40000100,0x00080000,
0x00000000,0x40080000,0x02080100,0x40000100},
{
0x20000010,0x20400000,0x00004000,0x20404010,
0x20400000,0x00000010,0x20404010,0x00400000,
0x20004000,0x00404010,0x00400000,0x20000010,
0x00400010,0x20004000,0x20000000,0x00004010,
0x00000000,0x00400010,0x20004010,0x00004000,
0x00404000,0x20004010,0x00000010,0x20400010,
0x20400010,0x00000000,0x00404010,0x20404000,
0x00004010,0x00404000,0x20404000,0x20000000,
0x20004000,0x00000010,0x20400010,0x00404000,
0x20404010,0x00400000,0x00004010,0x20000010,
0x00400000,0x20004000,0x20000000,0x00004010,
0x20000010,0x20404010,0x00404000,0x20400000,
0x00404010,0x20404000,0x00000000,0x20400010,
0x00000010,0x00004000,0x20400000,0x00404010,
0x00004000,0x00400010,0x20004010,0x00000000,
0x20404000,0x20000000,0x00400010,0x20004010},
{
0x00200000,0x04200002,0x04000802,0x00000000,
0x00000800,0x04000802,0x00200802,0x04200800,
0x04200802,0x00200000,0x00000000,0x04000002,
0x00000002,0x04000000,0x04200002,0x00000802,
0x04000800,0x00200802,0x00200002,0x04000800,
0x04000002,0x04200000,0x04200800,0x00200002,
0x04200000,0x00000800,0x00000802,0x04200802,
0x00200800,0x00000002,0x04000000,0x00200800,
0x04000000,0x00200800,0x00200000,0x04000802,
0x04000802,0x04200002,0x04200002,0x00000002,
0x00200002,0x04000000,0x04000800,0x00200000,
0x04200800,0x00000802,0x00200802,0x04200800,
0x00000802,0x04000002,0x04200802,0x04200000,
0x00200800,0x00000000,0x00000002,0x04200802,
0x00000000,0x00200802,0x04200000,0x00000800,
0x04000002,0x04000800,0x00000800,0x00200002},
{
0x10001040,0x00001000,0x00040000,0x10041040,
0x10000000,0x10001040,0x00000040,0x10000000,
0x00040040,0x10040000,0x10041040,0x00041000,
0x10041000,0x00041040,0x00001000,0x00000040,
0x10040000,0x10000040,0x10001000,0x00001040,
0x00041000,0x00040040,0x10040040,0x10041000,
0x00001040,0x00000000,0x00000000,0x10040040,
0x10000040,0x10001000,0x00041040,0x00040000,
0x00041040,0x00040000,0x10041000,0x00001000,
0x00000040,0x10040040,0x00001000,0x00041040,
0x10001000,0x00000040,0x10000040,0x10040000,
0x10040040,0x10000000,0x00040000,0x10001040,
0x00000000,0x10041040,0x00040040,0x10000040,
0x10040000,0x10001000,0x10001040,0x00000000,
0x10041040,0x00041000,0x00041000,0x00001040,
0x00001040,0x00040040,0x10000000,0x10041000}
};
219
220
SetKey(const byte * key,word32,CipherDir dir)221 void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir)
222 {
223 byte buffer[56+56+8];
224 byte *const pc1m = buffer; /* place to modify pc1 into */
225 byte *const pcr = pc1m + 56; /* place to rotate pc1 into */
226 byte *const ks = pcr + 56;
227 register int i,j,l;
228 int m;
229
230 for (j = 0; j < 56; j++) { /* convert pc1 to bits of key */
231 l = pc1[j] - 1; /* integer bit location */
232 m = l & 07; /* find bit */
233 pc1m[j] = (key[l >> 3] & /* find which key byte l is in */
234 bytebit[m]) /* and which bit of that byte */
235 ? 1 : 0; /* and store 1-bit result */
236 }
237 for (i = 0; i < 16; i++) { /* key chunk for each iteration */
238 memset(ks, 0, 8); /* Clear key schedule */
239 for (j = 0; j < 56; j++) /* rotate pc1 the right amount */
240 pcr[j] = pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l: l-28];
241 /* rotate left and right halves independently */
242 for (j = 0; j < 48; j++){ /* select bits individually */
243 /* check bit that goes to ks[j] */
244 if (pcr[pc2[j] - 1]){
245 /* mask it in if it's there */
246 l= j % 6;
247 ks[j/6] |= bytebit[l] >> 2;
248 }
249 }
250 /* Now convert to odd/even interleaved form for use in F */
251 k_[2*i] = ((word32)ks[0] << 24)
252 | ((word32)ks[2] << 16)
253 | ((word32)ks[4] << 8)
254 | ((word32)ks[6]);
255 k_[2*i + 1] = ((word32)ks[1] << 24)
256 | ((word32)ks[3] << 16)
257 | ((word32)ks[5] << 8)
258 | ((word32)ks[7]);
259 }
260
261 // reverse key schedule order
262 if (dir == DECRYPTION)
263 for (i = 0; i < 16; i += 2) {
264 STL::swap(k_[i], k_[32 - 2 - i]);
265 STL::swap(k_[i+1], k_[32 - 1 - i]);
266 }
267
268 }
269
IPERM(word32 & left,word32 & right)270 static inline void IPERM(word32& left, word32& right)
271 {
272 word32 work;
273
274 right = rotlFixed(right, 4U);
275 work = (left ^ right) & 0xf0f0f0f0;
276 left ^= work;
277
278 right = rotrFixed(right^work, 20U);
279 work = (left ^ right) & 0xffff0000;
280 left ^= work;
281
282 right = rotrFixed(right^work, 18U);
283 work = (left ^ right) & 0x33333333;
284 left ^= work;
285
286 right = rotrFixed(right^work, 6U);
287 work = (left ^ right) & 0x00ff00ff;
288 left ^= work;
289
290 right = rotlFixed(right^work, 9U);
291 work = (left ^ right) & 0xaaaaaaaa;
292 left = rotlFixed(left^work, 1U);
293 right ^= work;
294 }
295
FPERM(word32 & left,word32 & right)296 static inline void FPERM(word32& left, word32& right)
297 {
298 word32 work;
299
300 right = rotrFixed(right, 1U);
301 work = (left ^ right) & 0xaaaaaaaa;
302 right ^= work;
303 left = rotrFixed(left^work, 9U);
304 work = (left ^ right) & 0x00ff00ff;
305 right ^= work;
306 left = rotlFixed(left^work, 6U);
307 work = (left ^ right) & 0x33333333;
308 right ^= work;
309 left = rotlFixed(left^work, 18U);
310 work = (left ^ right) & 0xffff0000;
311 right ^= work;
312 left = rotlFixed(left^work, 20U);
313 work = (left ^ right) & 0xf0f0f0f0;
314 right ^= work;
315 left = rotrFixed(left^work, 4U);
316 }
317
318
RawProcessBlock(word32 & lIn,word32 & rIn) const319 void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const
320 {
321 word32 l = lIn, r = rIn;
322 const word32* kptr = k_;
323
324 for (unsigned i=0; i<8; i++)
325 {
326 word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
327 l ^= Spbox[6][(work) & 0x3f]
328 ^ Spbox[4][(work >> 8) & 0x3f]
329 ^ Spbox[2][(work >> 16) & 0x3f]
330 ^ Spbox[0][(work >> 24) & 0x3f];
331 work = r ^ kptr[4*i+1];
332 l ^= Spbox[7][(work) & 0x3f]
333 ^ Spbox[5][(work >> 8) & 0x3f]
334 ^ Spbox[3][(work >> 16) & 0x3f]
335 ^ Spbox[1][(work >> 24) & 0x3f];
336
337 work = rotrFixed(l, 4U) ^ kptr[4*i+2];
338 r ^= Spbox[6][(work) & 0x3f]
339 ^ Spbox[4][(work >> 8) & 0x3f]
340 ^ Spbox[2][(work >> 16) & 0x3f]
341 ^ Spbox[0][(work >> 24) & 0x3f];
342 work = l ^ kptr[4*i+3];
343 r ^= Spbox[7][(work) & 0x3f]
344 ^ Spbox[5][(work >> 8) & 0x3f]
345 ^ Spbox[3][(work >> 16) & 0x3f]
346 ^ Spbox[1][(work >> 24) & 0x3f];
347 }
348
349 lIn = l; rIn = r;
350 }
351
352
353
// Accessor used by the ProcessAndXorBlock routines in this file to read the
// two word32 halves of a block (Block::Get) and to write them back
// (Block::Put); instantiated for word32 with the BigEndian policy.
typedef BlockGetAndPut<word32, BigEndian> Block;
355
356
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const357 void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const
358 {
359 word32 l,r;
360 Block::Get(in)(l)(r);
361 IPERM(l,r);
362
363 RawProcessBlock(l, r);
364
365 FPERM(l,r);
366 Block::Put(xOr, out)(r)(l);
367 }
368
369
SetKey(const byte * key,word32 sz,CipherDir dir)370 void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir)
371 {
372 des1_.SetKey(key, sz, dir);
373 des2_.SetKey(key + 8, sz, ReverseDir(dir));
374 }
375
376
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const377 void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr,
378 byte* out) const
379 {
380 word32 l,r;
381 Block::Get(in)(l)(r);
382 IPERM(l,r);
383
384 des1_.RawProcessBlock(l, r);
385 des2_.RawProcessBlock(r, l);
386 des1_.RawProcessBlock(l, r);
387
388 FPERM(l,r);
389 Block::Put(xOr, out)(r)(l);
390 }
391
392
SetKey(const byte * key,word32 sz,CipherDir dir)393 void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir)
394 {
395 des1_.SetKey(key+(dir==ENCRYPTION?0:2*8), sz, dir);
396 des2_.SetKey(key+8, sz, ReverseDir(dir));
397 des3_.SetKey(key+(dir==DECRYPTION?0:2*8), sz, dir);
398 }
399
400
401
402 #if defined(DO_DES_ASM)
403
404 // ia32 optimized version
Process(byte * out,const byte * in,word32 sz)405 void DES_EDE3::Process(byte* out, const byte* in, word32 sz)
406 {
407 if (!isMMX) {
408 Mode_BASE::Process(out, in, sz);
409 return;
410 }
411
412 word32 blocks = sz / DES_BLOCK_SIZE;
413
414 if (mode_ == CBC)
415 if (dir_ == ENCRYPTION)
416 while (blocks--) {
417 r_[0] ^= *(word32*)in;
418 r_[1] ^= *(word32*)(in + 4);
419
420 AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox);
421
422 memcpy(out, r_, DES_BLOCK_SIZE);
423
424 in += DES_BLOCK_SIZE;
425 out += DES_BLOCK_SIZE;
426 }
427 else
428 while (blocks--) {
429 AsmProcess(in, out, (void*)Spbox);
430
431 *(word32*)out ^= r_[0];
432 *(word32*)(out + 4) ^= r_[1];
433
434 memcpy(r_, in, DES_BLOCK_SIZE);
435
436 out += DES_BLOCK_SIZE;
437 in += DES_BLOCK_SIZE;
438 }
439 else
440 while (blocks--) {
441 AsmProcess(in, out, (void*)Spbox);
442
443 out += DES_BLOCK_SIZE;
444 in += DES_BLOCK_SIZE;
445 }
446 }
447
448 #endif // DO_DES_ASM
449
450
ProcessAndXorBlock(const byte * in,const byte * xOr,byte * out) const451 void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr,
452 byte* out) const
453 {
454 word32 l,r;
455 Block::Get(in)(l)(r);
456 IPERM(l,r);
457
458 des1_.RawProcessBlock(l, r);
459 des2_.RawProcessBlock(r, l);
460 des3_.RawProcessBlock(l, r);
461
462 FPERM(l,r);
463 Block::Put(xOr, out)(r)(l);
464 }
465
466
467 #if defined(DO_DES_ASM)
468
/* Uses IPERM algorithm from above

   left is in eax
   right is in ebx

   uses ecx

   Expands to a run of AS2() instructions, so it is only usable where AS2
   is defined (inside AsmProcess).  Each group mirrors one step of the C++
   IPERM: ecx = (eax ^ ebx) & mask is XORed into both registers, and the
   rotate that follows corresponds to the rotlFixed/rotrFixed call.  In the
   last group the rotate is applied to eax instead of ebx.
*/
#define AsmIPERM() \
    AS2(    rol   ebx, 4                        ) \
    AS2(    mov   ecx, eax                      ) \
    AS2(    xor   ecx, ebx                      ) \
    AS2(    and   ecx, 0xf0f0f0f0               ) \
    AS2(    xor   ebx, ecx                      ) \
    AS2(    xor   eax, ecx                      ) \
    AS2(    ror   ebx, 20                       ) \
    AS2(    mov   ecx, eax                      ) \
    AS2(    xor   ecx, ebx                      ) \
    AS2(    and   ecx, 0xffff0000               ) \
    AS2(    xor   ebx, ecx                      ) \
    AS2(    xor   eax, ecx                      ) \
    AS2(    ror   ebx, 18                       ) \
    AS2(    mov   ecx, eax                      ) \
    AS2(    xor   ecx, ebx                      ) \
    AS2(    and   ecx, 0x33333333               ) \
    AS2(    xor   ebx, ecx                      ) \
    AS2(    xor   eax, ecx                      ) \
    AS2(    ror   ebx, 6                        ) \
    AS2(    mov   ecx, eax                      ) \
    AS2(    xor   ecx, ebx                      ) \
    AS2(    and   ecx, 0x00ff00ff               ) \
    AS2(    xor   ebx, ecx                      ) \
    AS2(    xor   eax, ecx                      ) \
    AS2(    rol   ebx, 9                        ) \
    AS2(    mov   ecx, eax                      ) \
    AS2(    xor   ecx, ebx                      ) \
    AS2(    and   ecx, 0xaaaaaaaa               ) \
    AS2(    xor   eax, ecx                      ) \
    AS2(    rol   eax, 1                        ) \
    AS2(    xor   ebx, ecx                      )
508
509
/* Uses FPERM algorithm from above

   left is in eax
   right is in ebx

   uses ecx

   Expands to a run of AS2() instructions, so it is only usable where AS2
   is defined (inside AsmProcess).  After the initial rotate of ebx, each
   group computes ecx = (eax ^ ebx) & mask, XORs it into both registers and
   then rotates eax, matching the C++ FPERM step by step.
*/
#define AsmFPERM()    \
    AS2(    ror  ebx, 1                     )    \
    AS2(    mov  ecx, eax                   )    \
    AS2(    xor  ecx, ebx                   )    \
    AS2(    and  ecx, 0xaaaaaaaa            )    \
    AS2(    xor  eax, ecx                   )    \
    AS2(    xor  ebx, ecx                   )    \
    AS2(    ror  eax, 9                     )    \
    AS2(    mov  ecx, ebx                   )    \
    AS2(    xor  ecx, eax                   )    \
    AS2(    and  ecx, 0x00ff00ff            )    \
    AS2(    xor  eax, ecx                   )    \
    AS2(    xor  ebx, ecx                   )    \
    AS2(    rol  eax, 6                     )    \
    AS2(    mov  ecx, ebx                   )    \
    AS2(    xor  ecx, eax                   )    \
    AS2(    and  ecx, 0x33333333            )    \
    AS2(    xor  eax, ecx                   )    \
    AS2(    xor  ebx, ecx                   )    \
    AS2(    rol  eax, 18                    )    \
    AS2(    mov  ecx, ebx                   )    \
    AS2(    xor  ecx, eax                   )    \
    AS2(    and  ecx, 0xffff0000            )    \
    AS2(    xor  eax, ecx                   )    \
    AS2(    xor  ebx, ecx                   )    \
    AS2(    rol  eax, 20                    )    \
    AS2(    mov  ecx, ebx                   )    \
    AS2(    xor  ecx, eax                   )    \
    AS2(    and  ecx, 0xf0f0f0f0            )    \
    AS2(    xor  eax, ecx                   )    \
    AS2(    xor  ebx, ecx                   )    \
    AS2(    ror  eax, 4                     )
549
550
551
552
/* DesRound implements this algorithm:

        word32 work = rotrFixed(r, 4U) ^ key[0];
        l ^= Spbox[6][(work) & 0x3f]
          ^  Spbox[4][(work >> 8) & 0x3f]
          ^  Spbox[2][(work >> 16) & 0x3f]
          ^  Spbox[0][(work >> 24) & 0x3f];
        work = r ^ key[1];
        l ^= Spbox[7][(work) & 0x3f]
          ^  Spbox[5][(work >> 8) & 0x3f]
          ^  Spbox[3][(work >> 16) & 0x3f]
          ^  Spbox[1][(work >> 24) & 0x3f];

        work = rotrFixed(l, 4U) ^ key[2];
        r ^= Spbox[6][(work) & 0x3f]
          ^  Spbox[4][(work >> 8) & 0x3f]
          ^  Spbox[2][(work >> 16) & 0x3f]
          ^  Spbox[0][(work >> 24) & 0x3f];
        work = l ^ key[3];
        r ^= Spbox[7][(work) & 0x3f]
          ^  Spbox[5][(work >> 8) & 0x3f]
          ^  Spbox[3][(work >> 16) & 0x3f]
          ^  Spbox[1][(work >> 24) & 0x3f];

   left  is in eax
   right is in ebx
   key   is in edx

   advances key for next round

   uses ecx, esi, and edi

   ebp must hold the base address of the lookup tables: table n is
   addressed at [ebp + index*4 + n*256].  A single `and ecx, 0x3f3f3f3f`
   masks all four 6-bit indices at once; cl and ch supply the two low
   indices, and after `shr ecx, 16` the two high ones.  The closing
   `add edx, 16` steps the key pointer over the four words just used.
*/
#define DesRound() \
    AS2(    mov   ecx,  ebx                     )\
    AS2(    mov   esi,  DWORD PTR [edx]         )\
    AS2(    ror   ecx,  4                       )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 6*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   eax,  [ebp + edi*4 + 4*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 2*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 4]     )\
    AS2(    xor   eax,  [ebp + edi*4]           )\
    AS2(    mov   ecx,  ebx                     )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 7*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   eax,  [ebp + edi*4 + 5*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   eax,  [ebp + esi*4 + 3*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 8]     )\
    AS2(    xor   eax,  [ebp + edi*4 + 1*256]   )\
    AS2(    mov   ecx,  eax                     )\
    AS2(    ror   ecx,  4                       )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 6*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 4*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 2*256]   )\
    AS2(    mov   esi,  DWORD PTR [edx + 12]    )\
    AS2(    xor   ebx,  [ebp + edi*4]           )\
    AS2(    mov   ecx,  eax                     )\
    AS2(    xor   ecx,  esi                     )\
    AS2(    and   ecx,  0x3f3f3f3f              )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 7*256]   )\
    AS2(    shr   ecx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 5*256]   )\
    AS2(    movzx esi,  cl                      )\
    AS2(    movzx edi,  ch                      )\
    AS2(    xor   ebx,  [ebp + esi*4 + 3*256]   )\
    AS2(    add   edx,  16                      )\
    AS2(    xor   ebx,  [ebp + edi*4 + 1*256]   )
641
642
/*
   Process one 8-byte block entirely in ia32 assembler.

   in   - source block (two dwords are loaded from it and byte-swapped)
   out  - destination block (two dwords are byte-swapped and stored to it)
   box  - base address of the lookup tables used by DesRound(); the caller
          in this file passes Spbox

   Flow: load and bswap the two halves, AsmIPERM, three groups of eight
   DesRound() with eax/ebx exchanged between groups, AsmFPERM, bswap,
   store (ebx first, then eax).

   The key pointer in edx starts at a hard-coded byte offset from `this`
   and is never reloaded between the three groups; DesRound() simply keeps
   advancing it.  NOTE(review): this relies on the three key schedules
   sitting back to back in the object at that offset - confirm against the
   class layout whenever DES_EDE3 or its bases change.

   MMX registers serve as scratch storage for general-purpose registers,
   which is why Process() only takes this path when isMMX is set; emms is
   issued before returning.
*/
#ifdef _MSC_VER
    __declspec(naked)
#else
    __attribute__ ((noinline))
#endif
void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const
{
#ifdef __GNUC__
    // GCC: the whole body is one asm statement assembled from string
    // fragments, so AS1/AS2 stringize their arguments.
    #define AS1(x)    #x ";"
    #define AS2(x, y) #x ", " #y ";"

    // On entry the operand constraints in EPILOG() place `this` in edx,
    // `in` in esi, `box` in eax and `out` in ecx.  PROLOG() saves ebx and
    // ebp on the stack, keeps ebp in mm6 and `out` in mm7, then moves the
    // table base into ebp.
    #define PROLOG()  \
    __asm__ __volatile__ \
    ( \
        ".intel_syntax noprefix;" \
        "push ebx;" \
        "push ebp;" \
        "movd mm6, ebp;" \
        "movd mm7, ecx;" \
        "mov ebp, eax;"
    // EPILOG() restores ebp and ebx, clears the MMX state and closes the
    // asm statement.  NOTE(review): eax, ecx, edx and esi are declared as
    // inputs only but are modified by the body - confirm this is safe for
    // the compilers in use.
    #define EPILOG()  \
        "pop ebp;" \
        "pop ebx;" \
        "emms;" \
        ".att_syntax;" \
            :  \
            : "d" (this), "S" (in), "a" (box), "c" (out) \
            : "%edi", "memory", "cc" \
    );

#else
    // MSVC: naked function, so prolog and epilog are written by hand.
    #define AS1(x)    __asm x
    #define AS2(x, y) __asm x, y

    // Sets up a frame, parks edi/ebx/esi/ebp in mm3..mm6, then loads esi
    // from [ebp + 8], copies ecx to edx (presumably `this` under the MSVC
    // member calling convention - confirm) and loads ebp from [ebp + 16].
    // Judging by the other paths, [ebp + 8] is `in` and [ebp + 16] is
    // `box`; [ebp + 12] is read later as the output pointer.
    #define PROLOG() \
        AS1(    push  ebp                       )   \
        AS2(    mov   ebp, esp                  )   \
        AS2(    movd  mm3, edi                  )   \
        AS2(    movd  mm4, ebx                  )   \
        AS2(    movd  mm5, esi                  )   \
        AS2(    movd  mm6, ebp                  )   \
        AS2(    mov   esi, DWORD PTR [ebp +  8] )   \
        AS2(    mov   edx, ecx                  )   \
        AS2(    mov   ebp, DWORD PTR [ebp + 16] )

    // ebp restored at end
    // `ret 12` also removes the 12 bytes of stack arguments.
    #define EPILOG() \
        AS2(    movd  edi, mm3                  )   \
        AS2(    movd  ebx, mm4                  )   \
        AS2(    movd  esi, mm5                  )   \
        AS2(    mov   esp, ebp                  )   \
        AS1(    pop   ebp                       )   \
        AS1(    emms                            )   \
        AS1(    ret 12                          )

#endif


    PROLOG()

    // copy of edx (the object pointer) kept in mm2; nothing in this
    // function reads mm2 back
    AS2(    movd  mm2, edx                      )

    // step edx from the object pointer to the first key schedule
    #ifdef OLD_GCC_OFFSET
        AS2(    add   edx, 60                       )   // des1 = des1 key
    #else
        AS2(    add   edx, 56                       )   // des1 = des1 key
    #endif

    AS2(    mov   eax, DWORD PTR [esi]          )
    AS2(    mov   ebx, DWORD PTR [esi + 4]      )
    AS1(    bswap eax                           )    // left
    AS1(    bswap ebx                           )    // right

    AsmIPERM()

    // first pass: eight DesRound() consume 8 * 16 bytes of key schedule
    DesRound() // 1
    DesRound() // 2
    DesRound() // 3
    DesRound() // 4
    DesRound() // 5
    DesRound() // 6
    DesRound() // 7
    DesRound() // 8

    // swap left and right
    AS2(    xchg  eax, ebx                      )

    // second pass: edx carries straight on into the next 128 bytes
    DesRound() // 1
    DesRound() // 2
    DesRound() // 3
    DesRound() // 4
    DesRound() // 5
    DesRound() // 6
    DesRound() // 7
    DesRound() // 8

    // swap left and right
    AS2(    xchg  eax, ebx                      )

    // third pass
    DesRound() // 1
    DesRound() // 2
    DesRound() // 3
    DesRound() // 4
    DesRound() // 5
    DesRound() // 6
    DesRound() // 7
    DesRound() // 8

    AsmFPERM()

    //end
    // ebp stops being the table base and gets back the value parked in mm6
    AS2(    movd  ebp, mm6                      )

    // swap and write out
    AS1(    bswap ebx                           )
    AS1(    bswap eax                           )

#ifdef __GNUC__
    AS2(    movd  esi, mm7                      )   // outBlock
#else
    AS2(    mov   esi, DWORD PTR [ebp + 12]     )   // outBlock
#endif

    AS2(    mov   DWORD PTR [esi],     ebx      )    // right first
    AS2(    mov   DWORD PTR [esi + 4], eax      )


    EPILOG()
}
772
773
774
775 #endif // defined(DO_DES_ASM)
776
777
778 } // namespace
779