1 /*
2 * sshdes.c: implementation of DES.
3 */
4
5 /*
6 * Background
7 * ----------
8 *
9 * The basic structure of DES is a Feistel network: the 64-bit cipher
10 * block is divided into two 32-bit halves L and R, and in each round,
11 * a mixing function is applied to one of them, the result is XORed
12 * into the other, and then the halves are swapped so that the other
13 * one will be the input to the mixing function next time. (This
14 * structure guarantees reversibility no matter whether the mixing
15 * function itself is bijective.)
16 *
17 * The mixing function for DES goes like this:
18 * + Extract eight contiguous 6-bit strings from the 32-bit word.
19 * They start at positions 4 bits apart, so each string overlaps
20 * the next one by one bit. At least one has to wrap cyclically
21 * round the end of the word.
22 * + XOR each of those strings with 6 bits of data from the key
23 * schedule (which consists of 8 x 6-bit strings per round).
24 * + Use the resulting 6-bit numbers as the indices into eight
25 * different lookup tables ('S-boxes'), each of which delivers a
26 * 4-bit output.
27 * + Concatenate those eight 4-bit values into a 32-bit word.
28 * + Finally, apply a fixed permutation P to that word.
29 *
30 * DES adds one more wrinkle on top of this structure, which is to
31 * conjugate it by a bitwise permutation of the cipher block. That is,
32 * before starting the main cipher rounds, the input bits are permuted
33 * according to a 64-bit permutation called IP, and after the rounds
34 * are finished, the output bits are permuted back again by applying
35 * the inverse of IP.
36 *
37 * This gives a lot of leeway to redefine the components of the cipher
38 * without actually changing the input and output. You could permute
39 * the bits in the output of any or all of the S-boxes, or reorder the
40 * S-boxes among themselves, and adjust the following permutation P to
41 * compensate. And you could adjust IP by post-composing a rotation of
42 * each 32-bit half, and adjust the starting offsets of the 6-bit
43 * S-box indices to compensate.
44 *
45 * test/desref.py demonstrates this by providing two equivalent forms
46 * of the cipher, called DES and SGTDES, which give the same output.
47 * DES is the form described in the original spec: if you make it
48 * print diagnostic output during the cipher and check it against the
49 * original, you should recognise the S-box outputs as matching the
50 * ones you expect. But SGTDES, which I egotistically name after
51 * myself, is much closer to the form implemented here: I've changed
52 * the permutation P to suit my implementation strategy and
53 * compensated by permuting the S-boxes, and also I've added a
54 * rotation right by 1 bit to IP so that only one S-box index has to
55 * wrap round the word and also so that the indices are nicely aligned
56 * for the constant-time selection system I'm using.
57 */
58
59 #include <stdio.h>
60
61 #include "ssh.h"
62 #include "mpint_i.h" /* we reuse the BignumInt system */
63
64 /* If you compile with -DDES_DIAGNOSTICS, intermediate results will be
65 * sent to debug() (so you also need to compile with -DDEBUG).
66 * Otherwise this ifdef will condition away all the debug() calls. */
#ifndef DES_DIAGNOSTICS
/* Diagnostics disabled: redefine debug() as a no-op expression, so the
 * call sites below need no #ifdef conditioning of their own. */
#undef debug
#define debug(...) ((void)0)
#endif
71
72 /*
73 * General utility functions.
74 */
/* Rotate x left by c bits (c reduced mod 32). The guard on k == 0
 * keeps both shift counts strictly below 32, so there is no UB for
 * any value of c. */
static inline uint32_t rol(uint32_t x, unsigned c)
{
    unsigned k = c & 31;
    return k ? (x << k) | (x >> (32 - k)) : x;
}
/* Rotate x right by c bits (c reduced mod 32): the mirror image of
 * rol(), written out directly rather than delegating to it. */
static inline uint32_t ror(uint32_t x, unsigned c)
{
    unsigned k = c & 31;
    return k ? (x >> k) | (x << (32 - k)) : x;
}
83
84 /*
85 * The hard part of doing DES in constant time is the S-box lookup.
86 *
87 * My strategy is to iterate over the whole lookup table! That's slow,
88 * but I don't see any way to avoid _something_ along those lines: in
89 * every round, every entry in every S-box is potentially needed, and
90 * if you can't change your memory access pattern based on the input
91 * data, it follows that you have to read a quantity of information
92 * equal to the size of all the S-boxes. (Unless they were to turn out
93 * to be significantly compressible, but I for one couldn't show them
94 * to be.)
95 *
96 * In more detail, I construct a sort of counter-based 'selection
97 * gadget', which is 15 bits wide and starts off with the top bit
98 * zero, the next eight bits all 1, and the bottom six set to the
99 * input S-box index:
100 *
101 * 011111111xxxxxx
102 *
103 * Now if you add 1 in the lowest bit position, then either it carries
104 * into the top section (resetting it to 100000000), or it doesn't do
105 * that yet. If you do that 64 times, then it will _guarantee_ to have
106 * ticked over into 100000000. In between those increments, the eight
107 * bits that started off as 11111111 will have stayed that way for
108 * some number of iterations and then become 00000000, and exactly how
109 * many iterations depends on the input index.
110 *
111 * The purpose of the 0 bit at the top is to absorb the carry when the
112 * switch happens, which means you can pack more than one gadget into
113 * the same machine word and have them all work in parallel without
 * each one interfering with the next.
115 *
116 * The next step is to use each of those 8-bit segments as a bit mask:
117 * each one is ANDed with a lookup table entry, and all the results
118 * are XORed together. So you end up with the bitwise XOR of some
119 * initial segment of the table entries. And the stored S-box tables
120 * are transformed in such a way that the real S-box values are given
121 * not by the individual entries, but by the cumulative XORs
122 * constructed in this way.
123 *
124 * A refinement is that I increment each gadget by 2 rather than 1
125 * each time, so I only iterate 32 times instead of 64. That's why
126 * there are 8 selection bits instead of 4: each gadget selects enough
127 * bits to reconstruct _two_ S-box entries, for a pair of indices
128 * (2n,2n+1), and then finally I use the low bit of the index to do a
129 * parallel selection between each of those pairs.
130 *
131 * The selection gadget is not quite 16 bits wide. So you can fit four
132 * of them across a 64-bit word at 16-bit intervals, which is also
133 * convenient because the place the S-box indices are coming from also
134 * has pairs of them separated by 16-bit distances, so it's easy to
135 * copy them into the gadgets in the first place.
136 */
137
138 /*
139 * The S-box data. Each pair of nonzero columns here describes one of
140 * the S-boxes, corresponding to the SGTDES tables in test/desref.py,
141 * under the following transformation.
142 *
143 * Take S-box #3 as an example. Its values in successive rows of this
144 * table are eb,e8,54,3d, ... So the cumulative XORs of initial
145 * sequences of those values are eb,(eb^e8),(eb^e8^54), ... which
146 * comes to eb,03,57,... Of _those_ values, the top nibble (e,0,5,...)
147 * gives the even-numbered entries in the S-box, in _reverse_ order
148 * (because a lower input index selects the XOR of a longer
149 * subsequence). The odd-numbered entries are given by XORing the two
150 * digits together: (e^b),(0^3),(5^7),... = 5,3,2,... And indeed, if
151 * you check SGTDES.sboxes[3] you find it ends ... 52 03 e5.
152 */
/* X-macro listing the 32 rows of the transformed S-box table: X is
 * expanded once per row with two 64-bit words, the first holding the
 * selection data for S-boxes 6,2,4,0 and the second for 7,3,5,1, in
 * the byte lanes indicated by the column-header comment below. */
#define SBOX_ITERATION(X) \
    /*  66  22  44  00    77  33  55  11 */ \
    X(0xf600970083008500, 0x0e00eb007b002e00) \
    X(0xda00e4009000e000, 0xad00e800a700b400) \
    X(0x1a009d003f003600, 0xf60054004300cd00) \
    X(0xaf00c500e900a900, 0x63003d00f2005900) \
    X(0xf300750079001400, 0x80005000a2008900) \
    X(0xa100d400d6007b00, 0xd3009000d300e100) \
    X(0x450087002600ac00, 0xae003c0031009c00) \
    X(0xd000b100b6003600, 0x3e006f0092005900) \
    X(0x4d008a0026001000, 0x89007a00b8004a00) \
    X(0xca00f5003f00ac00, 0x6f00f0003c009400) \
    X(0x92008d0090001000, 0x8c00c600ce004a00) \
    X(0xe2005900e9006d00, 0x790078007800fa00) \
    X(0x1300b10090008d00, 0xa300170027001800) \
    X(0xc70058005f006a00, 0x9c00c100e0006300) \
    X(0x9b002000f000f000, 0xf70057001600f900) \
    X(0xeb00b0009000af00, 0xa9006300b0005800) \
    X(0xa2001d00cf000000, 0x3800b00066000000) \
    X(0xf100da007900d000, 0xbc00790094007900) \
    X(0x570015001900ad00, 0x6f00ef005100cb00) \
    X(0xc3006100e9006d00, 0xc000b700f800f200) \
    X(0x1d005800b600d000, 0x67004d00cd002c00) \
    X(0xf400b800d600e000, 0x5e00a900b000e700) \
    X(0x5400d1003f009c00, 0xc90069002c005300) \
    X(0xe200e50060005900, 0x6a00b800c500f200) \
    X(0xdf0047007900d500, 0x7000ec004c00ea00) \
    X(0x7100d10060009c00, 0x3f00b10095005e00) \
    X(0x82008200f0002000, 0x87001d00cd008000) \
    X(0xd0007000af00c000, 0xe200be006100f200) \
    X(0x8000930060001000, 0x36006e0081001200) \
    X(0x6500a300d600ac00, 0xcf003d007d00c000) \
    X(0x9000700060009800, 0x62008100ad009200) \
    X(0xe000e4003f00f400, 0x5a00ed009000f200) \
    /* end of list */
188
189 /*
190 * The S-box mapping function. Expects two 32-bit input words: si6420
191 * contains the table indices for S-boxes 0,2,4,6 with their low bits
192 * starting at position 2 (for S-box 0) and going up in steps of 8.
193 * si7531 has indices 1,3,5,7 in the same bit positions.
194 */
static inline uint32_t des_S(uint32_t si6420, uint32_t si7531)
{
    debug("sindices: %02x %02x %02x %02x %02x %02x %02x %02x\n",
          0x3F & (si6420 >> 2), 0x3F & (si7531 >> 2),
          0x3F & (si6420 >> 10), 0x3F & (si7531 >> 10),
          0x3F & (si6420 >> 18), 0x3F & (si7531 >> 18),
          0x3F & (si6420 >> 26), 0x3F & (si7531 >> 26));

#ifdef SIXTY_FOUR_BIT
    /*
     * On 64-bit machines, we store the table in exactly the form
     * shown above, and make two 64-bit words containing four
     * selection gadgets each.
     */

    /* Set up the gadgets. The 'cNNNN' variables will be gradually
     * incremented, and the bits in positions FF00FF00FF00FF00 will
     * act as selectors for the words in the table.
     *
     * A side effect of moving the input indices further apart is that
     * they change order, because it's easier to keep a pair that were
     * originally 16 bits apart still 16 bits apart, which now makes
     * them adjacent instead of separated by one. So the fact that
     * si6420 turns into c6240 (with the 2,4 reversed) is not a typo!
     * This will all be undone when we rebuild the output word later.
     */
    uint64_t c6240 = ((si6420 | ((uint64_t)si6420 << 24))
                      & 0x00FC00FC00FC00FC) | 0xFF00FF00FF00FF00;
    uint64_t c7351 = ((si7531 | ((uint64_t)si7531 << 24))
                      & 0x00FC00FC00FC00FC) | 0xFF00FF00FF00FF00;
    debug("S in: c6240=%016"PRIx64" c7351=%016"PRIx64"\n", c6240, c7351);

    /* Iterate over the table. The 'sNNNN' variables accumulate the
     * XOR of all the table entries not masked out. */
    static const struct tbl { uint64_t t6240, t7351; } tbl[32] = {
#define TABLE64(a, b) { a, b },
        SBOX_ITERATION(TABLE64)
#undef TABLE64
    };
    uint64_t s6240 = 0, s7351 = 0;
    /* NOTE: this loop deliberately visits all 32 rows regardless of
     * the input indices - that's the whole constant-time point. */
    for (const struct tbl *t = tbl, *limit = tbl + 32; t < limit; t++) {
        s6240 ^= c6240 & t->t6240; c6240 += 0x0008000800080008;
        s7351 ^= c7351 & t->t7351; c7351 += 0x0008000800080008;
    }
    debug("S out: s6240=%016"PRIx64" s7351=%016"PRIx64"\n", s6240, s7351);

    /* Final selection between each even/odd pair: mask off the low
     * bits of all the input indices (which haven't changed throughout
     * the iteration), and multiply by a bit mask that will turn each
     * set bit into a mask covering the upper nibble of the selected
     * pair. Then use those masks to control which set of lower
     * nibbles is XORed into the upper nibbles. */
    s6240 ^= (s6240 << 4) & ((0xf000/0x004) * (c6240 & 0x0004000400040004));
    s7351 ^= (s7351 << 4) & ((0xf000/0x004) * (c7351 & 0x0004000400040004));

    /* Now the eight final S-box outputs are in the upper nibble of
     * each selection position. Mask away the rest of the clutter. */
    s6240 &= 0xf000f000f000f000;
    s7351 &= 0xf000f000f000f000;
    debug("s0=%x s1=%x s2=%x s3=%x s4=%x s5=%x s6=%x s7=%x\n",
          (unsigned)(0xF & (s6240 >> 12)),
          (unsigned)(0xF & (s7351 >> 12)),
          (unsigned)(0xF & (s6240 >> 44)),
          (unsigned)(0xF & (s7351 >> 44)),
          (unsigned)(0xF & (s6240 >> 28)),
          (unsigned)(0xF & (s7351 >> 28)),
          (unsigned)(0xF & (s6240 >> 60)),
          (unsigned)(0xF & (s7351 >> 60)));

    /* Combine them all into a single 32-bit output word, which will
     * come out in the order 76543210. */
    uint64_t combined = (s6240 >> 12) | (s7351 >> 8);
    return combined | (combined >> 24);

#else /* SIXTY_FOUR_BIT */
    /*
     * For 32-bit platforms, we do the same thing but in four 32-bit
     * words instead of two 64-bit ones, so the CPU doesn't have to
     * waste time propagating carries or shifted bits between the two
     * halves of a uint64 that weren't needed anyway.
     */

    /* Set up the gadgets */
    uint32_t c40 = ((si6420     ) & 0x00FC00FC) | 0xFF00FF00;
    uint32_t c62 = ((si6420 >> 8) & 0x00FC00FC) | 0xFF00FF00;
    uint32_t c51 = ((si7531     ) & 0x00FC00FC) | 0xFF00FF00;
    uint32_t c73 = ((si7531 >> 8) & 0x00FC00FC) | 0xFF00FF00;
    debug("S in: c40=%08"PRIx32" c62=%08"PRIx32
          " c51=%08"PRIx32" c73=%08"PRIx32"\n", c40, c62, c51, c73);

    /* Iterate over the table. Each 64-bit row is split into its low
     * and high 32-bit halves at compile time. */
    static const struct tbl { uint32_t t40, t62, t51, t73; } tbl[32] = {
#define TABLE32(a, b) { ((uint32_t)a), (a>>32), ((uint32_t)b), (b>>32) },
        SBOX_ITERATION(TABLE32)
#undef TABLE32
    };
    uint32_t s40 = 0, s62 = 0, s51 = 0, s73 = 0;
    /* As in the 64-bit branch: always all 32 rows, for constant time. */
    for (const struct tbl *t = tbl, *limit = tbl + 32; t < limit; t++) {
        s40 ^= c40 & t->t40; c40 += 0x00080008;
        s62 ^= c62 & t->t62; c62 += 0x00080008;
        s51 ^= c51 & t->t51; c51 += 0x00080008;
        s73 ^= c73 & t->t73; c73 += 0x00080008;
    }
    debug("S out: s40=%08"PRIx32" s62=%08"PRIx32
          " s51=%08"PRIx32" s73=%08"PRIx32"\n", s40, s62, s51, s73);

    /* Final selection within each pair */
    s40 ^= (s40 << 4) & ((0xf000/0x004) * (c40 & 0x00040004));
    s62 ^= (s62 << 4) & ((0xf000/0x004) * (c62 & 0x00040004));
    s51 ^= (s51 << 4) & ((0xf000/0x004) * (c51 & 0x00040004));
    s73 ^= (s73 << 4) & ((0xf000/0x004) * (c73 & 0x00040004));

    /* Clean up the clutter */
    s40 &= 0xf000f000;
    s62 &= 0xf000f000;
    s51 &= 0xf000f000;
    s73 &= 0xf000f000;
    debug("s0=%x s1=%x s2=%x s3=%x s4=%x s5=%x s6=%x s7=%x\n",
          (unsigned)(0xF & (s40 >> 12)),
          (unsigned)(0xF & (s51 >> 12)),
          (unsigned)(0xF & (s62 >> 12)),
          (unsigned)(0xF & (s73 >> 12)),
          (unsigned)(0xF & (s40 >> 28)),
          (unsigned)(0xF & (s51 >> 28)),
          (unsigned)(0xF & (s62 >> 28)),
          (unsigned)(0xF & (s73 >> 28)));

    /* Recombine and return */
    return (s40 >> 12) | (s62 >> 4) | (s51 >> 8) | (s73);

#endif /* SIXTY_FOUR_BIT */

}
328
329 /*
330 * Now for the permutation P. The basic strategy here is to use a
331 * Benes network: in each stage, the bit at position i is allowed to
332 * either stay where it is or swap with i ^ D, where D is a power of 2
333 * that varies with each phase. (So when D=1, pairs of the form
334 * {2n,2n+1} can swap; when D=2, the pairs are {4n+j,4n+j+2} for
335 * j={0,1}, and so on.)
336 *
337 * You can recursively construct a Benes network for an arbitrary
338 * permutation, in which the values of D iterate across all the powers
339 * of 2 less than the permutation size and then go back again. For
340 * example, the typical presentation for 32 bits would have D iterate
341 * over 16,8,4,2,1,2,4,8,16, and there's an easy algorithm that can
342 * express any permutation in that form by deciding which pairs of
343 * bits to swap in the outer pair of stages and then recursing to do
344 * all the stages in between.
345 *
346 * Actually implementing the swaps is easy when they're all between
347 * bits at the same separation: make the value x ^ (x >> D), mask out
348 * just the bits in the low position of a pair that needs to swap, and
349 * then use the resulting value y to make x ^ y ^ (y << D) which is
350 * the swapped version.
351 *
352 * In this particular case, I processed the bit indices in the other
353 * order (going 1,2,4,8,16,8,4,2,1), which makes no significant
354 * difference to the construction algorithm (it's just a relabelling),
355 * but it now means that the first two steps only permute entries
356 * within the output of each S-box - and therefore we can leave them
357 * completely out, in favour of just defining the S-boxes so that
358 * those permutation steps are already applied. Furthermore, by
359 * exhaustive search over the rest of the possible bit-orders for each
360 * S-box, I was able to find a version of P which could be represented
361 * in such a way that two further phases had all their control bits
362 * zero and could be skipped. So the number of swap stages is reduced
363 * to 5 from the 9 that might have been needed.
364 */
365
/* One stage of a Benes network: every bit position i with the
 * corresponding mask bit set (mask selects the LOWER index of each
 * pair) is swapped with position i + D. Applying the same step twice
 * is the identity. */
static inline uint32_t des_benes_step(uint32_t v, unsigned D, uint32_t mask)
{
    uint32_t swaps = mask & (v ^ (v >> D));
    return (swaps << D) ^ swaps ^ v;
}
371
des_P(uint32_t v_orig)372 static inline uint32_t des_P(uint32_t v_orig)
373 {
374 uint32_t v = v_orig;
375
376 /* initial stages with distance 1,2 are part of the S-box data table */
377 v = des_benes_step(v, 4, 0x07030702);
378 v = des_benes_step(v, 8, 0x004E009E);
379 v = des_benes_step(v, 16, 0x0000D9D3);
380 /* v = des_benes_step(v, 8, 0x00000000); no-op, so we can skip it */
381 v = des_benes_step(v, 4, 0x05040004);
382 /* v = des_benes_step(v, 2, 0x00000000); no-op, so we can skip it */
383 v = des_benes_step(v, 1, 0x04045015);
384
385 debug("P(%08"PRIx32") = %08"PRIx32"\n", v_orig, v);
386
387 return v;
388 }
389
390 /*
391 * Putting the S and P functions together, and adding in the round key
392 * as well, gives us the full mixing function f.
393 */
394
/* The full DES mixing function f: XOR in the round key (split across
 * the odd- and even-numbered S-box index words, with R rotated by 4
 * for the even set), run the S-boxes, then the permutation P. */
static inline uint32_t des_f(uint32_t R, uint32_t K7531, uint32_t K6420)
{
    uint32_t odd_indices = K7531 ^ R;
    uint32_t even_indices = K6420 ^ rol(R, 4);
    return des_P(des_S(even_indices, odd_indices));
}
400
401 /*
402 * The key schedule, and the function to set it up.
403 */
404
typedef struct des_keysched des_keysched;
struct des_keysched {
    /* Per-round key material for the 16 rounds: k7531[i] carries the
     * 6-bit subkeys feeding S-boxes 7,5,3,1 and k6420[i] those for
     * 6,4,2,0, laid out in the bit positions des_f()/des_S() expect. */
    uint32_t k7531[16], k6420[16];
};
409
410 /*
411 * Simplistic function to select an arbitrary sequence of bits from
412 * one value and glue them together into another value. bitnums[]
413 * gives the sequence of bit indices of the input, from the highest
414 * output bit downwards. An index of -1 means that output bit is left
415 * at zero.
416 *
417 * This function is only used during key setup, so it doesn't need to
418 * be highly optimised.
419 */
/* Gather an arbitrary selection of bits from 'input' into a new value.
 * bitnums[i] gives the input bit index for the (size-1-i)th output bit,
 * i.e. the array is listed from the highest output bit downwards; an
 * entry of -1 leaves that output bit zero. Key-setup only, so clarity
 * beats speed here. */
static inline uint64_t bitsel(
    uint64_t input, const int8_t *bitnums, size_t size)
{
    uint64_t out = 0;
    for (size_t i = 0; i < size; i++) {
        int bitpos = bitnums[i];
        out <<= 1;
        if (bitpos >= 0)
            out |= (input >> bitpos) & 1;
    }
    return out;
}
432
/* Expand a 64-bit DES key (parity bits included but ignored) into the
 * 16 per-round subkey pairs stored in *sched. */
static void des_key_setup(uint64_t key, des_keysched *sched)
{
    /* PC-1: select the 56 non-parity key bits into two 28-bit halves
     * C and D, placed at bits 32..59 and 0..27 respectively (the four
     * -1 entries leave a gap between the halves). */
    static const int8_t PC1[] = {
         7, 15, 23, 31, 39, 47, 55, 63,  6, 14, 22, 30, 38, 46,
        54, 62,  5, 13, 21, 29, 37, 45, 53, 61,  4, 12, 20, 28,
        -1, -1, -1, -1,
         1,  9, 17, 25, 33, 41, 49, 57,  2, 10, 18, 26, 34, 42,
        50, 58,  3, 11, 19, 27, 35, 43, 51, 59, 36, 44, 52, 60,
    };
    /* PC-2, split into the two output words and padded with -1 so
     * that each round's 6-bit subkey lands in the bit positions
     * des_f() expects. */
    static const int8_t PC2_7531[] = {
        46, 43, 49, 36, 59, 55, -1, -1,     /* index into S-box 7 */
        37, 41, 48, 56, 34, 52, -1, -1,     /* index into S-box 5 */
        15,  4, 25, 19,  9,  1, -1, -1,     /* index into S-box 3 */
        12,  7, 17,  0, 22,  3, -1, -1,     /* index into S-box 1 */
    };
    static const int8_t PC2_6420[] = {
        57, 32, 45, 54, 39, 50, -1, -1,     /* index into S-box 6 */
        44, 53, 33, 40, 47, 58, -1, -1,     /* index into S-box 4 */
        26, 16,  5, 11, 23,  8, -1, -1,     /* index into S-box 2 */
        10, 14,  6, 20, 27, 24, -1, -1,     /* index into S-box 0 */
    };
    /* The standard DES rotation schedule: 1 or 2 bits per round,
     * totalling 28 so the halves return to their start position. */
    static const int leftshifts[] = {1,1,2,2,2,2,2,2,1,2,2,2,2,2,2,1};

    /* Select 56 bits from the 64-bit input key integer (the low bit
     * of each input byte is unused), into a word consisting of two
     * 28-bit integers starting at bits 0 and 32. */
    uint64_t CD = bitsel(key, PC1, lenof(PC1));

    for (size_t i = 0; i < 16; i++) {
        /* Rotate each 28-bit half of CD left by 1 or 2 bits (varying
         * between rounds) */
        CD <<= leftshifts[i];
        CD = (CD & 0x0FFFFFFF0FFFFFFF) | ((CD & 0xF0000000F0000000) >> 28);

        /* Select key bits from the rotated word to use during the
         * actual cipher */
        sched->k7531[i] = bitsel(CD, PC2_7531, lenof(PC2_7531));
        sched->k6420[i] = bitsel(CD, PC2_6420, lenof(PC2_6420));
    }
}
473
474 /*
475 * Helper routines for dealing with 64-bit blocks in the form of an L
476 * and R word.
477 */
478
/* A 64-bit cipher block, held as its two 32-bit Feistel halves. */
typedef struct LR LR;
struct LR { uint32_t L, R; };
481
des_load_lr(const void * vp)482 static inline LR des_load_lr(const void *vp)
483 {
484 const uint8_t *p = (const uint8_t *)vp;
485 LR out;
486 out.L = GET_32BIT_MSB_FIRST(p);
487 out.R = GET_32BIT_MSB_FIRST(p+4);
488 return out;
489 }
490
/* Serialise an LR pair back out as 8 big-endian bytes, L first. */
static inline void des_store_lr(void *vp, LR lr)
{
    uint8_t *bytes = (uint8_t *)vp;
    PUT_32BIT_MSB_FIRST(bytes, lr.L);
    PUT_32BIT_MSB_FIRST(bytes + 4, lr.R);
}
497
/* XOR two blocks together, half by half. */
static inline LR des_xor_lr(LR a, LR b)
{
    LR out;
    out.L = a.L ^ b.L;
    out.R = a.R ^ b.R;
    return out;
}
504
/* Exchange the L and R halves of a block. */
static inline LR des_swap_lr(LR in)
{
    LR swapped = { in.R, in.L };
    return swapped;
}
512
513 /*
514 * The initial and final permutations of official DES are in a
515 * restricted form, in which the 'before' and 'after' positions of a
516 * given data bit are derived from each other by permuting the bits of
517 * the _index_ and flipping some of them. This allows the permutation
518 * to be performed effectively by a method that looks rather like
519 * _half_ of a general Benes network, because the restricted form
520 * means only half of it is actually needed.
521 *
522 * _Our_ initial and final permutations include a rotation by 1 bit,
523 * but it's still easier to just suffix that to the standard IP/FP
524 * than to regenerate everything using a more general method.
525 *
526 * Because we're permuting 64 bits in this case, between two 32-bit
527 * words, there's a separate helper function for this code that
528 * doesn't look quite like des_benes_step() above.
529 */
530
/* One half-Benes swap stage spanning the two 32-bit words of a block:
 * wherever the mask has a bit set, bit i of *L is exchanged with bit
 * i+D of *R. Self-inverse, like des_benes_step(). */
static inline void des_bitswap_IP_FP(uint32_t *L, uint32_t *R,
                                     unsigned D, uint32_t mask)
{
    uint32_t toswap = (*L ^ (*R >> D)) & mask;
    *L ^= toswap;
    *R ^= toswap << D;
}
538
/* The initial permutation: standard DES IP performed as five bit-swap
 * stages, followed by this implementation's extra rotate-right-by-1 of
 * each half (part of the SGTDES variant described at the top of the
 * file). The stage order and L/R argument order are significant. */
static inline LR des_IP(LR lr)
{
    des_bitswap_IP_FP(&lr.R, &lr.L, 4, 0x0F0F0F0F);
    des_bitswap_IP_FP(&lr.R, &lr.L, 16, 0x0000FFFF);
    des_bitswap_IP_FP(&lr.L, &lr.R, 2, 0x33333333);
    des_bitswap_IP_FP(&lr.L, &lr.R, 8, 0x00FF00FF);
    des_bitswap_IP_FP(&lr.R, &lr.L, 1, 0x55555555);

    /* Extra rotation on top of standard IP, so the S-box indices line
     * up for the constant-time selection gadgets. */
    lr.L = ror(lr.L, 1);
    lr.R = ror(lr.R, 1);

    return lr;
}
552
/* The final permutation: exact inverse of des_IP(). First undo the
 * extra rotation, then run the (self-inverse) swap stages in reverse
 * order. */
static inline LR des_FP(LR lr)
{
    lr.L = rol(lr.L, 1);
    lr.R = rol(lr.R, 1);

    des_bitswap_IP_FP(&lr.R, &lr.L, 1, 0x55555555);
    des_bitswap_IP_FP(&lr.L, &lr.R, 8, 0x00FF00FF);
    des_bitswap_IP_FP(&lr.L, &lr.R, 2, 0x33333333);
    des_bitswap_IP_FP(&lr.R, &lr.L, 16, 0x0000FFFF);
    des_bitswap_IP_FP(&lr.R, &lr.L, 4, 0x0F0F0F0F);

    return lr;
}
566
567 /*
568 * The main cipher functions, which are identical except that they use
569 * the key schedule in opposite orders.
570 *
571 * We provide a version without the initial and final permutations,
572 * for use in triple-DES mode (no sense undoing and redoing it in
573 * between the phases).
574 */
575
/* One Feistel round: mix R through f() with this round's subkeys, XOR
 * the result into L, and swap the halves for the next round. */
static inline LR des_round(LR in, const des_keysched *sched, size_t round)
{
    LR next;
    next.L = in.R;
    next.R = des_f(in.R, sched->k7531[round], sched->k6420[round]) ^ in.L;
    return next;
}
583
/* The 16 Feistel rounds, without IP/FP (so triple-DES can chain three
 * of these inside one IP/FP conjugation). The round index is computed
 * as start + i*step in size_t arithmetic, which wraps modulo 2^SIZE,
 * so the DECIPHER pair (15, -1) walks the schedule backwards. The
 * trailing swap undoes the final round's half-exchange. */
static inline LR des_inner_cipher(LR lr, const des_keysched *sched,
                                  size_t start, size_t step)
{
    for (unsigned i = 0; i < 16; i++)
        lr = des_round(lr, sched, start + i * step);
    return des_swap_lr(lr);
}
605
/* A complete single-DES operation: the inner rounds conjugated by the
 * initial and final permutations. */
static inline LR des_full_cipher(LR lr, const des_keysched *sched,
                                 size_t start, size_t step)
{
    return des_FP(des_inner_cipher(des_IP(lr), sched, start, step));
}
614
615 /*
616 * Parameter pairs for the start,step arguments to the cipher routines
617 * above, causing them to use the same key schedule in opposite orders.
618 */
619 #define ENCIPHER 0, 1 /* for encryption */
620 #define DECIPHER 15, -1 /* for decryption */
621
622 /* ----------------------------------------------------------------------
623 * Single-DES
624 */
625
struct des_cbc_ctx {
    des_keysched sched;     /* expanded key */
    LR iv;                  /* CBC chaining value */
    ssh_cipher ciph;        /* base class, recovered via container_of */
};
631
des_cbc_new(const ssh_cipheralg * alg)632 static ssh_cipher *des_cbc_new(const ssh_cipheralg *alg)
633 {
634 struct des_cbc_ctx *ctx = snew(struct des_cbc_ctx);
635 ctx->ciph.vt = alg;
636 return &ctx->ciph;
637 }
638
/* Wipe the key material, then release the context. */
static void des_cbc_free(ssh_cipher *ciph)
{
    struct des_cbc_ctx *c = container_of(ciph, struct des_cbc_ctx, ciph);
    smemclr(c, sizeof(*c));
    sfree(c);
}
645
/* Expand the 8 key bytes (big-endian) into the round-key schedule. */
static void des_cbc_setkey(ssh_cipher *ciph, const void *vkey)
{
    struct des_cbc_ctx *c = container_of(ciph, struct des_cbc_ctx, ciph);
    const uint8_t *keybytes = (const uint8_t *)vkey;
    uint64_t key = GET_64BIT_MSB_FIRST(keybytes);
    des_key_setup(key, &c->sched);
}
652
/* Load the 8-byte IV into the CBC chaining state. */
static void des_cbc_setiv(ssh_cipher *ciph, const void *iv)
{
    struct des_cbc_ctx *c = container_of(ciph, struct des_cbc_ctx, ciph);
    c->iv = des_load_lr(iv);
}
658
/* CBC encryption in place: each plaintext block is XORed with the
 * previous ciphertext (initially the IV) before encryption, and each
 * ciphertext becomes the next chaining value. len is a multiple of 8. */
static void des_cbc_encrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des_cbc_ctx *c = container_of(ciph, struct des_cbc_ctx, ciph);
    uint8_t *blk = (uint8_t *)vdata;
    while (len > 0) {
        LR xored = des_xor_lr(des_load_lr(blk), c->iv);
        c->iv = des_full_cipher(xored, &c->sched, ENCIPHER);
        des_store_lr(blk, c->iv);
        blk += 8;
        len -= 8;
    }
}
671
/* CBC decryption in place: decrypt each block, XOR with the chaining
 * value to recover plaintext, then chain the (saved) ciphertext. */
static void des_cbc_decrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des_cbc_ctx *c = container_of(ciph, struct des_cbc_ctx, ciph);
    uint8_t *blk = (uint8_t *)vdata;
    while (len > 0) {
        LR ciphertext = des_load_lr(blk);
        LR deciphered = des_full_cipher(ciphertext, &c->sched, DECIPHER);
        des_store_lr(blk, des_xor_lr(deciphered, c->iv));
        c->iv = ciphertext;
        blk += 8;
        len -= 8;
    }
}
684
/* Single-DES CBC under its standard SSH-2 algorithm name. */
const ssh_cipheralg ssh_des = {
    .new = des_cbc_new,
    .free = des_cbc_free,
    .setiv = des_cbc_setiv,
    .setkey = des_cbc_setkey,
    .encrypt = des_cbc_encrypt,
    .decrypt = des_cbc_decrypt,
    .ssh2_id = "des-cbc",
    .blksize = 8,
    .real_keybits = 56,
    .padded_keybytes = 8,
    .flags = SSH_CIPHER_IS_CBC,
    .text_name = "single-DES CBC",
};
699
const ssh_cipheralg ssh_des_sshcom_ssh2 = {
    /* Same as ssh_des above, but with the ssh.com vendor-specific
     * SSH-2 ID string */
    .new = des_cbc_new,
    .free = des_cbc_free,
    .setiv = des_cbc_setiv,
    .setkey = des_cbc_setkey,
    .encrypt = des_cbc_encrypt,
    .decrypt = des_cbc_decrypt,
    .ssh2_id = "des-cbc@ssh.com",
    .blksize = 8,
    .real_keybits = 56,
    .padded_keybytes = 8,
    .flags = SSH_CIPHER_IS_CBC,
    .text_name = "single-DES CBC",
};
715
/* The two single-DES variants, grouped for algorithm negotiation. */
static const ssh_cipheralg *const des_list[] = {
    &ssh_des,
    &ssh_des_sshcom_ssh2
};

const ssh2_ciphers ssh2_des = { lenof(des_list), des_list };
722
723 /* ----------------------------------------------------------------------
724 * Triple-DES CBC, SSH-2 style. The CBC mode treats the three
725 * invocations of DES as a single unified cipher, and surrounds it
726 * with just one layer of CBC, so only one IV is needed.
727 */
728
struct des3_cbc1_ctx {
    des_keysched sched[3];  /* one key schedule per DES invocation */
    LR iv;                  /* the single CBC chaining value */
    ssh_cipher ciph;        /* base class, recovered via container_of */
};
734
des3_cbc1_new(const ssh_cipheralg * alg)735 static ssh_cipher *des3_cbc1_new(const ssh_cipheralg *alg)
736 {
737 struct des3_cbc1_ctx *ctx = snew(struct des3_cbc1_ctx);
738 ctx->ciph.vt = alg;
739 return &ctx->ciph;
740 }
741
/* Wipe all three key schedules, then release the context. */
static void des3_cbc1_free(ssh_cipher *ciph)
{
    struct des3_cbc1_ctx *c = container_of(ciph, struct des3_cbc1_ctx, ciph);
    smemclr(c, sizeof(*c));
    sfree(c);
}
748
/* Expand each of the three consecutive 8-byte keys into a schedule. */
static void des3_cbc1_setkey(ssh_cipher *ciph, const void *vkey)
{
    struct des3_cbc1_ctx *c = container_of(ciph, struct des3_cbc1_ctx, ciph);
    const uint8_t *kp = (const uint8_t *)vkey;
    for (size_t i = 0; i < 3; i++, kp += 8)
        des_key_setup(GET_64BIT_MSB_FIRST(kp), &c->sched[i]);
}
756
/* Load the single 8-byte IV for the unified triple-DES CBC layer. */
static void des3_cbc1_setiv(ssh_cipher *ciph, const void *iv)
{
    struct des3_cbc1_ctx *c = container_of(ciph, struct des3_cbc1_ctx, ciph);
    c->iv = des_load_lr(iv);
}
762
/* Outer-CBC EDE encryption: one CBC layer wraps the whole E-D-E
 * sandwich, and the three DES passes share a single IP/FP conjugation
 * rather than undoing and redoing it between passes. */
static void des3_cbc1_cbc_encrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des3_cbc1_ctx *c = container_of(ciph, struct des3_cbc1_ctx, ciph);
    uint8_t *blk = (uint8_t *)vdata;
    while (len > 0) {
        /* CBC chaining happens outside the triple cipher */
        LR lr = des_IP(des_xor_lr(des_load_lr(blk), c->iv));

        /* Encrypt with key 1, decrypt with key 2, encrypt with key 3 */
        lr = des_inner_cipher(lr, &c->sched[0], ENCIPHER);
        lr = des_inner_cipher(lr, &c->sched[1], DECIPHER);
        lr = des_inner_cipher(lr, &c->sched[2], ENCIPHER);

        c->iv = des_FP(lr);
        des_store_lr(blk, c->iv);
        blk += 8;
        len -= 8;
    }
}
783
/* Outer-CBC EDE decryption: the three passes in the reverse order of
 * encryption (D with key 3, E with key 2, D with key 1), again inside
 * one shared IP/FP conjugation. */
static void des3_cbc1_cbc_decrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des3_cbc1_ctx *c = container_of(ciph, struct des3_cbc1_ctx, ciph);
    uint8_t *blk = (uint8_t *)vdata;
    while (len > 0) {
        LR ciphertext = des_load_lr(blk);

        LR lr = des_IP(ciphertext);
        lr = des_inner_cipher(lr, &c->sched[2], DECIPHER);
        lr = des_inner_cipher(lr, &c->sched[1], ENCIPHER);
        lr = des_inner_cipher(lr, &c->sched[0], DECIPHER);

        des_store_lr(blk, des_xor_lr(des_FP(lr), c->iv));
        c->iv = ciphertext;
        blk += 8;
        len -= 8;
    }
}
803
/* Vtable for SSH-2 "3des-cbc": EDE triple-DES with a single outer CBC
 * chain. 24 key bytes are consumed, although only 168 bits are real
 * key material (the DES parity bits are ignored). */
const ssh_cipheralg ssh_3des_ssh2 = {
    .new = des3_cbc1_new,
    .free = des3_cbc1_free,
    .setiv = des3_cbc1_setiv,
    .setkey = des3_cbc1_setkey,
    .encrypt = des3_cbc1_cbc_encrypt,
    .decrypt = des3_cbc1_cbc_decrypt,
    .ssh2_id = "3des-cbc",
    .blksize = 8,
    .real_keybits = 168,
    .padded_keybytes = 24,
    .flags = SSH_CIPHER_IS_CBC,
    .text_name = "triple-DES CBC",
};
818
819 /* ----------------------------------------------------------------------
820 * Triple-DES in SDCTR mode. Again, the three DES instances are
821 * treated as one big cipher, with a single counter encrypted through
822 * all three.
823 */
824
/* Number of BignumInt words needed to hold the 64-bit block counter. */
#define SDCTR_WORDS (8 / BIGNUM_INT_BYTES)

struct des3_sdctr_ctx {
    des_keysched sched[3];          /* one key schedule per DES instance */
    /* 64-bit counter, least significant word first (see setiv). */
    BignumInt counter[SDCTR_WORDS];
    ssh_cipher ciph;                /* vtable wrapper handed back to callers */
};
832
des3_sdctr_new(const ssh_cipheralg * alg)833 static ssh_cipher *des3_sdctr_new(const ssh_cipheralg *alg)
834 {
835 struct des3_sdctr_ctx *ctx = snew(struct des3_sdctr_ctx);
836 ctx->ciph.vt = alg;
837 return &ctx->ciph;
838 }
839
/* Zeroise key material and counter state, then release the context. */
static void des3_sdctr_free(ssh_cipher *ciph)
{
    struct des3_sdctr_ctx *ctx =
        container_of(ciph, struct des3_sdctr_ctx, ciph);

    smemclr(ctx, sizeof(*ctx));
    sfree(ctx);
}
847
/*
 * Key setup for SDCTR triple-DES: expand the three consecutive 8-byte
 * DES keys (big-endian) into their key schedules.
 */
static void des3_sdctr_setkey(ssh_cipher *ciph, const void *vkey)
{
    struct des3_sdctr_ctx *ctx =
        container_of(ciph, struct des3_sdctr_ctx, ciph);
    const uint8_t *keybytes = (const uint8_t *)vkey;

    for (size_t i = 0; i < 3; i++, keybytes += 8)
        des_key_setup(GET_64BIT_MSB_FIRST(keybytes), &ctx->sched[i]);
}
856
/*
 * IV setup for SDCTR mode: the 8-byte big-endian IV is the initial
 * counter value. Unpack it into BignumInt words with counter[0]
 * holding the least significant word.
 */
static void des3_sdctr_setiv(ssh_cipher *ciph, const void *viv)
{
    struct des3_sdctr_ctx *ctx =
        container_of(ciph, struct des3_sdctr_ctx, ciph);
    const uint8_t *iv = (const uint8_t *)viv;

    for (unsigned i = 0; i < SDCTR_WORDS; i++) {
        const uint8_t *wordptr = iv + 8 - (i + 1) * BIGNUM_INT_BYTES;
        ctx->counter[i] = GET_BIGNUMINT_MSB_FIRST(wordptr);
    }
}
868
/*
 * SDCTR encrypt/decrypt (the operation is its own inverse): for each
 * 8-byte block, triple-encrypt the current counter value to make
 * keystream, XOR it into the data in place, and increment the counter.
 */
static void des3_sdctr_encrypt_decrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des3_sdctr_ctx *ctx =
        container_of(ciph, struct des3_sdctr_ctx, ciph);
    uint8_t *blk = (uint8_t *)vdata;
    uint8_t ctr_block[8];

    while (len > 0) {
        /* Serialise the counter, big-endian, into an 8-byte block. */
        for (unsigned i = 0; i < SDCTR_WORDS; i++)
            PUT_BIGNUMINT_MSB_FIRST(
                ctr_block + 8 - (i + 1) * BIGNUM_INT_BYTES, ctx->counter[i]);

        /* Advance the counter, propagating the carry across words. */
        BignumCarry carry = 1;
        for (unsigned i = 0; i < SDCTR_WORDS; i++)
            BignumADC(ctx->counter[i], carry, ctx->counter[i], 0, carry);

        /* EDE-encrypt the counter block to produce the keystream,
         * applying IP and FP only once around the whole triple cipher. */
        LR lr = des_IP(des_load_lr(ctr_block));
        lr = des_inner_cipher(lr, &ctx->sched[0], ENCIPHER);
        lr = des_inner_cipher(lr, &ctx->sched[1], DECIPHER);
        lr = des_inner_cipher(lr, &ctx->sched[2], ENCIPHER);
        LR keystream = des_FP(lr);

        /* XOR the keystream into the data in place. */
        des_store_lr(blk, des_xor_lr(des_load_lr(blk), keystream));

        blk += 8;
        len -= 8;
    }

    /* Don't leave keystream-related material on the stack. */
    smemclr(ctr_block, sizeof(ctr_block));
}
900
/* Vtable for SSH-2 "3des-ctr" (RFC 4344 SDCTR): the same EDE triple
 * cipher used in counter mode, so encrypt and decrypt are the same
 * operation. */
const ssh_cipheralg ssh_3des_ssh2_ctr = {
    .new = des3_sdctr_new,
    .free = des3_sdctr_free,
    .setiv = des3_sdctr_setiv,
    .setkey = des3_sdctr_setkey,
    .encrypt = des3_sdctr_encrypt_decrypt,
    .decrypt = des3_sdctr_encrypt_decrypt,
    .ssh2_id = "3des-ctr",
    .blksize = 8,
    .real_keybits = 168,
    .padded_keybytes = 24,
    .flags = 0,
    .text_name = "triple-DES SDCTR",
};
915
/* SSH-2 triple-DES algorithm list, CTR mode preferred over CBC. */
static const ssh_cipheralg *const des3_list[] = {
    &ssh_3des_ssh2_ctr,
    &ssh_3des_ssh2
};

const ssh2_ciphers ssh2_3des = { lenof(des3_list), des3_list };
922
923 /* ----------------------------------------------------------------------
924 * Triple-DES, SSH-1 style. SSH-1 replicated the whole CBC structure
925 * three times, so there have to be three separate IVs, one in each
926 * layer.
927 */
928
/* Context for SSH-1 style triple-DES: three independent inner-CBC
 * layers, hence three chained IVs, each stored in IP-permuted form. */
struct des3_cbc3_ctx {
    des_keysched sched[3];  /* one key schedule per CBC layer */
    LR iv[3];               /* per-layer chain values, already through IP */
    ssh_cipher ciph;        /* vtable wrapper handed back to callers */
};
934
des3_cbc3_new(const ssh_cipheralg * alg)935 static ssh_cipher *des3_cbc3_new(const ssh_cipheralg *alg)
936 {
937 struct des3_cbc3_ctx *ctx = snew(struct des3_cbc3_ctx);
938 ctx->ciph.vt = alg;
939 return &ctx->ciph;
940 }
941
/* Zeroise key schedules and IVs, then release the context. */
static void des3_cbc3_free(ssh_cipher *ciph)
{
    struct des3_cbc3_ctx *ctx =
        container_of(ciph, struct des3_cbc3_ctx, ciph);

    smemclr(ctx, sizeof(*ctx));
    sfree(ctx);
}
948
/*
 * Key setup for SSH-1 triple-DES: expand the three consecutive 8-byte
 * DES keys (big-endian) into their key schedules.
 */
static void des3_cbc3_setkey(ssh_cipher *ciph, const void *vkey)
{
    struct des3_cbc3_ctx *ctx = container_of(ciph, struct des3_cbc3_ctx, ciph);
    const uint8_t *keybytes = (const uint8_t *)vkey;

    for (size_t i = 0; i < 3; i++, keybytes += 8)
        des_key_setup(GET_64BIT_MSB_FIRST(keybytes), &ctx->sched[i]);
}
956
/*
 * IV setup for SSH-1 inner-CBC triple-DES.
 *
 * In principle, we ought to provide an interface for the user to
 * input 24 instead of 8 bytes of IV. But that would make this an
 * ugly exception to the otherwise universal rule that IV size =
 * cipher block size, and there's really no need to violate that
 * rule given that this is a historical one-off oddity and SSH-1
 * always initialises all three IVs to zero anyway. So we fudge it
 * by just setting all the IVs to the same value.
 */
static void des3_cbc3_setiv(ssh_cipher *ciph, const void *viv)
{
    struct des3_cbc3_ctx *ctx = container_of(ciph, struct des3_cbc3_ctx, ciph);

    /* Store the IVs pre-permuted by IP, so that the three CBC layers
     * can run back to back without IP/FP in between. */
    LR permuted_iv = des_IP(des_load_lr(viv));
    ctx->iv[0] = permuted_iv;
    ctx->iv[1] = permuted_iv;
    ctx->iv[2] = permuted_iv;
}
979
/*
 * Encrypt in SSH-1's inner-CBC mode: each DES layer is its own CBC
 * chain, with the middle layer run in the decrypt direction (EDE).
 * The block stays in IP-permuted form across all three layers, so IP
 * and FP are applied only once each per block.
 */
static void des3_cbc3_cbc_encrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des3_cbc3_ctx *ctx = container_of(ciph, struct des3_cbc3_ctx, ciph);
    uint8_t *data = (uint8_t *)vdata;
    for (; len > 0; len -= 8, data += 8) {
        /* Load and IP the input. */
        LR plaintext = des_IP(des_load_lr(data));
        LR lr = plaintext;

        /* Do three passes of CBC, with the middle one inverted. */

        /* Layer 0: forwards CBC (XOR chain value in, then encipher). */
        lr = des_xor_lr(lr, ctx->iv[0]);
        lr = des_inner_cipher(lr, &ctx->sched[0], ENCIPHER);
        ctx->iv[0] = lr;

        /* Layer 1: backwards CBC — decipher first, then XOR the old
         * chain value out; the new chain value is this layer's input. */
        LR ciphertext = lr;
        lr = des_inner_cipher(ciphertext, &ctx->sched[1], DECIPHER);
        lr = des_xor_lr(lr, ctx->iv[1]);
        ctx->iv[1] = ciphertext;

        /* Layer 2: forwards CBC again. */
        lr = des_xor_lr(lr, ctx->iv[2]);
        lr = des_inner_cipher(lr, &ctx->sched[2], ENCIPHER);
        ctx->iv[2] = lr;

        des_store_lr(data, des_FP(lr));
    }
}
1007
/*
 * Decrypt in SSH-1's inner-CBC mode: exact inverse of
 * des3_cbc3_cbc_encrypt, undoing the three CBC layers in reverse
 * order with the directions flipped, again with IP/FP applied only
 * once each per block.
 */
static void des3_cbc3_cbc_decrypt(ssh_cipher *ciph, void *vdata, int len)
{
    struct des3_cbc3_ctx *ctx = container_of(ciph, struct des3_cbc3_ctx, ciph);
    uint8_t *data = (uint8_t *)vdata;
    for (; len > 0; len -= 8, data += 8) {
        /* Load and IP the input */
        LR lr = des_IP(des_load_lr(data));
        LR ciphertext;

        /* Do three passes of CBC, with the middle one inverted. */

        /* Layer 2: undo forwards CBC — decipher, then XOR the old
         * chain value out; the new chain value is this layer's input. */
        ciphertext = lr;
        lr = des_inner_cipher(ciphertext, &ctx->sched[2], DECIPHER);
        lr = des_xor_lr(lr, ctx->iv[2]);
        ctx->iv[2] = ciphertext;

        /* Layer 1: undo backwards CBC (XOR chain value in, then
         * encipher); the new chain value is this layer's output. */
        lr = des_xor_lr(lr, ctx->iv[1]);
        lr = des_inner_cipher(lr, &ctx->sched[1], ENCIPHER);
        ctx->iv[1] = lr;

        /* Layer 0: undo forwards CBC. */
        ciphertext = lr;
        lr = des_inner_cipher(ciphertext, &ctx->sched[0], DECIPHER);
        lr = des_xor_lr(lr, ctx->iv[0]);
        ctx->iv[0] = ciphertext;

        des_store_lr(data, des_FP(lr));
    }
}
1035
/* Vtable for SSH-1 triple-DES (inner-CBC). Deliberately has no
 * .ssh2_id: this cipher is never negotiated in SSH-2. */
const ssh_cipheralg ssh_3des_ssh1 = {
    .new = des3_cbc3_new,
    .free = des3_cbc3_free,
    .setiv = des3_cbc3_setiv,
    .setkey = des3_cbc3_setkey,
    .encrypt = des3_cbc3_cbc_encrypt,
    .decrypt = des3_cbc3_cbc_decrypt,
    .blksize = 8,
    .real_keybits = 168,
    .padded_keybytes = 24,
    .flags = SSH_CIPHER_IS_CBC,
    .text_name = "triple-DES inner-CBC",
};
1049