1 /*
2 * cast.c: CAST-128 bit encryption
3 *
4 * Written By Matthew Green.
5 *
6 * Copyright (c) 1998-2000 Matthew R. Green.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * XXX: this code is dependant upon 32 bit integer! XXX
35 */
36
37 /*
38 * note this source file is designed to be #include'd by crypt.c.
39 * this is why the rcsid below is an expansion of the IRCII_RCSID
40 * macro, rather than using it.
41 */
42
43 #ifndef lint
44 static const char cast_rcsid[] __attribute__((unused)) = "@(#)$eterna: cast.c,v 2.30 2001/08/12 15:44:21 mrg Exp $";
45 #endif
46
47 static int cast_encrypt_str _((crypt_key *, u_char **, int *));
48 static int cast_decrypt_str _((crypt_key *, u_char **, int *));
49
50 /* pull in the sboxes */
51 #include "cast_sbox.h"
52
53 /* our structured cast key: 32 subkeys, and do we do 12 or 16 rounds? */
54 typedef struct {
55 u_32int rk[32]; /* prepared key */
56 int full16; /* do 12 our 16 rounds? */
57 } castkey;
58
59 static void cast_setkey _((castkey *, u_char *, size_t));
60 static void cast_encrypt _((castkey *, u_char *, u_char *, int));
61 static void cast_decrypt _((castkey *, u_char *, u_char *, int));
62
63 /* get different 8 bit parts of a 32 bit variable */
64 #define E0(x) ((u_char) (x >> 24))
65 #define E1(x) ((u_char)((x >> 16) & 255))
66 #define E2(x) ((u_char)((x >> 8) & 255))
67 #define E3(x) ((u_char)((x) & 255))
68
69 /* rotate left */
70 #define ROT(x, n) ( ((x)<<(n)) | ((x)>>(32-(n))) )
71
72 /* CAST-128 needs three rounding functions */
73 #define R1(l, r, i) do { \
74 I = ROT((k)->rk[(i)] + (r), (k)->rk[(i) + 16]); \
75 l ^= ((cast_S1[E0(I)] ^ cast_S2[E1(I)]) - cast_S3[E2(I)]) \
76 + cast_S4[E3(I)]; \
77 } while (0)
78
79 #define R2(l, r, i) do { \
80 I = ROT((k)->rk[(i)] ^ (r), (k)->rk[(i) + 16]); \
81 l ^= ((cast_S1[E0(I)] - cast_S2[E1(I)]) + cast_S3[E2(I)]) \
82 ^ cast_S4[E3(I)]; \
83 } while (0)
84
85 #define R3(l, r, i) do { \
86 I = ROT((k)->rk[(i)] - (r), (k)->rk[(i) + 16]); \
87 l ^= ((cast_S1[E0(I)] + cast_S2[E1(I)]) ^ cast_S3[E2(I)]) \
88 - cast_S4[E3(I)]; \
89 } while (0)
90
91 /* get 32 bits from the block, from the specified offset */
92 #define G32(s, o) \
93 (((u_32int)(s)[(o) + 0] << 24) | ((u_32int)(s)[(o) + 1] << 16) | \
94 ((u_32int)(s)[(o) + 2] << 8) | (u_32int)(s)[(o) + 3])
95
96 /*
97 * cast_encrypt:
98 * - converts 8 bytes of data from src to dest using key k.
99 * - note that we only do 12 rounds if we have a long enough
100 * key (80 or more bits).
101 */
102 static void
cast_encrypt(k,src,dest,first)103 cast_encrypt(k, src, dest, first)
104 castkey *k;
105 u_char *src;
106 u_char *dest;
107 int first;
108 {
109 static u_32int oldr, oldl;
110 u_32int I, l, r;
111
112 /*
113 * if this is the first encryption, we only want to
114 * setup internal state
115 */
116 if (first)
117 {
118 oldl = G32(src, 0);
119 oldr = G32(src, 4);
120 return;
121 }
122
123 /*
124 * split src into left and right parts, xoring the previous
125 * cipherblock as we go
126 */
127 l = G32(src, 0) ^ oldl;
128 r = G32(src, 4) ^ oldr;
129
130 /* do it */
131 R1(l, r, 0);
132 R2(r, l, 1);
133 R3(l, r, 2);
134 R1(r, l, 3);
135 R2(l, r, 4);
136 R3(r, l, 5);
137 R1(l, r, 6);
138 R2(r, l, 7);
139 R3(l, r, 8);
140 R1(r, l, 9);
141 R2(l, r, 10);
142 R3(r, l, 11);
143 if (k->full16) {
144 R1(l, r, 12);
145 R2(r, l, 13);
146 R3(l, r, 14);
147 R1(r, l, 15);
148 }
149
150 /* now put the left and right parts back into dest */
151 dest[0] = E0(r);
152 dest[1] = E1(r);
153 dest[2] = E2(r);
154 dest[3] = E3(r);
155 dest[4] = E0(l);
156 dest[5] = E1(l);
157 dest[6] = E2(l);
158 dest[7] = E3(l);
159
160 /* save the final cipherblock for the next block's encryption */
161 oldl = G32(dest, 0);
162 oldr = G32(dest, 4);
163
164 /* and clean up our stack */
165 I = l = r = 0;
166 }
167
168 /*
169 * cast_decrypt:
170 * - unconverts 8 bytes of data from src to dest using key k
171 * - note that we only do 12 rounds if we have a long enough
172 * key (80 or more bits).
173 */
174 static void
cast_decrypt(k,src,dest,first)175 cast_decrypt(k, src, dest, first)
176 castkey *k;
177 u_char *src;
178 u_char *dest;
179 int first;
180 {
181 static u_32int oldr, oldl;
182 u_32int new_oldr, new_oldl;
183 u_32int I, r, l;
184
185 /*
186 * if this is the first decryption, we only want to
187 * setup internal state
188 */
189 if (first)
190 {
191 oldl = G32(src, 0);
192 oldr = G32(src, 4);
193 return;
194 }
195 new_oldl = G32(src, 0);
196 new_oldr = G32(src, 4);
197
198 /* split src into left and right parts */
199 r = G32(src, 0);
200 l = G32(src, 4);
201
202 /* do it */
203 if (k->full16) {
204 R1(r, l, 15);
205 R3(l, r, 14);
206 R2(r, l, 13);
207 R1(l, r, 12);
208 }
209 R3(r, l, 11);
210 R2(l, r, 10);
211 R1(r, l, 9);
212 R3(l, r, 8);
213 R2(r, l, 7);
214 R1(l, r, 6);
215 R3(r, l, 5);
216 R2(l, r, 4);
217 R1(r, l, 3);
218 R3(l, r, 2);
219 R2(r, l, 1);
220 R1(l, r, 0);
221
222 /* now put the left and right parts back into dest */
223 dest[0] = E0(l) ^ E0(oldl);
224 dest[1] = E1(l) ^ E1(oldl);
225 dest[2] = E2(l) ^ E2(oldl);
226 dest[3] = E3(l) ^ E3(oldl);
227 dest[4] = E0(r) ^ E0(oldr);
228 dest[5] = E1(r) ^ E1(oldr);
229 dest[6] = E2(r) ^ E2(oldr);
230 dest[7] = E3(r) ^ E3(oldr);
231
232 /* save the final cipherblock for the next block's encryption */
233 oldr = new_oldr;
234 oldl = new_oldl;
235
236 /* and clean up our stack */
237 I = l = r = 0;
238 }
239
240 /*
241 * cast_setkey:
242 * - fill in key from the raw bytes in key for length len.
243 */
244 static void
cast_setkey(k,key,len)245 cast_setkey(k, key, len)
246 castkey *k;
247 u_char *key;
248 size_t len;
249 {
250 u_32int t[4], x[4], z[4];
251 int i;
252
253 /* convert the key so we can use it ... */
254 for (i = 0; i < 4; i++) {
255 x[i] = 0;
256 if ((i * 4 + 0) < len)
257 x[i] = (u_32int)key[i * 4 + 0] << 24;
258 if ((i * 4 + 1) < len)
259 x[i] |= (u_32int)key[i * 4 + 1] << 16;
260 if ((i * 4 + 2) < len)
261 x[i] |= (u_32int)key[i * 4 + 2] << 8;
262 if ((i * 4 + 3) < len)
263 x[i] |= (u_32int)key[i * 4 + 3];
264 }
265
266 /* if the key length is not sufficient, only do 12 rounds */
267 k->full16 = (len > 10 ? 1 : 0);
268
269 /*
270 * generate our 32 subkeys (4 at a time, as we can). used an
271 * idea from steve reid on how to collapse this code a little
272 * more than the fully expanded version .. (pity i found that
273 * later)
274 */
275 for (i = 0; i < 32; i += 4) {
276 switch (i & 4) {
277 case 0:
278 t[0] = z[0] = x[0] ^ cast_S5[E1(x[3])] ^ cast_S6[E3(x[3])] ^ cast_S7[E0(x[3])] ^ cast_S8[E2(x[3])] ^ cast_S7[E0(x[2])];
279 t[1] = z[1] = x[2] ^ cast_S5[E0(z[0])] ^ cast_S6[E2(z[0])] ^ cast_S7[E1(z[0])] ^ cast_S8[E3(z[0])] ^ cast_S8[E2(x[2])];
280 t[2] = z[2] = x[3] ^ cast_S5[E3(z[1])] ^ cast_S6[E2(z[1])] ^ cast_S7[E1(z[1])] ^ cast_S8[E0(z[1])] ^ cast_S5[E1(x[2])];
281 t[3] = z[3] = x[1] ^ cast_S5[E2(z[2])] ^ cast_S6[E1(z[2])] ^ cast_S7[E3(z[2])] ^ cast_S8[E0(z[2])] ^ cast_S6[E3(x[2])];
282 break;
283 case 4:
284 t[0] = x[0] = z[2] ^ cast_S5[E1(z[1])] ^ cast_S6[E3(z[1])] ^ cast_S7[E0(z[1])] ^ cast_S8[E2(z[1])] ^ cast_S7[E0(z[0])];
285 t[1] = x[1] = z[0] ^ cast_S5[E0(x[0])] ^ cast_S6[E2(x[0])] ^ cast_S7[E1(x[0])] ^ cast_S8[E3(x[0])] ^ cast_S8[E2(z[0])];
286 t[2] = x[2] = z[1] ^ cast_S5[E3(x[1])] ^ cast_S6[E2(x[1])] ^ cast_S7[E1(x[1])] ^ cast_S8[E0(x[1])] ^ cast_S5[E1(z[0])];
287 t[3] = x[3] = z[3] ^ cast_S5[E2(x[2])] ^ cast_S6[E1(x[2])] ^ cast_S7[E3(x[2])] ^ cast_S8[E0(x[2])] ^ cast_S6[E3(z[0])];
288 break;
289 }
290 switch (i & 12) {
291 case 0:
292 case 12:
293 k->rk[i + 0] = cast_S5[E0(t[2])] ^ cast_S6[E1(t[2])] ^ cast_S7[E3(t[1])] ^ cast_S8[E2(t[1])];
294 k->rk[i + 1] = cast_S5[E2(t[2])] ^ cast_S6[E3(t[2])] ^ cast_S7[E1(t[1])] ^ cast_S8[E0(t[1])];
295 k->rk[i + 2] = cast_S5[E0(t[3])] ^ cast_S6[E1(t[3])] ^ cast_S7[E3(t[0])] ^ cast_S8[E2(t[0])];
296 k->rk[i + 3] = cast_S5[E2(t[3])] ^ cast_S6[E3(t[3])] ^ cast_S7[E1(t[0])] ^ cast_S8[E0(t[0])];
297 break;
298 case 4:
299 case 8:
300 k->rk[i + 0] = cast_S5[E3(t[0])] ^ cast_S6[E2(t[0])] ^ cast_S7[E0(t[3])] ^ cast_S8[E1(t[3])];
301 k->rk[i + 1] = cast_S5[E1(t[0])] ^ cast_S6[E0(t[0])] ^ cast_S7[E2(t[3])] ^ cast_S8[E3(t[3])];
302 k->rk[i + 2] = cast_S5[E3(t[1])] ^ cast_S6[E2(t[1])] ^ cast_S7[E0(t[2])] ^ cast_S8[E1(t[2])];
303 k->rk[i + 3] = cast_S5[E1(t[1])] ^ cast_S6[E0(t[1])] ^ cast_S7[E2(t[2])] ^ cast_S8[E3(t[2])];
304 break;
305 }
306 switch (i & 12) {
307 case 0:
308 k->rk[i + 0] ^= cast_S5[E2(z[0])];
309 k->rk[i + 1] ^= cast_S6[E2(z[1])];
310 k->rk[i + 2] ^= cast_S7[E1(z[2])];
311 k->rk[i + 3] ^= cast_S8[E0(z[3])];
312 break;
313 case 4:
314 k->rk[i + 0] ^= cast_S5[E0(x[2])];
315 k->rk[i + 1] ^= cast_S6[E1(x[3])];
316 k->rk[i + 2] ^= cast_S7[E3(x[0])];
317 k->rk[i + 3] ^= cast_S8[E3(x[1])];
318 break;
319 case 8:
320 k->rk[i + 0] ^= cast_S5[E1(z[2])];
321 k->rk[i + 1] ^= cast_S6[E0(z[3])];
322 k->rk[i + 2] ^= cast_S7[E2(z[0])];
323 k->rk[i + 3] ^= cast_S8[E2(z[1])];
324 break;
325 case 12:
326 k->rk[i + 0] ^= cast_S5[E3(x[0])];
327 k->rk[i + 1] ^= cast_S6[E3(x[1])];
328 k->rk[i + 2] ^= cast_S7[E0(x[2])];
329 k->rk[i + 3] ^= cast_S8[E1(x[3])];
330 break;
331 }
332 if (i >= 16) {
333 k->rk[i + 0] &= 31;
334 k->rk[i + 1] &= 31;
335 k->rk[i + 2] &= 31;
336 k->rk[i + 3] &= 31;
337 }
338 }
339
340 /* and clean up our stack */
341 for (i = 0; i < 4; i++)
342 t[i] = x[i] = z[i] = 0;
343 }
344
345 /*
346 * we implement cyclic block chaining mode here, where each previous
347 * encryption block (and a random initial vector sent with each message,
348 * for the first block) is exclusived-ORed with the plaintext before
349 * being encryptioned. this avoids many problems.
350 */
351
352 /*
353 * and here are the functions we pass to the crypt module.
354 *
355 * XXX: we copy non-64-bit-with-trailing-nul sized data into a new
356 * string, and fill the end with garbage, expecting clients to throw
357 * away data after the nul.
358 */
359 static int
cast_encrypt_str(key,str,len)360 cast_encrypt_str(key, str, len)
361 crypt_key *key;
362 u_char **str;
363 int *len;
364 {
365 castkey k;
366 u_char *s, *newstr;
367 int i;
368 size_t nlen;
369
370 /*
371 * pad the string to 64bit block boundary. we use the same
372 * trick of DES does, and put the number of pad bytes (not
373 * inclusive) there are. eg, a 47 byte string will become
374 * a 48 byte string with a '0' in the final byte, where as
375 * a 48 byte string will become a 56 byte string, with a '7'
376 * in the final byte, with garbage from 49 -> 55.
377 *
378 * note we allocate 8 bytes for the IV and generate it here.
379 */
380 nlen = (*len + 8 + 8) & 0xfffffff8; /* XXX int == 32 bits */
381 newstr = (u_char *) new_malloc(nlen + 1);
382 my_strcpy(newstr + 8, *str);
383 /* XXX this can read upto 15 1 byte chunks.. lame */
384 for (i = 0; i < 8; i++)
385 newstr[i] = (u_char)GET_RANDOM_BYTE;
386 for (i = *len + 8; i < nlen - 1; i++)
387 newstr[i] = (u_char)GET_RANDOM_BYTE;
388 newstr[nlen - 1] = nlen - *len - 1 - 8;
389 newstr[nlen] = '\0';
390
391 /*
392 * fill in str for our parent. note that we don't free the
393 * old str as it is the property of our caller (and in the
394 * only caller, it is an automatic variable).
395 */
396 *str = newstr;
397
398 /*
399 * pity we can't do this once per session, but the current
400 * way encryption is done in ircii makes it hard. XXX fix me
401 */
402 cast_setkey(&k, key->key, my_strlen(key->key));
403
404 /* encrypt each 64bit chunk */
405 for (i = nlen, s = (u_char *)*str; i > 0; s += 8, i -= 8)
406 cast_encrypt(&k, s, s, i == nlen);
407
408 /* set this so that our caller knows it has changed size */
409 *len = nlen;
410 (*str)[nlen] = '\0';
411
412 bzero(&k, sizeof k); /* wipe our stack */
413
414 return (0);
415 }
416
417 static int
cast_decrypt_str(key,str,len)418 cast_decrypt_str(key, str, len)
419 crypt_key *key;
420 u_char **str;
421 int *len;
422 {
423 castkey k;
424 u_char *s;
425 int i;
426
427 *len &= 0xfffffff8; /* XXX int == 32 bits */
428
429 /*
430 * pity we can't do this once per session, but the current
431 * way encryption is done in ircii makes it hard. XXX fix me
432 */
433 cast_setkey(&k, key->key, my_strlen(key->key));
434
435 for (i = *len, s = (u_char *)*str; i > 0; s += 8, i -= 8)
436 cast_decrypt(&k, s, s, i == *len);
437
438 /* find the final byte */
439 i = (*str)[*len - 1];
440 if (i > 7)
441 i = 7;
442 *len = *len - 1 - 8 - i;
443
444 /* now remove the trash IV from the top */
445 for (i = 0; i < *len; i++)
446 (*str)[i] = (*str)[i+8];
447 /* fill in our nul byte from the final byte of the data */
448 (*str)[i] = 0;
449
450 bzero(&k, sizeof k); /* wipe our stack */
451
452 return (0);
453 }
454