1 /*
2  * cast.c: CAST-128 bit encryption
3  *
4  * Written By Matthew Green.
5  *
6  * Copyright (c) 1998-2000 Matthew R. Green.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * XXX: this code is dependant upon 32 bit integer! XXX
35  */
36 
37 /*
38  * note this source file is designed to be #include'd by crypt.c.
39  * this is why the rcsid below is an expansion of the IRCII_RCSID
40  * macro, rather than using it.
41  */
42 
43 #ifndef lint
44 static	const char cast_rcsid[] __attribute__((unused)) = "@(#)$eterna: cast.c,v 2.30 2001/08/12 15:44:21 mrg Exp $";
45 #endif
46 
47 static	int	cast_encrypt_str _((crypt_key *, u_char **, int *));
48 static	int	cast_decrypt_str _((crypt_key *, u_char **, int *));
49 
50 /* pull in the sboxes */
51 #include "cast_sbox.h"
52 
53 /* our structured cast key: 32 subkeys, and do we do 12 or 16 rounds? */
54 typedef struct {
55 	u_32int	rk[32];		/* prepared key */
56 	int	full16;		/* do 12 our 16 rounds? */
57 } castkey;
58 
59 static void cast_setkey _((castkey *, u_char *, size_t));
60 static void cast_encrypt _((castkey *, u_char *, u_char *, int));
61 static void cast_decrypt _((castkey *, u_char *, u_char *, int));
62 
63 /* get different 8 bit parts of a 32 bit variable */
64 #define E0(x)	((u_char) (x >> 24))
65 #define E1(x)	((u_char)((x >> 16) & 255))
66 #define E2(x)	((u_char)((x >> 8)  & 255))
67 #define E3(x)	((u_char)((x)       & 255))
68 
69 /* rotate left */
70 #define ROT(x, n) ( ((x)<<(n)) | ((x)>>(32-(n))) )
71 
72 /* CAST-128 needs three rounding functions */
73 #define R1(l, r, i) do { \
74 	I = ROT((k)->rk[(i)] + (r), (k)->rk[(i) + 16]); \
75 	l ^= ((cast_S1[E0(I)] ^ cast_S2[E1(I)]) - cast_S3[E2(I)]) \
76 	     + cast_S4[E3(I)]; \
77 } while (0)
78 
79 #define R2(l, r, i) do { \
80 	I = ROT((k)->rk[(i)] ^ (r), (k)->rk[(i) + 16]); \
81 	l ^= ((cast_S1[E0(I)] - cast_S2[E1(I)]) + cast_S3[E2(I)]) \
82 	     ^ cast_S4[E3(I)]; \
83 } while (0)
84 
85 #define R3(l, r, i) do { \
86 	I = ROT((k)->rk[(i)] - (r), (k)->rk[(i) + 16]); \
87 	l ^= ((cast_S1[E0(I)] + cast_S2[E1(I)]) ^ cast_S3[E2(I)]) \
88 	     - cast_S4[E3(I)]; \
89 } while (0)
90 
91 /* get 32 bits from the block, from the specified offset */
92 #define G32(s, o) \
93 	(((u_32int)(s)[(o) + 0] << 24) | ((u_32int)(s)[(o) + 1] << 16) | \
94 	 ((u_32int)(s)[(o) + 2] << 8)  |  (u_32int)(s)[(o) + 3])
95 
96 /*
97  * cast_encrypt:
98  *	- converts 8 bytes of data from src to dest using key k.
99  *	- note that we only do 12 rounds if we have a long enough
100  *	  key (80 or more bits).
101  */
102 static void
cast_encrypt(k,src,dest,first)103 cast_encrypt(k, src, dest, first)
104 	castkey *k;
105 	u_char *src;
106 	u_char *dest;
107 	int first;
108 {
109 	static	u_32int oldr, oldl;
110 	u_32int I, l, r;
111 
112 	/*
113 	 * if this is the first encryption, we only want to
114 	 * setup internal state
115 	 */
116 	if (first)
117 	{
118 		oldl = G32(src, 0);
119 		oldr = G32(src, 4);
120 		return;
121 	}
122 
123 	/*
124 	 * split src into left and right parts, xoring the previous
125 	 * cipherblock as we go
126 	 */
127 	l = G32(src, 0) ^ oldl;
128 	r = G32(src, 4) ^ oldr;
129 
130 	/* do it */
131 	R1(l, r,  0);
132 	R2(r, l,  1);
133 	R3(l, r,  2);
134 	R1(r, l,  3);
135 	R2(l, r,  4);
136 	R3(r, l,  5);
137 	R1(l, r,  6);
138 	R2(r, l,  7);
139 	R3(l, r,  8);
140 	R1(r, l,  9);
141 	R2(l, r, 10);
142 	R3(r, l, 11);
143 	if (k->full16) {
144 		R1(l, r, 12);
145 		R2(r, l, 13);
146 		R3(l, r, 14);
147 		R1(r, l, 15);
148 	}
149 
150 	/* now put the left and right parts back into dest */
151 	dest[0] = E0(r);
152 	dest[1] = E1(r);
153 	dest[2] = E2(r);
154 	dest[3] = E3(r);
155 	dest[4] = E0(l);
156 	dest[5] = E1(l);
157 	dest[6] = E2(l);
158 	dest[7] = E3(l);
159 
160 	/* save the final cipherblock for the next block's encryption */
161 	oldl = G32(dest, 0);
162 	oldr = G32(dest, 4);
163 
164 	/* and clean up our stack */
165 	I = l = r = 0;
166 }
167 
168 /*
169  * cast_decrypt:
170  *	- unconverts 8 bytes of data from src to dest using key k
171  *	- note that we only do 12 rounds if we have a long enough
172  *	  key (80 or more bits).
173  */
174 static void
cast_decrypt(k,src,dest,first)175 cast_decrypt(k, src, dest, first)
176 	castkey *k;
177 	u_char *src;
178 	u_char *dest;
179 	int first;
180 {
181 	static	u_32int oldr, oldl;
182 	u_32int new_oldr, new_oldl;
183 	u_32int I, r, l;
184 
185 	/*
186 	 * if this is the first decryption, we only want to
187 	 * setup internal state
188 	 */
189 	if (first)
190 	{
191 		oldl = G32(src, 0);
192 		oldr = G32(src, 4);
193 		return;
194 	}
195 	new_oldl = G32(src, 0);
196 	new_oldr = G32(src, 4);
197 
198 	/* split src into left and right parts */
199 	r = G32(src, 0);
200 	l = G32(src, 4);
201 
202 	/* do it */
203 	if (k->full16) {
204 		R1(r, l, 15);
205 		R3(l, r, 14);
206 		R2(r, l, 13);
207 		R1(l, r, 12);
208 	}
209 	R3(r, l, 11);
210 	R2(l, r, 10);
211 	R1(r, l,  9);
212 	R3(l, r,  8);
213 	R2(r, l,  7);
214 	R1(l, r,  6);
215 	R3(r, l,  5);
216 	R2(l, r,  4);
217 	R1(r, l,  3);
218 	R3(l, r,  2);
219 	R2(r, l,  1);
220 	R1(l, r,  0);
221 
222 	/* now put the left and right parts back into dest */
223 	dest[0] = E0(l) ^ E0(oldl);
224 	dest[1] = E1(l) ^ E1(oldl);
225 	dest[2] = E2(l) ^ E2(oldl);
226 	dest[3] = E3(l) ^ E3(oldl);
227 	dest[4] = E0(r) ^ E0(oldr);
228 	dest[5] = E1(r) ^ E1(oldr);
229  	dest[6] = E2(r) ^ E2(oldr);
230 	dest[7] = E3(r) ^ E3(oldr);
231 
232 	/* save the final cipherblock for the next block's encryption */
233 	oldr = new_oldr;
234 	oldl = new_oldl;
235 
236 	/* and clean up our stack */
237 	I = l = r = 0;
238 }
239 
240 /*
241  * cast_setkey:
242  *	- fill in key from the raw bytes in key for length len.
243  */
244 static void
cast_setkey(k,key,len)245 cast_setkey(k, key, len)
246 	castkey *k;
247 	u_char *key;
248 	size_t len;
249 {
250 	u_32int t[4], x[4], z[4];
251 	int i;
252 
253 	/* convert the key so we can use it ... */
254 	for (i = 0; i < 4; i++) {
255 		x[i] = 0;
256 		if ((i * 4 + 0) < len)
257 			x[i] = (u_32int)key[i * 4 + 0] << 24;
258 		if ((i * 4 + 1) < len)
259 			x[i] |= (u_32int)key[i * 4 + 1] << 16;
260 		if ((i * 4 + 2) < len)
261 			x[i] |= (u_32int)key[i * 4 + 2] << 8;
262 		if ((i * 4 + 3) < len)
263 			x[i] |= (u_32int)key[i * 4 + 3];
264 	}
265 
266 	/* if the key length is not sufficient, only do 12 rounds */
267 	k->full16 = (len > 10 ? 1 : 0);
268 
269 	/*
270 	 * generate our 32 subkeys (4 at a time, as we can).  used an
271 	 * idea from steve reid on how to collapse this code a little
272 	 * more than the fully expanded version .. (pity i found that
273 	 * later)
274 	 */
275 	for (i = 0; i < 32; i += 4) {
276 		switch (i & 4) {
277 		case 0:
278 			t[0] = z[0] = x[0] ^ cast_S5[E1(x[3])] ^ cast_S6[E3(x[3])] ^ cast_S7[E0(x[3])] ^ cast_S8[E2(x[3])] ^ cast_S7[E0(x[2])];
279 			t[1] = z[1] = x[2] ^ cast_S5[E0(z[0])] ^ cast_S6[E2(z[0])] ^ cast_S7[E1(z[0])] ^ cast_S8[E3(z[0])] ^ cast_S8[E2(x[2])];
280 			t[2] = z[2] = x[3] ^ cast_S5[E3(z[1])] ^ cast_S6[E2(z[1])] ^ cast_S7[E1(z[1])] ^ cast_S8[E0(z[1])] ^ cast_S5[E1(x[2])];
281 			t[3] = z[3] = x[1] ^ cast_S5[E2(z[2])] ^ cast_S6[E1(z[2])] ^ cast_S7[E3(z[2])] ^ cast_S8[E0(z[2])] ^ cast_S6[E3(x[2])];
282 			break;
283 		case 4:
284 			t[0] = x[0] = z[2] ^ cast_S5[E1(z[1])] ^ cast_S6[E3(z[1])] ^ cast_S7[E0(z[1])] ^ cast_S8[E2(z[1])] ^ cast_S7[E0(z[0])];
285 			t[1] = x[1] = z[0] ^ cast_S5[E0(x[0])] ^ cast_S6[E2(x[0])] ^ cast_S7[E1(x[0])] ^ cast_S8[E3(x[0])] ^ cast_S8[E2(z[0])];
286 			t[2] = x[2] = z[1] ^ cast_S5[E3(x[1])] ^ cast_S6[E2(x[1])] ^ cast_S7[E1(x[1])] ^ cast_S8[E0(x[1])] ^ cast_S5[E1(z[0])];
287 			t[3] = x[3] = z[3] ^ cast_S5[E2(x[2])] ^ cast_S6[E1(x[2])] ^ cast_S7[E3(x[2])] ^ cast_S8[E0(x[2])] ^ cast_S6[E3(z[0])];
288 			break;
289 		}
290 		switch (i & 12) {
291 		case 0:
292 		case 12:
293 			k->rk[i + 0] = cast_S5[E0(t[2])] ^ cast_S6[E1(t[2])] ^ cast_S7[E3(t[1])] ^ cast_S8[E2(t[1])];
294 			k->rk[i + 1] = cast_S5[E2(t[2])] ^ cast_S6[E3(t[2])] ^ cast_S7[E1(t[1])] ^ cast_S8[E0(t[1])];
295 			k->rk[i + 2] = cast_S5[E0(t[3])] ^ cast_S6[E1(t[3])] ^ cast_S7[E3(t[0])] ^ cast_S8[E2(t[0])];
296 			k->rk[i + 3] = cast_S5[E2(t[3])] ^ cast_S6[E3(t[3])] ^ cast_S7[E1(t[0])] ^ cast_S8[E0(t[0])];
297 			break;
298 		case 4:
299 		case 8:
300 			k->rk[i + 0] = cast_S5[E3(t[0])] ^ cast_S6[E2(t[0])] ^ cast_S7[E0(t[3])] ^ cast_S8[E1(t[3])];
301 			k->rk[i + 1] = cast_S5[E1(t[0])] ^ cast_S6[E0(t[0])] ^ cast_S7[E2(t[3])] ^ cast_S8[E3(t[3])];
302 			k->rk[i + 2] = cast_S5[E3(t[1])] ^ cast_S6[E2(t[1])] ^ cast_S7[E0(t[2])] ^ cast_S8[E1(t[2])];
303 			k->rk[i + 3] = cast_S5[E1(t[1])] ^ cast_S6[E0(t[1])] ^ cast_S7[E2(t[2])] ^ cast_S8[E3(t[2])];
304 			break;
305 		}
306 		switch (i & 12) {
307 		case 0:
308 			k->rk[i + 0] ^= cast_S5[E2(z[0])];
309 			k->rk[i + 1] ^= cast_S6[E2(z[1])];
310 			k->rk[i + 2] ^= cast_S7[E1(z[2])];
311 			k->rk[i + 3] ^= cast_S8[E0(z[3])];
312 			break;
313 		case 4:
314 			k->rk[i + 0] ^= cast_S5[E0(x[2])];
315 			k->rk[i + 1] ^= cast_S6[E1(x[3])];
316 			k->rk[i + 2] ^= cast_S7[E3(x[0])];
317 			k->rk[i + 3] ^= cast_S8[E3(x[1])];
318 			break;
319 		case 8:
320 			k->rk[i + 0] ^= cast_S5[E1(z[2])];
321 			k->rk[i + 1] ^= cast_S6[E0(z[3])];
322 			k->rk[i + 2] ^= cast_S7[E2(z[0])];
323 			k->rk[i + 3] ^= cast_S8[E2(z[1])];
324 			break;
325 		case 12:
326 			k->rk[i + 0] ^= cast_S5[E3(x[0])];
327 			k->rk[i + 1] ^= cast_S6[E3(x[1])];
328 			k->rk[i + 2] ^= cast_S7[E0(x[2])];
329 			k->rk[i + 3] ^= cast_S8[E1(x[3])];
330 			break;
331 		}
332 		if (i >= 16) {
333 			k->rk[i + 0] &= 31;
334 			k->rk[i + 1] &= 31;
335 			k->rk[i + 2] &= 31;
336 			k->rk[i + 3] &= 31;
337 		}
338 	}
339 
340 	/* and clean up our stack */
341 	for (i = 0; i < 4; i++)
342 		t[i] = x[i] = z[i] = 0;
343 }
344 
345 /*
346  * we implement cyclic block chaining mode here, where each previous
347  * encryption block (and a random initial vector sent with each message,
348  * for the first block) is exclusived-ORed with the plaintext before
349  * being encryptioned.  this avoids many problems.
350  */
351 
352 /*
353  * and here are the functions we pass to the crypt module.
354  *
355  * XXX: we copy non-64-bit-with-trailing-nul sized data into a new
356  * string, and fill the end with garbage, expecting clients to throw
357  * away data after the nul.
358  */
359 static	int
cast_encrypt_str(key,str,len)360 cast_encrypt_str(key, str, len)
361 	crypt_key	*key;
362 	u_char	**str;
363 	int	*len;
364 {
365 	castkey	k;
366 	u_char	*s, *newstr;
367 	int	i;
368 	size_t	nlen;
369 
370 	/*
371 	 * pad the string to 64bit block boundary.  we use the same
372 	 * trick of DES does, and put the number of pad bytes (not
373 	 * inclusive) there are.  eg, a 47 byte string will become
374 	 * a 48 byte string with a '0' in the final byte, where as
375 	 * a 48 byte string will become a 56 byte string, with a '7'
376 	 * in the final byte, with garbage from 49 -> 55.
377 	 *
378 	 * note we allocate 8 bytes for the IV and generate it here.
379 	 */
380 	nlen = (*len + 8 + 8) & 0xfffffff8;	/* XXX int == 32 bits */
381 	newstr = (u_char *) new_malloc(nlen + 1);
382 	my_strcpy(newstr + 8, *str);
383 	/* XXX this can read upto 15 1 byte chunks.. lame */
384 	for (i = 0; i < 8; i++)
385 		newstr[i] = (u_char)GET_RANDOM_BYTE;
386 	for (i = *len + 8; i < nlen - 1; i++)
387 		newstr[i] = (u_char)GET_RANDOM_BYTE;
388 	newstr[nlen - 1] = nlen - *len - 1 - 8;
389 	newstr[nlen] = '\0';
390 
391 	/*
392 	 * fill in str for our parent.  note that we don't free the
393 	 * old str as it is the property of our caller (and in the
394 	 * only caller, it is an automatic variable).
395 	 */
396 	*str = newstr;
397 
398 	/*
399 	 * pity we can't do this once per session, but the current
400 	 * way encryption is done in ircii makes it hard. XXX fix me
401 	 */
402 	cast_setkey(&k, key->key, my_strlen(key->key));
403 
404 	/* encrypt each 64bit chunk */
405 	for (i = nlen, s = (u_char *)*str; i > 0; s += 8, i -= 8)
406 		cast_encrypt(&k, s, s, i == nlen);
407 
408 	/* set this so that our caller knows it has changed size */
409 	*len = nlen;
410 	(*str)[nlen] = '\0';
411 
412 	bzero(&k, sizeof k);	/* wipe our stack */
413 
414 	return (0);
415 }
416 
417 static	int
cast_decrypt_str(key,str,len)418 cast_decrypt_str(key, str, len)
419 	crypt_key	*key;
420 	u_char	**str;
421 	int	*len;
422 {
423 	castkey	k;
424 	u_char	*s;
425 	int	i;
426 
427 	*len &= 0xfffffff8; /* XXX int == 32 bits */
428 
429 	/*
430 	 * pity we can't do this once per session, but the current
431 	 * way encryption is done in ircii makes it hard. XXX fix me
432 	 */
433 	cast_setkey(&k, key->key, my_strlen(key->key));
434 
435 	for (i = *len, s = (u_char *)*str; i > 0; s += 8, i -= 8)
436 		cast_decrypt(&k, s, s, i == *len);
437 
438 	/* find the final byte */
439 	i = (*str)[*len - 1];
440 	if (i > 7)
441 		i = 7;
442 	*len = *len - 1 - 8 - i;
443 
444 	/* now remove the trash IV from the top */
445 	for (i = 0; i < *len; i++)
446 		(*str)[i] = (*str)[i+8];
447 	/* fill in our nul byte from the final byte of the data */
448 	(*str)[i] = 0;
449 
450 	bzero(&k, sizeof k);	/* wipe our stack */
451 
452 	return (0);
453 }
454