1 /*
2  * cast.c: CAST-128 bit encryption
3  *
4  * Written By Matthew Green.
5  *
6  * Copyright (c) 1998-2014 Matthew R. Green.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
/*
 * revision identification string for this file.
 * NOTE(review): IRCII_RCSID_NAMED is defined elsewhere; presumably it
 * stores the string under the name cast_rcsid -- confirm.
 */
IRCII_RCSID_NAMED("@(#)$eterna: cast.c,v 2.48 2020/11/17 08:24:32 mrg Exp $", cast_rcsid);
34 
35 static	int	cast_encrypt_str(crypt_key *, u_char **, size_t *);
36 static	int	cast_decrypt_str(crypt_key *, u_char **, size_t *);
37 
38 /* pull in the sboxes */
39 #include "cast_sbox.h"
40 
41 #include "assert.h"
42 
/*
 * our prepared cast key: the 32 subkeys, whether we run 12 or 16
 * rounds, and the chaining state carried from one block to the next.
 */
typedef struct {
	uint32_t rk[32];	/* prepared key: rk[i] is combined with the data, rk[i + 16] is the rotate count */
	int	full16;		/* non-zero: do 16 rounds, zero: do 12 */
	uint32_t enc_oldr;	/* encryption state: previous block, right word */
	uint32_t enc_oldl;	/* encryption state: previous block, left word */
	uint32_t dec_oldr;	/* decryption state: previous block, right word */
	uint32_t dec_oldl;	/* decryption state: previous block, left word */
} castkey;
50 
51 static void cast_setkey(crypt_key *, size_t);
52 static void cast_encrypt(castkey *, u_char *, u_char *, int);
53 static void cast_decrypt(castkey *, u_char *, u_char *, int);
54 static void cast_clearstate_dec(castkey *k);
55 static void cast_clearstate_enc(castkey *k);
56 
/*
 * get the different 8 bit parts of a 32 bit value: E0 is the most
 * significant byte, E3 the least significant.  the argument is fully
 * parenthesised so that an expression such as E0(a | b) extracts from
 * the whole expression and not just from its last operand.
 */
#define E0(x)	((u_char)(((x) >> 24) & 255))
#define E1(x)	((u_char)(((x) >> 16) & 255))
#define E2(x)	((u_char)(((x) >> 8)  & 255))
#define E3(x)	((u_char)( (x)        & 255))
62 
/*
 * rotate the 32 bit value x left by n bits.  only the low 5 bits of n
 * are used.  the right shift count is masked as well: a rotate count
 * of 0 would otherwise shift by 32, which is undefined for a 32 bit
 * operand, and the key schedule can legitimately produce a 0 count.
 * note that both arguments are evaluated more than once.
 */
#define ROT(x, n) ( ((x) << ((n) & 31)) | ((x) >> ((32 - ((n) & 31)) & 31)) )
65 
/*
 * CAST-128 needs three round functions.  each one combines subkey
 * rk[i] with r (add, xor or subtract), rotates the result left by
 * rk[i + 16], pushes the four bytes of that through S-boxes 1..4 and
 * xors the outcome into l.
 *
 * these expand in place and rely on the surrounding scope providing
 * `k' (pointer to the prepared key) and a 32 bit scratch variable `I'.
 */
#define R1(l, r, i) do { \
	I = (k)->rk[(i)] + (r); \
	I = ROT(I, (k)->rk[(i) + 16]); \
	l ^= ((cast_S1[E0(I)] ^ cast_S2[E1(I)]) - cast_S3[E2(I)]) \
	     + cast_S4[E3(I)]; \
} while (0)

#define R2(l, r, i) do { \
	I = (k)->rk[(i)] ^ (r); \
	I = ROT(I, (k)->rk[(i) + 16]); \
	l ^= ((cast_S1[E0(I)] - cast_S2[E1(I)]) + cast_S3[E2(I)]) \
	     ^ cast_S4[E3(I)]; \
} while (0)

#define R3(l, r, i) do { \
	I = (k)->rk[(i)] - (r); \
	I = ROT(I, (k)->rk[(i) + 16]); \
	l ^= ((cast_S1[E0(I)] + cast_S2[E1(I)]) ^ cast_S3[E2(I)]) \
	     - cast_S4[E3(I)]; \
} while (0)
84 
/*
 * read 32 bits from byte buffer s starting at offset o, treating the
 * byte at the lowest offset as the most significant one.
 */
#define G32(s, o) \
	( (uint32_t)(s)[(o) + 3]        | \
	 ((uint32_t)(s)[(o) + 2] << 8)  | \
	 ((uint32_t)(s)[(o) + 1] << 16) | \
	 ((uint32_t)(s)[(o) + 0] << 24))
89 
90 /*
91  * cast_encrypt:
92  *	- converts 8 bytes of data from src to dest using key k.
93  *	- note that we only do 12 rounds if we have a long enough
94  *	  key (80 or more bits).
95  */
96 static void
cast_encrypt(castkey * k,u_char * src,u_char * dest,int first)97 cast_encrypt(castkey *k, u_char *src, u_char *dest, int first)
98 {
99 	uint32_t I, l, r;
100 
101 	/*
102 	 * if this is the first encryption, we only want to
103 	 * setup internal state
104 	 */
105 	if (first)
106 	{
107 		k->enc_oldl = G32(src, 0);
108 		k->enc_oldr = G32(src, 4);
109 		return;
110 	}
111 
112 	/*
113 	 * split src into left and right parts, xoring the previous
114 	 * cipherblock as we go
115 	 */
116 	l = G32(src, 0) ^ k->enc_oldl;
117 	r = G32(src, 4) ^ k->enc_oldr;
118 
119 	/* do it */
120 	R1(l, r,  0);
121 	R2(r, l,  1);
122 	R3(l, r,  2);
123 	R1(r, l,  3);
124 	R2(l, r,  4);
125 	R3(r, l,  5);
126 	R1(l, r,  6);
127 	R2(r, l,  7);
128 	R3(l, r,  8);
129 	R1(r, l,  9);
130 	R2(l, r, 10);
131 	R3(r, l, 11);
132 	if (k->full16) {
133 		R1(l, r, 12);
134 		R2(r, l, 13);
135 		R3(l, r, 14);
136 		R1(r, l, 15);
137 	}
138 
139 	/* now put the left and right parts back into dest */
140 	dest[0] = E0(r);
141 	dest[1] = E1(r);
142 	dest[2] = E2(r);
143 	dest[3] = E3(r);
144 	dest[4] = E0(l);
145 	dest[5] = E1(l);
146 	dest[6] = E2(l);
147 	dest[7] = E3(l);
148 
149 	/* save the final cipherblock for the next block's encryption */
150 	k->enc_oldl = G32(dest, 0);
151 	k->enc_oldr = G32(dest, 4);
152 
153 	/* and clean up our stack */
154 	I = l = r = 0;
155 }
156 
157 /*
158  * cast_decrypt:
159  *	- unconverts 8 bytes of data from src to dest using key k
160  *	- note that we only do 12 rounds if we have a long enough
161  *	  key (80 or more bits).
162  */
163 static void
cast_decrypt(castkey * k,u_char * src,u_char * dest,int first)164 cast_decrypt(castkey *k, u_char *src, u_char *dest, int first)
165 {
166 	uint32_t new_oldr, new_oldl;
167 	uint32_t I, r, l;
168 
169 	/*
170 	 * if this is the first decryption, we only want to
171 	 * setup internal state
172 	 */
173 	if (first)
174 	{
175 		k->dec_oldl = G32(src, 0);
176 		k->dec_oldr = G32(src, 4);
177 		return;
178 	}
179 	new_oldl = G32(src, 0);
180 	new_oldr = G32(src, 4);
181 
182 	/* split src into left and right parts */
183 	r = G32(src, 0);
184 	l = G32(src, 4);
185 
186 	/* do it */
187 	if (k->full16) {
188 		R1(r, l, 15);
189 		R3(l, r, 14);
190 		R2(r, l, 13);
191 		R1(l, r, 12);
192 	}
193 	R3(r, l, 11);
194 	R2(l, r, 10);
195 	R1(r, l,  9);
196 	R3(l, r,  8);
197 	R2(r, l,  7);
198 	R1(l, r,  6);
199 	R3(r, l,  5);
200 	R2(l, r,  4);
201 	R1(r, l,  3);
202 	R3(l, r,  2);
203 	R2(r, l,  1);
204 	R1(l, r,  0);
205 
206 	/* now put the left and right parts back into dest */
207 	dest[0] = E0(l) ^ E0(k->dec_oldl);
208 	dest[1] = E1(l) ^ E1(k->dec_oldl);
209 	dest[2] = E2(l) ^ E2(k->dec_oldl);
210 	dest[3] = E3(l) ^ E3(k->dec_oldl);
211 	dest[4] = E0(r) ^ E0(k->dec_oldr);
212 	dest[5] = E1(r) ^ E1(k->dec_oldr);
213  	dest[6] = E2(r) ^ E2(k->dec_oldr);
214 	dest[7] = E3(r) ^ E3(k->dec_oldr);
215 
216 	/* save the final cipherblock for the next block's encryption */
217 	k->dec_oldr = new_oldr;
218 	k->dec_oldl = new_oldl;
219 
220 	/* and clean up our stack */
221 	I = l = r = 0;
222 	new_oldr = new_oldl = 0;
223 }
224 
225 /*
226  * Clear the encryption/decryption state.
227  */
228 static void
cast_clearstate_enc(castkey * k)229 cast_clearstate_enc(castkey *k)
230 {
231 	k->enc_oldr = k->enc_oldl = 0;
232 }
233 
234 static void
cast_clearstate_dec(castkey * k)235 cast_clearstate_dec(castkey *k)
236 {
237 	k->dec_oldr = k->dec_oldl = 0;
238 }
239 
240 /*
241  * cast_setkey:
242  *	- fill in key from the raw bytes in key for length len.
243  */
244 static void
cast_setkey(crypt_key * key,size_t len)245 cast_setkey(crypt_key *key, size_t len)
246 {
247 	castkey *k;
248 	uint32_t t[4], x[4], z[4];
249 	int i;
250 
251 	memset(&t, 0, sizeof t);
252 	memset(&z, 0, sizeof z);
253 
254 	if (key->cookie)
255 	{
256 		/*yell("cast_setkey: key-cookie not null; freeing.");*/
257 		new_free(&key->cookie);
258 	}
259 	key->cookie = k = new_malloc(sizeof *k);
260 
261 	cast_clearstate_enc(k);
262 	cast_clearstate_dec(k);
263 
264 	/* convert the key so we can use it ... */
265 	for (i = 0; i < 4; i++) {
266 		x[i] = 0;
267 		if ((i * 4 + 0) < len)
268 			x[i] = (uint32_t)key->key[i * 4 + 0] << 24;
269 		if ((i * 4 + 1) < len)
270 			x[i] |= (uint32_t)key->key[i * 4 + 1] << 16;
271 		if ((i * 4 + 2) < len)
272 			x[i] |= (uint32_t)key->key[i * 4 + 2] << 8;
273 		if ((i * 4 + 3) < len)
274 			x[i] |= (uint32_t)key->key[i * 4 + 3];
275 	}
276 
277 	/* if the key length is not sufficient, only do 12 rounds */
278 	k->full16 = (len > 10 ? 1 : 0);
279 
280 	/*
281 	 * generate our 32 subkeys (4 at a time, as we can).  used an
282 	 * idea from steve reid on how to collapse this code a little
283 	 * more than the fully expanded version .. (pity i found that
284 	 * later)
285 	 */
286 	for (i = 0; i < 32; i += 4) {
287 		switch (i & 4) {
288 		case 0:
289 			t[0] = z[0] = x[0] ^ cast_S5[E1(x[3])] ^ cast_S6[E3(x[3])] ^ cast_S7[E0(x[3])] ^ cast_S8[E2(x[3])] ^ cast_S7[E0(x[2])];
290 			t[1] = z[1] = x[2] ^ cast_S5[E0(z[0])] ^ cast_S6[E2(z[0])] ^ cast_S7[E1(z[0])] ^ cast_S8[E3(z[0])] ^ cast_S8[E2(x[2])];
291 			t[2] = z[2] = x[3] ^ cast_S5[E3(z[1])] ^ cast_S6[E2(z[1])] ^ cast_S7[E1(z[1])] ^ cast_S8[E0(z[1])] ^ cast_S5[E1(x[2])];
292 			t[3] = z[3] = x[1] ^ cast_S5[E2(z[2])] ^ cast_S6[E1(z[2])] ^ cast_S7[E3(z[2])] ^ cast_S8[E0(z[2])] ^ cast_S6[E3(x[2])];
293 			break;
294 		case 4:
295 			t[0] = x[0] = z[2] ^ cast_S5[E1(z[1])] ^ cast_S6[E3(z[1])] ^ cast_S7[E0(z[1])] ^ cast_S8[E2(z[1])] ^ cast_S7[E0(z[0])];
296 			t[1] = x[1] = z[0] ^ cast_S5[E0(x[0])] ^ cast_S6[E2(x[0])] ^ cast_S7[E1(x[0])] ^ cast_S8[E3(x[0])] ^ cast_S8[E2(z[0])];
297 			t[2] = x[2] = z[1] ^ cast_S5[E3(x[1])] ^ cast_S6[E2(x[1])] ^ cast_S7[E1(x[1])] ^ cast_S8[E0(x[1])] ^ cast_S5[E1(z[0])];
298 			t[3] = x[3] = z[3] ^ cast_S5[E2(x[2])] ^ cast_S6[E1(x[2])] ^ cast_S7[E3(x[2])] ^ cast_S8[E0(x[2])] ^ cast_S6[E3(z[0])];
299 			break;
300 		}
301 		switch (i & 12) {
302 		case 0:
303 		case 12:
304 			k->rk[i + 0] = cast_S5[E0(t[2])] ^ cast_S6[E1(t[2])] ^ cast_S7[E3(t[1])] ^ cast_S8[E2(t[1])];
305 			k->rk[i + 1] = cast_S5[E2(t[2])] ^ cast_S6[E3(t[2])] ^ cast_S7[E1(t[1])] ^ cast_S8[E0(t[1])];
306 			k->rk[i + 2] = cast_S5[E0(t[3])] ^ cast_S6[E1(t[3])] ^ cast_S7[E3(t[0])] ^ cast_S8[E2(t[0])];
307 			k->rk[i + 3] = cast_S5[E2(t[3])] ^ cast_S6[E3(t[3])] ^ cast_S7[E1(t[0])] ^ cast_S8[E0(t[0])];
308 			break;
309 		case 4:
310 		case 8:
311 			k->rk[i + 0] = cast_S5[E3(t[0])] ^ cast_S6[E2(t[0])] ^ cast_S7[E0(t[3])] ^ cast_S8[E1(t[3])];
312 			k->rk[i + 1] = cast_S5[E1(t[0])] ^ cast_S6[E0(t[0])] ^ cast_S7[E2(t[3])] ^ cast_S8[E3(t[3])];
313 			k->rk[i + 2] = cast_S5[E3(t[1])] ^ cast_S6[E2(t[1])] ^ cast_S7[E0(t[2])] ^ cast_S8[E1(t[2])];
314 			k->rk[i + 3] = cast_S5[E1(t[1])] ^ cast_S6[E0(t[1])] ^ cast_S7[E2(t[2])] ^ cast_S8[E3(t[2])];
315 			break;
316 		}
317 		switch (i & 12) {
318 		case 0:
319 			k->rk[i + 0] ^= cast_S5[E2(z[0])];
320 			k->rk[i + 1] ^= cast_S6[E2(z[1])];
321 			k->rk[i + 2] ^= cast_S7[E1(z[2])];
322 			k->rk[i + 3] ^= cast_S8[E0(z[3])];
323 			break;
324 		case 4:
325 			k->rk[i + 0] ^= cast_S5[E0(x[2])];
326 			k->rk[i + 1] ^= cast_S6[E1(x[3])];
327 			k->rk[i + 2] ^= cast_S7[E3(x[0])];
328 			k->rk[i + 3] ^= cast_S8[E3(x[1])];
329 			break;
330 		case 8:
331 			k->rk[i + 0] ^= cast_S5[E1(z[2])];
332 			k->rk[i + 1] ^= cast_S6[E0(z[3])];
333 			k->rk[i + 2] ^= cast_S7[E2(z[0])];
334 			k->rk[i + 3] ^= cast_S8[E2(z[1])];
335 			break;
336 		case 12:
337 			k->rk[i + 0] ^= cast_S5[E3(x[0])];
338 			k->rk[i + 1] ^= cast_S6[E3(x[1])];
339 			k->rk[i + 2] ^= cast_S7[E0(x[2])];
340 			k->rk[i + 3] ^= cast_S8[E1(x[3])];
341 			break;
342 		}
343 		if (i >= 16) {
344 			k->rk[i + 0] &= 31;
345 			k->rk[i + 1] &= 31;
346 			k->rk[i + 2] &= 31;
347 			k->rk[i + 3] &= 31;
348 		}
349 	}
350 
351 	/* and clean up our stack */
352 	for (i = 0; i < 4; i++)
353 		t[i] = x[i] = z[i] = 0;
354 }
355 
/*
 * we implement cipher block chaining (CBC) mode here, where each previous
 * ciphertext block (or, for the first block, a random initial vector sent
 * with each message) is exclusive-ORed with the plaintext before it is
 * encrypted.  this avoids many problems.
 */
362 
363 /*
364  * and here are the functions we pass to the crypt module.
365  *
366  * XXX: we copy non-64-bit-with-trailing-nul sized data into a new
367  * string, and fill the end with garbage, expecting clients to throw
368  * away data after the nul.
369  */
370 static	int
cast_encrypt_str(crypt_key * key,u_char ** str,size_t * len)371 cast_encrypt_str(crypt_key *key, u_char **str, size_t *len)
372 {
373 	u_char	*s, *newstr;
374 	int	i;
375 	size_t	nlen, padlen;
376 
377 	/*
378 	 * pad the string to 64bit block boundary.  we use the same
379 	 * trick of DES does, and put the number of pad bytes (not
380 	 * inclusive) there are.  eg, a 47 byte string will become
381 	 * a 48 byte string with a '0' in the final byte, where as
382 	 * a 48 byte string will become a 56 byte string, with a '7'
383 	 * in the final byte, with garbage from 49 -> 55.
384 	 *
385 	 * note we allocate 8 bytes for the IV and generate it here.
386 	 */
387 	nlen = (*len + 8 + 8) & ~7UL;
388 	newstr = new_malloc(nlen + 1);
389 	memmove(newstr + 8, *str, *len);
390 	crypt_get_random_data(newstr, 8);
391 	padlen = nlen - *len - 1 - 8;
392 	crypt_get_random_data(newstr + *len + 8, padlen);
393 	newstr[nlen - 1] = padlen;
394 	newstr[nlen] = '\0';
395 
396 	/*
397 	 * fill in str for our parent.  note that we don't free the
398 	 * old str as it is the property of our caller (and in the
399 	 * only caller, it is an automatic variable).
400 	 */
401 	*str = newstr;
402 
403 	if (key->cookie == NULL)
404 		cast_setkey(key, my_strlen(key->key));
405 
406 	/* encrypt each 64bit chunk */
407 	for (i = nlen, s = (u_char *)*str; i > 0; s += 8, i -= 8)
408 		cast_encrypt(key->cookie, s, s, i == nlen);
409 
410 	cast_clearstate_enc(key->cookie);
411 
412 	/* set this so that our caller knows it has changed size */
413 	*len = nlen;
414 	(*str)[nlen] = '\0';
415 
416 	return (0);
417 }
418 
419 static	int
cast_decrypt_str(crypt_key * key,u_char ** str,size_t * len)420 cast_decrypt_str(crypt_key *key, u_char **str, size_t *len)
421 {
422 	u_char	*s;
423 	size_t	i;
424 
425 	/*
426 	 * cast messages must be 8-byte aligned, so force this by
427 	 * ignoring anything beyond a full block.
428 	 */
429 	*len &= ~7UL;
430 
431 	if (key->cookie == NULL)
432 		cast_setkey(key, my_strlen(key->key));
433 
434 	for (i = *len, s = (u_char *)*str; i > 0; s += 8, i -= 8)
435 		cast_decrypt(key->cookie, s, s, i == *len);
436 	assert(i == 0);
437 
438 	cast_clearstate_dec(key->cookie);
439 
440 	/* find the final byte */
441 	i = (*str)[*len - 1];
442 	if (i > 7)
443 		i = 7;
444 	*len = *len - 1 - 8 - i;
445 
446 	/* now remove the trash IV from the top */
447 	for (i = 0; i < *len; i++)
448 		(*str)[i] = (*str)[i+8];
449 	/* fill in our nul byte from the final byte of the data */
450 	(*str)[i] = 0;
451 
452 	return (0);
453 }
454