xref: /netbsd/sys/crypto/cast128/cast128.c (revision 6550d01e)
1 /*	$NetBSD: cast128.c,v 1.9 2006/05/10 21:53:15 mrg Exp $	*/
2 /*      $OpenBSD: cast.c,v 1.2 2000/06/06 06:49:47 deraadt Exp $       */
3 
4 /*
5  *	CAST-128 in C
6  *	Written by Steve Reid <sreid@sea-to-sky.net>
7  *	100% Public Domain - no warranty
8  *	Released 1997.10.11
9  */
10 
11 #include <sys/cdefs.h>
12 __KERNEL_RCSID(0, "$NetBSD: cast128.c,v 1.9 2006/05/10 21:53:15 mrg Exp $");
13 
14 #include <sys/types.h>
15 #include <crypto/cast128/cast128.h>
16 #include <crypto/cast128/cast128sb.h>
17 
/*
 * Macros to access 8-bit bytes out of a 32-bit word:
 * a = bits 31..24, b = bits 23..16, c = bits 15..8, d = bits 7..0.
 * The argument is fully parenthesized so that a compound expression such
 * as U_INT8_Ta(p + q) shifts the whole sum, not just its last operand.
 */
#define U_INT8_Ta(x) ( (u_int8_t) ((x)>>24) )
#define U_INT8_Tb(x) ( (u_int8_t) (((x)>>16)&255) )
#define U_INT8_Tc(x) ( (u_int8_t) (((x)>>8)&255) )
#define U_INT8_Td(x) ( (u_int8_t) ((x)&255) )
23 
/*
 * Circular left shift of the 32-bit unsigned value x by n bit positions.
 * Both shift counts are reduced modulo 32: a count of 0 is a legal
 * rotation amount (the key schedule masks rotation subkeys with 31), and
 * without the mask it would turn the right shift into a shift by the full
 * word width, which C leaves undefined.
 * x and n are each evaluated twice; do not pass expressions with side
 * effects.
 */
#define ROL(x, n) ( ((x)<<((n)&31)) | ((x)>>((32-(n))&31)) )
26 
/*
 * CAST-128 uses three different round functions.
 *
 * Each one combines subkey key->xkey[i] with the half-block r (F1: add,
 * F2: xor, F3: subtract), rotates the result left by key->xkey[i+16],
 * uses the four bytes of that value to index cast_sbox1..cast_sbox4, and
 * xors the combined S-box output into the half-block l.  The three
 * functions differ only in which of +, ^ and - join the operands.
 *
 * The macros rely on two names from the calling scope: `key` (pointer to
 * the expanded key) and `t` (a 32-bit unsigned scratch variable).  Each
 * expands to exactly one statement, so it must be followed by a semicolon
 * and is safe to use as the body of an unbraced if/else.
 */
#define F1(l, r, i) do { \
	t = ROL(key->xkey[(i)] + (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] ^ cast_sbox2[U_INT8_Tb(t)]) - \
	 cast_sbox3[U_INT8_Tc(t)]) + cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F2(l, r, i) do { \
	t = ROL(key->xkey[(i)] ^ (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] - cast_sbox2[U_INT8_Tb(t)]) + \
	 cast_sbox3[U_INT8_Tc(t)]) ^ cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F3(l, r, i) do { \
	t = ROL(key->xkey[(i)] - (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] + cast_sbox2[U_INT8_Tb(t)]) ^ \
	 cast_sbox3[U_INT8_Tc(t)]) - cast_sbox4[U_INT8_Td(t)]; \
} while (0)
40 
41 
42 /***** Encryption Function *****/
43 
44 void cast128_encrypt(const cast128_key* key, const u_int8_t* inblock,
45     u_int8_t* outblock)
46 {
47 u_int32_t t, l, r;
48 
49 	/* Get inblock into l,r */
50 	l = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
51 	 ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
52 	r = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
53 	 ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
54 	/* Do the work */
55 	F1(l, r,  0);
56 	F2(r, l,  1);
57 	F3(l, r,  2);
58 	F1(r, l,  3);
59 	F2(l, r,  4);
60 	F3(r, l,  5);
61 	F1(l, r,  6);
62 	F2(r, l,  7);
63 	F3(l, r,  8);
64 	F1(r, l,  9);
65 	F2(l, r, 10);
66 	F3(r, l, 11);
67 	/* Only do full 16 rounds if key length > 80 bits */
68 	if (key->rounds > 12) {
69 		F1(l, r, 12);
70 		F2(r, l, 13);
71 		F3(l, r, 14);
72 		F1(r, l, 15);
73 	}
74 	/* Put l,r into outblock */
75 	outblock[0] = U_INT8_Ta(r);
76 	outblock[1] = U_INT8_Tb(r);
77 	outblock[2] = U_INT8_Tc(r);
78 	outblock[3] = U_INT8_Td(r);
79 	outblock[4] = U_INT8_Ta(l);
80 	outblock[5] = U_INT8_Tb(l);
81 	outblock[6] = U_INT8_Tc(l);
82 	outblock[7] = U_INT8_Td(l);
83 	/* Wipe clean */
84 	t = l = r = 0;
85 }
86 
87 
88 /***** Decryption Function *****/
89 
90 void cast128_decrypt(const cast128_key* key, const u_int8_t* inblock,
91     u_int8_t* outblock)
92 {
93 u_int32_t t, l, r;
94 
95 	/* Get inblock into l,r */
96 	r = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
97 	 ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
98 	l = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
99 	 ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
100 	/* Do the work */
101 	/* Only do full 16 rounds if key length > 80 bits */
102 	if (key->rounds > 12) {
103 		F1(r, l, 15);
104 		F3(l, r, 14);
105 		F2(r, l, 13);
106 		F1(l, r, 12);
107 	}
108 	F3(r, l, 11);
109 	F2(l, r, 10);
110 	F1(r, l,  9);
111 	F3(l, r,  8);
112 	F2(r, l,  7);
113 	F1(l, r,  6);
114 	F3(r, l,  5);
115 	F2(l, r,  4);
116 	F1(r, l,  3);
117 	F3(l, r,  2);
118 	F2(r, l,  1);
119 	F1(l, r,  0);
120 	/* Put l,r into outblock */
121 	outblock[0] = U_INT8_Ta(l);
122 	outblock[1] = U_INT8_Tb(l);
123 	outblock[2] = U_INT8_Tc(l);
124 	outblock[3] = U_INT8_Td(l);
125 	outblock[4] = U_INT8_Ta(r);
126 	outblock[5] = U_INT8_Tb(r);
127 	outblock[6] = U_INT8_Tc(r);
128 	outblock[7] = U_INT8_Td(r);
129 	/* Wipe clean */
130 	t = l = r = 0;
131 }
132 
133 
/***** Key Schedule *****/
135 
136 void cast128_setkey(cast128_key* key, const u_int8_t* rawkey, int keybytes)
137 {
138 	u_int32_t t[4], z[4], x[4];
139 	int i;
140 
141 	/* Set number of rounds to 12 or 16, depending on key length */
142 	key->rounds = (keybytes <= 10 ? 12 : 16);
143 
144 	/* Copy key to workspace x */
145 	for (i = 0; i < 4; i++) {
146 		x[i] = 0;
147 		t[i] = z[i] = 0;	/* XXX gcc */
148 		if ((i*4+0) < keybytes) x[i] = (u_int32_t)rawkey[i*4+0] << 24;
149 		if ((i*4+1) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+1] << 16;
150 		if ((i*4+2) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+2] << 8;
151 		if ((i*4+3) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+3];
152 	}
153 	/* Generate 32 subkeys, four at a time */
154 	for (i = 0; i < 32; i+=4) {
155 		switch (i & 4) {
156 		 case 0:
157 			t[0] = z[0] = x[0] ^ cast_sbox5[U_INT8_Tb(x[3])] ^
158 			 cast_sbox6[U_INT8_Td(x[3])] ^ cast_sbox7[U_INT8_Ta(x[3])] ^
159 			 cast_sbox8[U_INT8_Tc(x[3])] ^ cast_sbox7[U_INT8_Ta(x[2])];
160 			t[1] = z[1] = x[2] ^ cast_sbox5[U_INT8_Ta(z[0])] ^
161 			 cast_sbox6[U_INT8_Tc(z[0])] ^ cast_sbox7[U_INT8_Tb(z[0])] ^
162 			 cast_sbox8[U_INT8_Td(z[0])] ^ cast_sbox8[U_INT8_Tc(x[2])];
163 			t[2] = z[2] = x[3] ^ cast_sbox5[U_INT8_Td(z[1])] ^
164 			 cast_sbox6[U_INT8_Tc(z[1])] ^ cast_sbox7[U_INT8_Tb(z[1])] ^
165 			 cast_sbox8[U_INT8_Ta(z[1])] ^ cast_sbox5[U_INT8_Tb(x[2])];
166 			t[3] = z[3] = x[1] ^ cast_sbox5[U_INT8_Tc(z[2])] ^
167 			 cast_sbox6[U_INT8_Tb(z[2])] ^ cast_sbox7[U_INT8_Td(z[2])] ^
168 			 cast_sbox8[U_INT8_Ta(z[2])] ^ cast_sbox6[U_INT8_Td(x[2])];
169 			break;
170 		 case 4:
171 			t[0] = x[0] = z[2] ^ cast_sbox5[U_INT8_Tb(z[1])] ^
172 			 cast_sbox6[U_INT8_Td(z[1])] ^ cast_sbox7[U_INT8_Ta(z[1])] ^
173 			 cast_sbox8[U_INT8_Tc(z[1])] ^ cast_sbox7[U_INT8_Ta(z[0])];
174 			t[1] = x[1] = z[0] ^ cast_sbox5[U_INT8_Ta(x[0])] ^
175 			 cast_sbox6[U_INT8_Tc(x[0])] ^ cast_sbox7[U_INT8_Tb(x[0])] ^
176 			 cast_sbox8[U_INT8_Td(x[0])] ^ cast_sbox8[U_INT8_Tc(z[0])];
177 			t[2] = x[2] = z[1] ^ cast_sbox5[U_INT8_Td(x[1])] ^
178 			 cast_sbox6[U_INT8_Tc(x[1])] ^ cast_sbox7[U_INT8_Tb(x[1])] ^
179 			 cast_sbox8[U_INT8_Ta(x[1])] ^ cast_sbox5[U_INT8_Tb(z[0])];
180 			t[3] = x[3] = z[3] ^ cast_sbox5[U_INT8_Tc(x[2])] ^
181 			 cast_sbox6[U_INT8_Tb(x[2])] ^ cast_sbox7[U_INT8_Td(x[2])] ^
182 			 cast_sbox8[U_INT8_Ta(x[2])] ^ cast_sbox6[U_INT8_Td(z[0])];
183 			break;
184 		}
185 		switch (i & 12) {
186 		 case 0:
187 		 case 12:
188 			key->xkey[i+0] = cast_sbox5[U_INT8_Ta(t[2])] ^ cast_sbox6[U_INT8_Tb(t[2])] ^
189 			 cast_sbox7[U_INT8_Td(t[1])] ^ cast_sbox8[U_INT8_Tc(t[1])];
190 			key->xkey[i+1] = cast_sbox5[U_INT8_Tc(t[2])] ^ cast_sbox6[U_INT8_Td(t[2])] ^
191 			 cast_sbox7[U_INT8_Tb(t[1])] ^ cast_sbox8[U_INT8_Ta(t[1])];
192 			key->xkey[i+2] = cast_sbox5[U_INT8_Ta(t[3])] ^ cast_sbox6[U_INT8_Tb(t[3])] ^
193 			 cast_sbox7[U_INT8_Td(t[0])] ^ cast_sbox8[U_INT8_Tc(t[0])];
194 			key->xkey[i+3] = cast_sbox5[U_INT8_Tc(t[3])] ^ cast_sbox6[U_INT8_Td(t[3])] ^
195 			 cast_sbox7[U_INT8_Tb(t[0])] ^ cast_sbox8[U_INT8_Ta(t[0])];
196 			break;
197 		 case 4:
198 		 case 8:
199 			key->xkey[i+0] = cast_sbox5[U_INT8_Td(t[0])] ^ cast_sbox6[U_INT8_Tc(t[0])] ^
200 			 cast_sbox7[U_INT8_Ta(t[3])] ^ cast_sbox8[U_INT8_Tb(t[3])];
201 			key->xkey[i+1] = cast_sbox5[U_INT8_Tb(t[0])] ^ cast_sbox6[U_INT8_Ta(t[0])] ^
202 			 cast_sbox7[U_INT8_Tc(t[3])] ^ cast_sbox8[U_INT8_Td(t[3])];
203 			key->xkey[i+2] = cast_sbox5[U_INT8_Td(t[1])] ^ cast_sbox6[U_INT8_Tc(t[1])] ^
204 			 cast_sbox7[U_INT8_Ta(t[2])] ^ cast_sbox8[U_INT8_Tb(t[2])];
205 			key->xkey[i+3] = cast_sbox5[U_INT8_Tb(t[1])] ^ cast_sbox6[U_INT8_Ta(t[1])] ^
206 			 cast_sbox7[U_INT8_Tc(t[2])] ^ cast_sbox8[U_INT8_Td(t[2])];
207 			break;
208 		}
209 		switch (i & 12) {
210 		 case 0:
211 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tc(z[0])];
212 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tc(z[1])];
213 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tb(z[2])];
214 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Ta(z[3])];
215 			break;
216 		 case 4:
217 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Ta(x[2])];
218 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tb(x[3])];
219 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Td(x[0])];
220 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Td(x[1])];
221 			break;
222 		 case 8:
223 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tb(z[2])];
224 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Ta(z[3])];
225 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tc(z[0])];
226 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tc(z[1])];
227 			break;
228 		 case 12:
229 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Td(x[0])];
230 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Td(x[1])];
231 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Ta(x[2])];
232 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tb(x[3])];
233 			break;
234 		}
235 		if (i >= 16) {
236 			key->xkey[i+0] &= 31;
237 			key->xkey[i+1] &= 31;
238 			key->xkey[i+2] &= 31;
239 			key->xkey[i+3] &= 31;
240 		}
241 	}
242 	/* Wipe clean */
243 	for (i = 0; i < 4; i++) {
244 		t[i] = x[i] = z[i] = 0;
245 	}
246 }
247 
248 /* Made in Canada */
249