1 /* desCode.h
2  *
3  */
4 
5 /*	des - fast & portable DES encryption & decryption.
6  *	Copyright (C) 1992  Dana L. How
7  *	Please see the file `descore.README' for the complete copyright notice.
8  */
9 
10 #include "des.h"
11 
12 /* optional customization:
13  * the idea here is to alter the code so it will still run correctly
14  * on any machine,  but the quickest on the specific machine in mind.
15  * note that these silly tweaks can give you a 15%-20% speed improvement
16  * on the sparc -- it's probably even more significant on the 68000. */
17 
/* Machines with very few registers: on i386 REGISTER expands to nothing,
 * so only the x, y, z locals (which are declared with a literal `register')
 * request one; everywhere else REGISTER is the `register' storage class. */
#ifdef	i386
#define	REGISTER
#else
#define	REGISTER	register
#endif	/* i386 */
24 
/* is auto inc/dec faster than 7bit unsigned indexing? */
/* PREV(v,o) / NEXT(v,o) access one element through pointer v:
 *   - auto inc/dec form (vax, mc68000): the offset o is ignored and v itself
 *     is stepped (*--v or *v++), so accesses must be issued in strictly
 *     descending (PREV) or ascending (NEXT) order.  FIXR / FIXS first
 *     advance r by 32 and s by 8 elements so that a run of PREVs can walk
 *     back down from the end.
 *   - indexed form (everything else): v is left untouched, o selects the
 *     element, and FIXR / FIXS expand to nothing.
 * FIXR and FIXS carry their own trailing `;' and are used without one.
 * The pointer argument and the whole expansion are parenthesized so that an
 * expression argument (e.g. p + 1) is indexed/dereferenced as a unit; the
 * result is still an lvalue, as SAVEDATA requires. */
#if	defined(vax) || defined(mc68000)
#define	FIXR		r += 32;
#define	FIXS		s +=  8;
#define	PREV(v,o)	(*--(v))
#define	NEXT(v,o)	(*(v)++)
#else
#define	FIXR
#define	FIXS
#define	PREV(v,o)	((v)[o])
#define	NEXT(v,o)	((v)[o])
#endif
37 
/* No recognised machine macro: fall back to the vax constant handling
 * (6 register variables, cheap literals).  This definition comes after the
 * auto inc/dec test earlier in the file, so the fallback still uses plain
 * indexing rather than auto inc/dec. */
#if	!(defined(i386) || defined(vax) || defined(mc68000) || defined(sparc))
#define	vax
#endif
42 
43 
/* handle a compiler which can't reallocate registers */
/* The BYTE type is used as the parameter type for the encrypt/decrypt
 * functions.  It's pretty bad to have the function prototypes depend on
 * a macro definition that the users of the functions don't
 * know about. /Niels */
/* Macros selected here (the #if 0 branch is disabled and kept for reference):
 *   SREGFREE  text appended after the last statement of LOADDATA (empty)
 *   DEST      pointer that SAVEDATA stores through (d)
 *   D         name of the first parameter of the generated functions (d)
 *   BYTE      element type that parameter points to (uint8_t) */
#if	0			/* didn't feel like deleting */
#define	SREGFREE	; s = (uint8_t *) D
#define	DEST		s
#define	D		m0
#define	BYTE		uint32_t
#else
#define	SREGFREE
#define	DEST		d
#define	D		d
#define	BYTE		uint8_t
#endif
60 
/* handle constants in the optimal way for 386 & vax */
/* 386: we declare 3 register variables (see above) and use 3 more variables;
 * vax: we use 6 variables, all declared register;
 * we assume address literals are cheap & unrestricted;
 * we assume immediate constants are cheap & unrestricted. */
/* Legend (these names are passed to KEYMAPQUICK / KEYMAPSMALL by S7S1/S6S0):
 *   MQ0-MQ3   table bases for the quick variant: des_bigmap, 64 elements apart
 *   HQ0, HQ2  hook applied to z before the quick lookups (empty here)
 *   LQ0-LQ3   mask applied to z to form the byte offset of a quick lookup
 *   SQ        shift applied to z between the two quick lookups
 *   MS0-MS7   table bases for the small variant: des_keymap, 64 elements apart
 *   HS        hook applied to z before the small lookups (empty here)
 *   LS0-LS3   mask applied to z to form the byte offset of a small lookup
 *   REGQUICK/SETQUICK, REGSMALL/SETSMALL
 *             extra declarations / initialisation emitted by TEMPQUICK and
 *             TEMPSMALL (nothing is needed here: literals are used directly)
 * des_bigmap and des_keymap are not declared in this file; presumably they
 * come from des.h or the including source -- verify. */
#if	defined(i386) || defined(vax)
#define	MQ0	 des_bigmap
#define	MQ1	(des_bigmap +  64)
#define	MQ2	(des_bigmap + 128)
#define	MQ3	(des_bigmap + 192)
#define	HQ0(z)				/*	z |= 0x01000000L; */
#define	HQ2(z)				/*	z |= 0x03000200L; */
#define	LQ0(z)	0xFCFC & z
#define	LQ1(z)	0xFCFC & z
#define	LQ2(z)	0xFCFC & z
#define	LQ3(z)	0xFCFC & z
#define	SQ	16
#define	MS0	 des_keymap
#define	MS1	(des_keymap +  64)
#define	MS2	(des_keymap + 128)
#define	MS3	(des_keymap + 192)
#define	MS4	(des_keymap + 256)
#define	MS5	(des_keymap + 320)
#define	MS6	(des_keymap + 384)
#define	MS7	(des_keymap + 448)
#define	HS(z)
#define	LS0(z)	0xFC & z
#define	LS1(z)	0xFC & z
#define	LS2(z)	0xFC & z
#define	LS3(z)	0xFC & z
#define	REGQUICK
#define	SETQUICK
#define	REGSMALL
#define	SETSMALL
#endif	/* defined(i386) || defined(vax) */
96 
/* handle constants in the optimal way for mc68000 */
/* in addition to the core 6 variables, we declare 3 registers holding constants
 * and 4 registers holding address literals.
 * at most 6 data values and 5 address values are actively used at once.
 * we assume address literals are so expensive we never use them;
 * we assume constant index offsets > 127 are expensive, so they are not used.
 * we assume all constants are expensive and put them in registers,
 * including shift counts greater than 8. */
/* Resulting layout:
 *   quick: k0 = 0xFCFC is the lookup mask (LQ0-LQ3) and k1 = 16 the shift
 *          count (SQ); m0-m3 point 64 elements apart into des_bigmap.
 *   small: m0-m3 point 128 elements apart into des_keymap and each one is
 *          shared by two MSn names.  HS ORs k0 = 0x01000100 (bits 8 and 24)
 *          into z; the lookups masked with k2 = 0x1FC (LS1, LS3) therefore
 *          include a set bit 8, i.e. 256 extra bytes of offset, while the
 *          lookups masked with k1 = 0x0FC (LS0, LS2) do not.
 * The REG* macros end in `;' (they are declarations); the SET* macros start
 * each assignment with `;' and leave the last one unterminated. */
#if	defined(mc68000)
#define	MQ0	m0
#define	MQ1	m1
#define	MQ2	m2
#define	MQ3	m3
#define	HQ0(z)
#define	HQ2(z)
#define	LQ0(z)	k0 & z
#define	LQ1(z)	k0 & z
#define	LQ2(z)	k0 & z
#define	LQ3(z)	k0 & z
#define	SQ	k1
#define	MS0	m0
#define	MS1	m0
#define	MS2	m1
#define	MS3	m1
#define	MS4	m2
#define	MS5	m2
#define	MS6	m3
#define	MS7	m3
#define	HS(z)	z |= k0;
#define	LS0(z)	k1 & z
#define	LS1(z)	k2 & z
#define	LS2(z)	k1 & z
#define	LS3(z)	k2 & z
#define	REGQUICK				\
	register uint32_t k0, k1;		\
	register uint32_t *m0, *m1, *m2, *m3;
#define	SETQUICK				\
	; k0 = 0xFCFC				\
	; k1 = 16				\
	/*k2 = 28 to speed up ROL */		\
	; m0 = des_bigmap			\
	; m1 = m0 + 64				\
	; m2 = m1 + 64				\
	; m3 = m2 + 64
#define	REGSMALL				\
	register uint32_t k0, k1, k2;		\
	register uint32_t *m0, *m1, *m2, *m3;
#define	SETSMALL				\
	; k0 = 0x01000100L			\
	; k1 = 0x0FC				\
	; k2 = 0x1FC				\
	; m0 = des_keymap			\
	; m1 = m0 + 128				\
	; m2 = m1 + 128				\
	; m3 = m2 + 128
#endif	/* defined(mc68000) */
153 
/* handle constants in the optimal way for sparc */
/* in addition to the core 6 variables, we either declare:
 * 4 registers holding address literals and 1 register holding a constant, or
 * 8 registers holding address literals.
 * up to 14 register variables are declared (sparc has %i0-%i5, %l0-%l7).
 * we assume address literals are so expensive we never use them;
 * we assume any constant with >10 bits is expensive and put it in a register,
 * and any other is cheap and is coded in-line. */
/* Resulting layout:
 *   quick: k0 = 0xFCFC is the lookup mask (LQ0-LQ3); the shift count SQ is
 *          the literal 16; m0-m3 point 64 elements apart into des_bigmap.
 *   small: m0-m7 point 64 elements apart into des_keymap, one pointer per
 *          MSn; HS is empty and the lookup masks LS0-LS3 are the literal 0xFC.
 * The REG* macros end in `;' (they are declarations); the SET* macros start
 * each assignment with `;' and leave the last one unterminated. */
#if	defined(sparc)
#define	MQ0	m0
#define	MQ1	m1
#define	MQ2	m2
#define	MQ3	m3
#define	HQ0(z)
#define	HQ2(z)
#define	LQ0(z)	k0 & z
#define	LQ1(z)	k0 & z
#define	LQ2(z)	k0 & z
#define	LQ3(z)	k0 & z
#define	SQ	16
#define	MS0	m0
#define	MS1	m1
#define	MS2	m2
#define	MS3	m3
#define	MS4	m4
#define	MS5	m5
#define	MS6	m6
#define	MS7	m7
#define	HS(z)
#define	LS0(z)	0xFC & z
#define	LS1(z)	0xFC & z
#define	LS2(z)	0xFC & z
#define	LS3(z)	0xFC & z
#define	REGQUICK				\
	register uint32_t k0;			\
	register uint32_t *m0, *m1, *m2, *m3;
#define	SETQUICK				\
	; k0 = 0xFCFC				\
	; m0 = des_bigmap			\
	; m1 = m0 + 64				\
	; m2 = m1 + 64				\
	; m3 = m2 + 64
#define	REGSMALL				\
	register uint32_t *m0, *m1, *m2, *m3, *m4, *m5, *m6, *m7;
#define	SETSMALL				\
	; m0 = des_keymap			\
	; m1 = m0 + 64				\
	; m2 = m1 + 64				\
	; m3 = m2 + 64				\
	; m4 = m3 + 64				\
	; m5 = m4 + 64				\
	; m6 = m5 + 64				\
	; m7 = m6 + 64
#endif	/* defined(sparc) */
208 
209 
210 /* some basic stuff */
211 
/* generate addresses from a base and an index */
/* ADD(b,x): pointer to the 32-bit word lying x BYTES (not elements) past the
 * base pointer b.  Both arguments and the whole expansion are parenthesized
 * so that expression arguments (e.g. table + 64) and postfix uses of the
 * result parse as intended.  The result is a pointer to const: in this file
 * the macro is only ever dereferenced for reading.
 * FIXME: This is used only as *ADD(msi,lsi(z)) or *ADD(mqi,lqi(z)).
 * Why not use plain indexing instead? /Niels */
#define	ADD(b,x)	((const uint32_t *) ((const uint8_t *)(b) + (x)))
216 
/* low level rotate operations */
/* ROL(d,c,o) / ROR(d,c,o) rotate the lvalue d in place, left or right, by c
 * bits; o is the complementary shift count supplied by the caller (ROL1 and
 * ROR1 pass 1 and 31, matching the uint32_t variables they are applied to).
 * d is evaluated three times, so it must be free of side effects.
 * Arguments and the whole expansion are parenthesized so that expression
 * arguments parse as intended.  NOP takes the same arguments and expands to
 * nothing. */
#define	NOP(d,c,o)
#define	ROL(d,c,o)	((d) = (d) << (c) | (d) >> (o))
#define	ROR(d,c,o)	((d) = (d) >> (c) | (d) << (o))
#define	ROL1(d)		ROL(d, 1, 31)
#define	ROR1(d)		ROR(d, 1, 31)
223 
/* elementary swap for doing IP/FP */
/* SWAP(x,y,m,b): exchange the bits of y selected by mask m with the bits of
 * x selected by (m << b).  Uses the variable z from the enclosing scope as
 * scratch (it is left holding the XOR difference).  Expands to three
 * statements with no trailing `;' -- the caller supplies it -- so it must
 * not be used as the unbraced body of an if/loop.  x and y are evaluated
 * twice and must be side-effect-free lvalues.  All arguments are
 * parenthesized so that expression arguments (e.g. a mask built with `|')
 * parse as intended. */
#define	SWAP(x,y,m,b)				\
	z  = (((x) >> (b)) ^ (y)) & (m);	\
	(x) ^= z << (b);			\
	(y) ^= z
229 
230 
231 /* the following macros contain all the important code fragments */
232 
/* load input data, then setup special registers holding constants */
/* TEMPQUICK / TEMPSMALL take the NAME of a parameterless load macro (it is
 * invoked as LOAD()) and emit, in order: the extra declarations (REGQUICK /
 * REGSMALL), the load itself, and the constant initialisation (SETQUICK /
 * SETSMALL).  No trailing `;' is emitted; the ENCRYPT / DECRYPT skeletons
 * supply it. */
#define	TEMPQUICK(LOAD)				\
	REGQUICK				\
	LOAD()					\
	SETQUICK
#define	TEMPSMALL(LOAD)				\
	REGSMALL				\
	LOAD()					\
	SETSMALL
242 
/* load data */
/* LOADDATA(x,y): read 8 bytes through s and pack them into two words with
 * the lowest-addressed byte in the least significant position: x is built
 * from bytes 0-3 and y from bytes 4-7.  The bytes are fetched from offset 7
 * down to 0 and FIXS runs first, so the same text works when PREV is the
 * auto-decrement form.  SREGFREE is appended after the last statement, which
 * is left without a trailing `;'. */
#define	LOADDATA(x,y)				\
	FIXS					\
	y  = PREV(s, 7); y<<= 8;		\
	y |= PREV(s, 6); y<<= 8;		\
	y |= PREV(s, 5); y<<= 8;		\
	y |= PREV(s, 4);			\
	x  = PREV(s, 3); x<<= 8;		\
	x |= PREV(s, 2); x<<= 8;		\
	x |= PREV(s, 1); x<<= 8;		\
	x |= PREV(s, 0)				\
	SREGFREE
/* load data without initial permutation and put into efficient position */
/* Loads x from input bytes 0-3 and y from bytes 4-7 (see LOADDATA), then
 * rotates each word right by one bit.  Operates on the x, y and s of the
 * enclosing function; the last statement has no trailing `;'. */
#define	LOADCORE()				\
	LOADDATA(x, y);				\
	ROR1(x);				\
	ROR1(y)
/* load data, do the initial permutation and put into efficient position */
/* Note the swapped argument order in LOADDATA(y, x): here y receives input
 * bytes 0-3 and x bytes 4-7.  The initial permutation is then carried out as
 * a sequence of masked SWAPs whose shift counts are written in octal
 * (004 = 4, 020 = 16, 002 = 2, 010 = 8), followed by the one-bit right
 * rotations with a final 0x55555555 exchange between them.  Uses x, y, z
 * and s of the enclosing function; the last statement has no trailing `;'. */
#define	LOADFIPS()				\
	LOADDATA(y, x);				\
	SWAP(x, y, 0x0F0F0F0FL, 004);		\
	SWAP(y, x, 0x0000FFFFL, 020);		\
	SWAP(x, y, 0x33333333L, 002);		\
	SWAP(y, x, 0x00FF00FFL, 010);		\
	ROR1(x);				\
	z  = (x ^ y) & 0x55555555L;		\
	y ^= z;					\
	x ^= z;					\
	ROR1(y)
272 
273 
/* core encryption/decryption operations */
/* S box mapping and P perm */
/* KEYMAPSMALL: XOR four table entries into x.  hs(z) runs first; then each
 * lookup masks z with lsN to get a byte offset into table msN (via ADD), and
 * z is shifted right by 8 between lookups, so the tables are visited in the
 * order ms3, ms2, ms1, ms0.  z is destroyed.  The mq0/mq1/hq/lq0/lq1/sq
 * parameters are accepted but unused, so that this macro and KEYMAPQUICK
 * can be invoked with the same argument list. */
#define	KEYMAPSMALL(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\
	hs(z)					\
	x ^= *ADD(ms3, ls3(z));			\
	z>>= 8;					\
	x ^= *ADD(ms2, ls2(z));			\
	z>>= 8;					\
	x ^= *ADD(ms1, ls1(z));			\
	z>>= 8;					\
	x ^= *ADD(ms0, ls0(z))
/* alternate version: use 64k of tables */
/* KEYMAPQUICK: XOR two table entries into x.  hq(z) runs first; z masked
 * with lq0 gives the byte offset into table mq0, then z is shifted right by
 * sq and masked with lq1 for table mq1.  z is destroyed.  The
 * ms0-ms3/hs/ls0-ls3 parameters are accepted but unused, so that this macro
 * and KEYMAPSMALL can be invoked with the same argument list. */
#define	KEYMAPQUICK(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\
	hq(z)					\
	x ^= *ADD(mq0, lq0(z));			\
	z>>= sq;				\
	x ^= *ADD(mq1, lq1(z))
/* apply 24 key bits and do the odd  s boxes */
/* S7S1(x,y,z,r,m,KEYMAP,LOAD): fetch a key word with LOAD(r, m) (LOAD is
 * NEXT or PREV), XOR it with y into z, then let KEYMAP (KEYMAPSMALL or
 * KEYMAPQUICK) fold the table lookups for z into x.  Uses the first half of
 * the tables: MQ0/MQ1 with HQ0, or MS0-MS3. */
#define	S7S1(x,y,z,r,m,KEYMAP,LOAD)		\
	z  = LOAD(r, m);			\
	z ^= y;					\
	KEYMAP(x,z,MQ0,MQ1,HQ0,LQ0,LQ1,SQ,MS0,MS1,MS2,MS3,HS,LS0,LS1,LS2,LS3)
/* apply 24 key bits and do the even s boxes */
/* S6S0(x,y,z,r,m,KEYMAP,LOAD): same as S7S1 except that z is rotated left
 * by 4 bits before the lookups and the second half of the tables is used:
 * MQ2/MQ3 with HQ2, or MS4-MS7.  The LS0-LS3 masks are the same ones S7S1
 * passes. */
#define	S6S0(x,y,z,r,m,KEYMAP,LOAD)		\
	z  = LOAD(r, m);			\
	z ^= y;					\
	ROL(z, 4, 28);				\
	KEYMAP(x,z,MQ2,MQ3,HQ2,LQ2,LQ3,SQ,MS4,MS5,MS6,MS7,HS,LS0,LS1,LS2,LS3)
/* actual iterations.  equivalent except for UPDATE & swapping m and n */
/* One iteration updates x from y using two key words of r.
 *   ENCR: S7S1 with word m, then S6S0 with word n, fetched with NEXT.
 *   DECR: S6S0 with word m, then S7S1 with word n, fetched with PREV.
 * The skeletons pass ascending (m, n) pairs to ENCR and descending pairs to
 * DECR, so the same words meet the same S-box halves in both directions and
 * the access order suits the auto inc/dec forms of NEXT / PREV. */
#define	ENCR(x,y,z,r,m,n,KEYMAP)		\
	S7S1(x,y,z,r,m,KEYMAP,NEXT);		\
	S6S0(x,y,z,r,n,KEYMAP,NEXT)
#define	DECR(x,y,z,r,m,n,KEYMAP)		\
	S6S0(x,y,z,r,m,KEYMAP,PREV);		\
	S7S1(x,y,z,r,n,KEYMAP,PREV)
309 
/* write out result in correct byte order */
/* SAVEDATA(x,y): store 8 bytes through DEST, least significant byte first:
 * x goes to offsets 0-3 and y to offsets 4-7.  Each store keeps only the low
 * byte when DEST points to uint8_t (the active BYTE setting); x and y are
 * shifted as they are written and are therefore destroyed.  Offsets are
 * visited in ascending order so the same text works when NEXT is the
 * auto-increment form. */
#define	SAVEDATA(x,y)				\
	NEXT(DEST, 0) = x; x>>= 8;		\
	NEXT(DEST, 1) = x; x>>= 8;		\
	NEXT(DEST, 2) = x; x>>= 8;		\
	NEXT(DEST, 3) = x;			\
	NEXT(DEST, 4) = y; y>>= 8;		\
	NEXT(DEST, 5) = y; y>>= 8;		\
	NEXT(DEST, 6) = y; y>>= 8;		\
	NEXT(DEST, 7) = y
/* write out result */
/* Counterpart of LOADCORE: rotates x and y left by one bit (undoing the
 * right rotation applied on load) and stores them with the halves exchanged
 * -- SAVEDATA(y, x) writes y to output bytes 0-3 and x to bytes 4-7. */
#define	SAVECORE()				\
	ROL1(x);				\
	ROL1(y);				\
	SAVEDATA(y, x)
/* do final permutation and write out result */
/* Counterpart of LOADFIPS: the same rotate / 0x55555555 exchange / SWAP
 * steps are applied in the reverse order with left rotations instead of
 * right ones, then SAVEDATA(x, y) writes x to output bytes 0-3 and y to
 * bytes 4-7.  SWAP shift counts are octal (010 = 8, 002 = 2, 020 = 16,
 * 004 = 4).  Uses x, y, z of the enclosing function. */
#define	SAVEFIPS()				\
	ROL1(x);				\
	z  = (x ^ y) & 0x55555555L;		\
	y ^= z;					\
	x ^= z;					\
	ROL1(y);				\
	SWAP(x, y, 0x00FF00FFL, 010);		\
	SWAP(y, x, 0x33333333L, 002);		\
	SWAP(x, y, 0x0000FFFFL, 020);		\
	SWAP(y, x, 0x0F0F0F0FL, 004);		\
	SAVEDATA(x, y)
337 
338 
339 /* the following macros contain the encryption/decryption skeletons */
340 
/* ENCRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) expands to the complete definition
 * of an encryption function
 *
 *	void NAME(BYTE *D, const uint32_t *r, const uint8_t *s)
 *
 *   D  output: SAVE stores 8 bytes through it (D and BYTE are macros)
 *   r  key material: words 0..31 are read in ascending order
 *   s  input: LOAD reads 8 bytes through it
 *
 *   TEMP    TEMPQUICK or TEMPSMALL (extra declarations + constant setup)
 *   LOAD    LOADCORE or LOADFIPS, passed by name to TEMP
 *   KEYMAP  KEYMAPQUICK or KEYMAPSMALL
 *   SAVE    SAVECORE or SAVEFIPS
 *
 * The 16 ENCR invocations alternate which of x and y is updated, each one
 * consuming two consecutive words of r.  Since the expansion ends with the
 * function's closing brace, an invocation needs no trailing `;'. */
#define	ENCRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE)	\
						\
void						\
NAME(REGISTER BYTE *D,				\
     REGISTER const uint32_t *r,		\
     REGISTER const uint8_t *s)			\
{						\
	register uint32_t x, y, z;		\
						\
	/* declare temps & load data */		\
	TEMP(LOAD);				\
						\
	/* do the 16 iterations */		\
	ENCR(x,y,z,r, 0, 1,KEYMAP);		\
	ENCR(y,x,z,r, 2, 3,KEYMAP);		\
	ENCR(x,y,z,r, 4, 5,KEYMAP);		\
	ENCR(y,x,z,r, 6, 7,KEYMAP);		\
	ENCR(x,y,z,r, 8, 9,KEYMAP);		\
	ENCR(y,x,z,r,10,11,KEYMAP);		\
	ENCR(x,y,z,r,12,13,KEYMAP);		\
	ENCR(y,x,z,r,14,15,KEYMAP);		\
	ENCR(x,y,z,r,16,17,KEYMAP);		\
	ENCR(y,x,z,r,18,19,KEYMAP);		\
	ENCR(x,y,z,r,20,21,KEYMAP);		\
	ENCR(y,x,z,r,22,23,KEYMAP);		\
	ENCR(x,y,z,r,24,25,KEYMAP);		\
	ENCR(y,x,z,r,26,27,KEYMAP);		\
	ENCR(x,y,z,r,28,29,KEYMAP);		\
	ENCR(y,x,z,r,30,31,KEYMAP);		\
						\
	/* save result */			\
	SAVE();					\
						\
	return;					\
}
376 
/* DECRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) expands to the complete definition
 * of a decryption function
 *
 *	void NAME(BYTE *D, const uint32_t *r, const uint8_t *s)
 *
 * with the same parameters and macro arguments as ENCRYPT.  It differs only
 * in the iteration part: FIXR runs first (it moves r past the 32 key words
 * when PREV is the auto-decrement form, and is empty otherwise), and the 16
 * DECR invocations then read words 31 down to 0 of r.  Since the expansion
 * ends with the function's closing brace, an invocation needs no trailing
 * `;'. */
#define	DECRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE)	\
						\
void						\
NAME(REGISTER BYTE *D,				\
     REGISTER const uint32_t *r,		\
     REGISTER const uint8_t *s)			\
{						\
	register uint32_t x, y, z;		\
						\
	/* declare temps & load data */		\
	TEMP(LOAD);				\
						\
	/* do the 16 iterations */		\
	FIXR					\
	DECR(x,y,z,r,31,30,KEYMAP);		\
	DECR(y,x,z,r,29,28,KEYMAP);		\
	DECR(x,y,z,r,27,26,KEYMAP);		\
	DECR(y,x,z,r,25,24,KEYMAP);		\
	DECR(x,y,z,r,23,22,KEYMAP);		\
	DECR(y,x,z,r,21,20,KEYMAP);		\
	DECR(x,y,z,r,19,18,KEYMAP);		\
	DECR(y,x,z,r,17,16,KEYMAP);		\
	DECR(x,y,z,r,15,14,KEYMAP);		\
	DECR(y,x,z,r,13,12,KEYMAP);		\
	DECR(x,y,z,r,11,10,KEYMAP);		\
	DECR(y,x,z,r, 9, 8,KEYMAP);		\
	DECR(x,y,z,r, 7, 6,KEYMAP);		\
	DECR(y,x,z,r, 5, 4,KEYMAP);		\
	DECR(x,y,z,r, 3, 2,KEYMAP);		\
	DECR(y,x,z,r, 1, 0,KEYMAP);		\
						\
	/* save result */			\
	SAVE();					\
						\
	return;					\
}
413