1 /* desCode.h 2 * 3 */ 4 5 /* des - fast & portable DES encryption & decryption. 6 * Copyright (C) 1992 Dana L. How 7 * Please see the file `descore.README' for the complete copyright notice. 8 */ 9 10 #include "des.h" 11 12 /* optional customization: 13 * the idea here is to alter the code so it will still run correctly 14 * on any machine, but the quickest on the specific machine in mind. 15 * note that these silly tweaks can give you a 15%-20% speed improvement 16 * on the sparc -- it's probably even more significant on the 68000. */ 17 18 /* take care of machines with incredibly few registers */ 19 #if defined(i386) 20 #define REGISTER /* only x, y, z will be declared register */ 21 #else 22 #define REGISTER register 23 #endif /* i386 */ 24 25 /* is auto inc/dec faster than 7bit unsigned indexing? */ 26 #if defined(vax) || defined(mc68000) 27 #define FIXR r += 32; 28 #define FIXS s += 8; 29 #define PREV(v,o) *--v 30 #define NEXT(v,o) *v++ 31 #else 32 #define FIXR 33 #define FIXS 34 #define PREV(v,o) v[o] 35 #define NEXT(v,o) v[o] 36 #endif 37 38 /* if no machine type, default is indexing, 6 registers and cheap literals */ 39 #if !defined(i386) && !defined(vax) && !defined(mc68000) && !defined(sparc) 40 #define vax 41 #endif 42 43 44 /* handle a compiler which can't reallocate registers */ 45 /* The BYTE type is used as parameter for the encrypt/decrypt functions. 46 * It's pretty bad to have the function prototypes depend on 47 * a macro definition that the users of the function doesn't 48 * know about. /Niels */ 49 #if 0 /* didn't feel like deleting */ 50 #define SREGFREE ; s = (uint8_t *) D 51 #define DEST s 52 #define D m0 53 #define BYTE uint32_t 54 #else 55 #define SREGFREE 56 #define DEST d 57 #define D d 58 #define BYTE uint8_t 59 #endif 60 61 /* handle constants in the optimal way for 386 & vax */ 62 /* 386: we declare 3 register variables (see above) and use 3 more variables; 63 * vax: we use 6 variables, all declared register; 64 * we assume address literals are cheap & unrestricted; 65 * we assume immediate constants are cheap & unrestricted. */ 66 #if defined(i386) || defined(vax) 67 #define MQ0 des_bigmap 68 #define MQ1 (des_bigmap + 64) 69 #define MQ2 (des_bigmap + 128) 70 #define MQ3 (des_bigmap + 192) 71 #define HQ0(z) /* z |= 0x01000000L; */ 72 #define HQ2(z) /* z |= 0x03000200L; */ 73 #define LQ0(z) 0xFCFC & z 74 #define LQ1(z) 0xFCFC & z 75 #define LQ2(z) 0xFCFC & z 76 #define LQ3(z) 0xFCFC & z 77 #define SQ 16 78 #define MS0 des_keymap 79 #define MS1 (des_keymap + 64) 80 #define MS2 (des_keymap + 128) 81 #define MS3 (des_keymap + 192) 82 #define MS4 (des_keymap + 256) 83 #define MS5 (des_keymap + 320) 84 #define MS6 (des_keymap + 384) 85 #define MS7 (des_keymap + 448) 86 #define HS(z) 87 #define LS0(z) 0xFC & z 88 #define LS1(z) 0xFC & z 89 #define LS2(z) 0xFC & z 90 #define LS3(z) 0xFC & z 91 #define REGQUICK 92 #define SETQUICK 93 #define REGSMALL 94 #define SETSMALL 95 #endif /* defined(i386) || defined(vax) */ 96 97 /* handle constants in the optimal way for mc68000 */ 98 /* in addition to the core 6 variables, we declare 3 registers holding constants 99 * and 4 registers holding address literals. 100 * at most 6 data values and 5 address values are actively used at once. 101 * we assume address literals are so expensive we never use them; 102 * we assume constant index offsets > 127 are expensive, so they are not used. 103 * we assume all constants are expensive and put them in registers, 104 * including shift counts greater than 8. */ 105 #if defined(mc68000) 106 #define MQ0 m0 107 #define MQ1 m1 108 #define MQ2 m2 109 #define MQ3 m3 110 #define HQ0(z) 111 #define HQ2(z) 112 #define LQ0(z) k0 & z 113 #define LQ1(z) k0 & z 114 #define LQ2(z) k0 & z 115 #define LQ3(z) k0 & z 116 #define SQ k1 117 #define MS0 m0 118 #define MS1 m0 119 #define MS2 m1 120 #define MS3 m1 121 #define MS4 m2 122 #define MS5 m2 123 #define MS6 m3 124 #define MS7 m3 125 #define HS(z) z |= k0; 126 #define LS0(z) k1 & z 127 #define LS1(z) k2 & z 128 #define LS2(z) k1 & z 129 #define LS3(z) k2 & z 130 #define REGQUICK \ 131 register uint32_t k0, k1; \ 132 register uint32_t *m0, *m1, *m2, *m3; 133 #define SETQUICK \ 134 ; k0 = 0xFCFC \ 135 ; k1 = 16 \ 136 /*k2 = 28 to speed up ROL */ \ 137 ; m0 = des_bigmap \ 138 ; m1 = m0 + 64 \ 139 ; m2 = m1 + 64 \ 140 ; m3 = m2 + 64 141 #define REGSMALL \ 142 register uint32_t k0, k1, k2; \ 143 register uint32_t *m0, *m1, *m2, *m3; 144 #define SETSMALL \ 145 ; k0 = 0x01000100L \ 146 ; k1 = 0x0FC \ 147 ; k2 = 0x1FC \ 148 ; m0 = des_keymap \ 149 ; m1 = m0 + 128 \ 150 ; m2 = m1 + 128 \ 151 ; m3 = m2 + 128 152 #endif /* defined(mc68000) */ 153 154 /* handle constants in the optimal way for sparc */ 155 /* in addition to the core 6 variables, we either declare: 156 * 4 registers holding address literals and 1 register holding a constant, or 157 * 8 registers holding address literals. 158 * up to 14 register variables are declared (sparc has %i0-%i5, %l0-%l7). 159 * we assume address literals are so expensive we never use them; 160 * we assume any constant with >10 bits is expensive and put it in a register, 161 * and any other is cheap and is coded in-line. */ 162 #if defined(sparc) 163 #define MQ0 m0 164 #define MQ1 m1 165 #define MQ2 m2 166 #define MQ3 m3 167 #define HQ0(z) 168 #define HQ2(z) 169 #define LQ0(z) k0 & z 170 #define LQ1(z) k0 & z 171 #define LQ2(z) k0 & z 172 #define LQ3(z) k0 & z 173 #define SQ 16 174 #define MS0 m0 175 #define MS1 m1 176 #define MS2 m2 177 #define MS3 m3 178 #define MS4 m4 179 #define MS5 m5 180 #define MS6 m6 181 #define MS7 m7 182 #define HS(z) 183 #define LS0(z) 0xFC & z 184 #define LS1(z) 0xFC & z 185 #define LS2(z) 0xFC & z 186 #define LS3(z) 0xFC & z 187 #define REGQUICK \ 188 register uint32_t k0; \ 189 register uint32_t *m0, *m1, *m2, *m3; 190 #define SETQUICK \ 191 ; k0 = 0xFCFC \ 192 ; m0 = des_bigmap \ 193 ; m1 = m0 + 64 \ 194 ; m2 = m1 + 64 \ 195 ; m3 = m2 + 64 196 #define REGSMALL \ 197 register uint32_t *m0, *m1, *m2, *m3, *m4, *m5, *m6, *m7; 198 #define SETSMALL \ 199 ; m0 = des_keymap \ 200 ; m1 = m0 + 64 \ 201 ; m2 = m1 + 64 \ 202 ; m3 = m2 + 64 \ 203 ; m4 = m3 + 64 \ 204 ; m5 = m4 + 64 \ 205 ; m6 = m5 + 64 \ 206 ; m7 = m6 + 64 207 #endif /* defined(sparc) */ 208 209 210 /* some basic stuff */ 211 212 /* generate addresses from a base and an index */ 213 /* FIXME: This is used only as *ADD(msi,lsi(z)) or *ADD(mqi,lqi(z)). 214 * Why not use plain indexing instead? /Niels */ 215 #define ADD(b,x) (uint32_t *) ((uint8_t *)b + (x)) 216 217 /* low level rotate operations */ 218 #define NOP(d,c,o) 219 #define ROL(d,c,o) d = d << c | d >> o 220 #define ROR(d,c,o) d = d >> c | d << o 221 #define ROL1(d) ROL(d, 1, 31) 222 #define ROR1(d) ROR(d, 1, 31) 223 224 /* elementary swap for doing IP/FP */ 225 #define SWAP(x,y,m,b) \ 226 z = ((x >> b) ^ y) & m; \ 227 x ^= z << b; \ 228 y ^= z 229 230 231 /* the following macros contain all the important code fragments */ 232 233 /* load input data, then setup special registers holding constants */ 234 #define TEMPQUICK(LOAD) \ 235 REGQUICK \ 236 LOAD() \ 237 SETQUICK 238 #define TEMPSMALL(LOAD) \ 239 REGSMALL \ 240 LOAD() \ 241 SETSMALL 242 243 /* load data */ 244 #define LOADDATA(x,y) \ 245 FIXS \ 246 y = PREV(s, 7); y<<= 8; \ 247 y |= PREV(s, 6); y<<= 8; \ 248 y |= PREV(s, 5); y<<= 8; \ 249 y |= PREV(s, 4); \ 250 x = PREV(s, 3); x<<= 8; \ 251 x |= PREV(s, 2); x<<= 8; \ 252 x |= PREV(s, 1); x<<= 8; \ 253 x |= PREV(s, 0) \ 254 SREGFREE 255 /* load data without initial permutation and put into efficient position */ 256 #define LOADCORE() \ 257 LOADDATA(x, y); \ 258 ROR1(x); \ 259 ROR1(y) 260 /* load data, do the initial permutation and put into efficient position */ 261 #define LOADFIPS() \ 262 LOADDATA(y, x); \ 263 SWAP(x, y, 0x0F0F0F0FL, 004); \ 264 SWAP(y, x, 0x0000FFFFL, 020); \ 265 SWAP(x, y, 0x33333333L, 002); \ 266 SWAP(y, x, 0x00FF00FFL, 010); \ 267 ROR1(x); \ 268 z = (x ^ y) & 0x55555555L; \ 269 y ^= z; \ 270 x ^= z; \ 271 ROR1(y) 272 273 274 /* core encryption/decryption operations */ 275 /* S box mapping and P perm */ 276 #define KEYMAPSMALL(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\ 277 hs(z) \ 278 x ^= *ADD(ms3, ls3(z)); \ 279 z>>= 8; \ 280 x ^= *ADD(ms2, ls2(z)); \ 281 z>>= 8; \ 282 x ^= *ADD(ms1, ls1(z)); \ 283 z>>= 8; \ 284 x ^= *ADD(ms0, ls0(z)) 285 /* alternate version: use 64k of tables */ 286 #define KEYMAPQUICK(x,z,mq0,mq1,hq,lq0,lq1,sq,ms0,ms1,ms2,ms3,hs,ls0,ls1,ls2,ls3)\ 287 hq(z) \ 288 x ^= *ADD(mq0, lq0(z)); \ 289 z>>= sq; \ 290 x ^= *ADD(mq1, lq1(z)) 291 /* apply 24 key bits and do the odd s boxes */ 292 #define S7S1(x,y,z,r,m,KEYMAP,LOAD) \ 293 z = LOAD(r, m); \ 294 z ^= y; \ 295 KEYMAP(x,z,MQ0,MQ1,HQ0,LQ0,LQ1,SQ,MS0,MS1,MS2,MS3,HS,LS0,LS1,LS2,LS3) 296 /* apply 24 key bits and do the even s boxes */ 297 #define S6S0(x,y,z,r,m,KEYMAP,LOAD) \ 298 z = LOAD(r, m); \ 299 z ^= y; \ 300 ROL(z, 4, 28); \ 301 KEYMAP(x,z,MQ2,MQ3,HQ2,LQ2,LQ3,SQ,MS4,MS5,MS6,MS7,HS,LS0,LS1,LS2,LS3) 302 /* actual iterations. equivalent except for UPDATE & swapping m and n */ 303 #define ENCR(x,y,z,r,m,n,KEYMAP) \ 304 S7S1(x,y,z,r,m,KEYMAP,NEXT); \ 305 S6S0(x,y,z,r,n,KEYMAP,NEXT) 306 #define DECR(x,y,z,r,m,n,KEYMAP) \ 307 S6S0(x,y,z,r,m,KEYMAP,PREV); \ 308 S7S1(x,y,z,r,n,KEYMAP,PREV) 309 310 /* write out result in correct byte order */ 311 #define SAVEDATA(x,y) \ 312 NEXT(DEST, 0) = x; x>>= 8; \ 313 NEXT(DEST, 1) = x; x>>= 8; \ 314 NEXT(DEST, 2) = x; x>>= 8; \ 315 NEXT(DEST, 3) = x; \ 316 NEXT(DEST, 4) = y; y>>= 8; \ 317 NEXT(DEST, 5) = y; y>>= 8; \ 318 NEXT(DEST, 6) = y; y>>= 8; \ 319 NEXT(DEST, 7) = y 320 /* write out result */ 321 #define SAVECORE() \ 322 ROL1(x); \ 323 ROL1(y); \ 324 SAVEDATA(y, x) 325 /* do final permutation and write out result */ 326 #define SAVEFIPS() \ 327 ROL1(x); \ 328 z = (x ^ y) & 0x55555555L; \ 329 y ^= z; \ 330 x ^= z; \ 331 ROL1(y); \ 332 SWAP(x, y, 0x00FF00FFL, 010); \ 333 SWAP(y, x, 0x33333333L, 002); \ 334 SWAP(x, y, 0x0000FFFFL, 020); \ 335 SWAP(y, x, 0x0F0F0F0FL, 004); \ 336 SAVEDATA(x, y) 337 338 339 /* the following macros contain the encryption/decryption skeletons */ 340 341 #define ENCRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \ 342 \ 343 void \ 344 NAME(REGISTER BYTE *D, \ 345 REGISTER const uint32_t *r, \ 346 REGISTER const uint8_t *s) \ 347 { \ 348 register uint32_t x, y, z; \ 349 \ 350 /* declare temps & load data */ \ 351 TEMP(LOAD); \ 352 \ 353 /* do the 16 iterations */ \ 354 ENCR(x,y,z,r, 0, 1,KEYMAP); \ 355 ENCR(y,x,z,r, 2, 3,KEYMAP); \ 356 ENCR(x,y,z,r, 4, 5,KEYMAP); \ 357 ENCR(y,x,z,r, 6, 7,KEYMAP); \ 358 ENCR(x,y,z,r, 8, 9,KEYMAP); \ 359 ENCR(y,x,z,r,10,11,KEYMAP); \ 360 ENCR(x,y,z,r,12,13,KEYMAP); \ 361 ENCR(y,x,z,r,14,15,KEYMAP); \ 362 ENCR(x,y,z,r,16,17,KEYMAP); \ 363 ENCR(y,x,z,r,18,19,KEYMAP); \ 364 ENCR(x,y,z,r,20,21,KEYMAP); \ 365 ENCR(y,x,z,r,22,23,KEYMAP); \ 366 ENCR(x,y,z,r,24,25,KEYMAP); \ 367 ENCR(y,x,z,r,26,27,KEYMAP); \ 368 ENCR(x,y,z,r,28,29,KEYMAP); \ 369 ENCR(y,x,z,r,30,31,KEYMAP); \ 370 \ 371 /* save result */ \ 372 SAVE(); \ 373 \ 374 return; \ 375 } 376 377 #define DECRYPT(NAME, TEMP, LOAD, KEYMAP, SAVE) \ 378 \ 379 void \ 380 NAME(REGISTER BYTE *D, \ 381 REGISTER const uint32_t *r, \ 382 REGISTER const uint8_t *s) \ 383 { \ 384 register uint32_t x, y, z; \ 385 \ 386 /* declare temps & load data */ \ 387 TEMP(LOAD); \ 388 \ 389 /* do the 16 iterations */ \ 390 FIXR \ 391 DECR(x,y,z,r,31,30,KEYMAP); \ 392 DECR(y,x,z,r,29,28,KEYMAP); \ 393 DECR(x,y,z,r,27,26,KEYMAP); \ 394 DECR(y,x,z,r,25,24,KEYMAP); \ 395 DECR(x,y,z,r,23,22,KEYMAP); \ 396 DECR(y,x,z,r,21,20,KEYMAP); \ 397 DECR(x,y,z,r,19,18,KEYMAP); \ 398 DECR(y,x,z,r,17,16,KEYMAP); \ 399 DECR(x,y,z,r,15,14,KEYMAP); \ 400 DECR(y,x,z,r,13,12,KEYMAP); \ 401 DECR(x,y,z,r,11,10,KEYMAP); \ 402 DECR(y,x,z,r, 9, 8,KEYMAP); \ 403 DECR(x,y,z,r, 7, 6,KEYMAP); \ 404 DECR(y,x,z,r, 5, 4,KEYMAP); \ 405 DECR(x,y,z,r, 3, 2,KEYMAP); \ 406 DECR(y,x,z,r, 1, 0,KEYMAP); \ 407 \ 408 /* save result */ \ 409 SAVE(); \ 410 \ 411 return; \ 412 } 413