/*
 * This file contains the core of a bitslice DES implementation for x86/SSE2.
 * It is part of John the Ripper password cracker,
 * Copyright (c) 2000-2001,2005,2006,2008,2011,2012,2015 by Solar Designer
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted. (This is a heavily cut-down "BSD license".)
 *
 * Gate counts per S-box: 49 44 46 33 48 46 46 41
 * Average: 44.125
 *
 * The Boolean expressions corresponding to DES S-boxes have been generated
 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
 * John the Ripper password cracker: http://www.openwall.com/john/
 * Being mathematical formulas, they are not copyrighted and are free for reuse
 * by anyone.
 *
 * The x86/SSE2 code for the S-boxes was generated by Solar Designer using a
 * Perl script and then hand-optimized - originally for MMX, then converted to
 * SSE2. Instruction scheduling was not re-done for SSE2-capable CPUs yet;
 * doing so may provide further speedup.
 *
 * The effort has been sponsored by Rapid7: http://www.rapid7.com
 *
 * ...with changes in the jumbo patch, by Alain Espinosa (starting with a
 * comment further down this file) and magnum.
 *
 * Addition of single DES encryption with no salt by Deepika Dutta Mishra
 * <dipikadutta at gmail.com> in 2012, no rights reserved.
 */

#include "arch.h"

/*
 * Some broken systems don't offer section alignments larger than 4 bytes,
 * while for the SSE code we need at least a 16 byte alignment. ALIGN_FIX
 * is here to work around this issue when we happen to get bad addresses.
 */
#ifndef ALIGN_FIX
#ifdef ALIGN_LOG
#define DO_ALIGN(log) .align log
#else
#define DO_ALIGN(log) .align 1 << log
#endif
#else
#ifdef ALIGN_LOG
#define DO_ALIGN(log) .align log; .space ALIGN_FIX
#else
#define DO_ALIGN(log) .align 1 << log; .space ALIGN_FIX
#endif
#endif

#if DES_BS_ASM

#ifdef UNDERSCORES
#define DES_bs_all _DES_bs_all
#define DES_bs_init_asm _DES_bs_init_asm
#define DES_bs_crypt _DES_bs_crypt
#define DES_bs_crypt_25 _DES_bs_crypt_25
#define DES_bs_crypt_LM _DES_bs_crypt_LM
#define DES_bs_crypt_plain _DES_bs_crypt_plain
#define DES_bs_P _DES_bs_P
#endif

#ifdef __sun
/* Sun's assembler doesn't recognize .space */
#define DO_SPACE(size) .zero size
#else
/* Mac OS X assembler doesn't recognize .zero */
#define DO_SPACE(size) .space size
#endif

/* Sun's assembler can't multiply, but at least it can add... */
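/*
 * nptr(n) and nvec(n) below compute n*4 and n*16 by repeated addition:
 * the size in bytes of n 32-bit pointers and of n 128-bit SSE2 vectors,
 * respectively.  For example, nvec(3) expands to 3+3+...+3 (16 terms),
 * which assembles to 48.
 */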
#define nptr(n) n+n+n+n
#define nvec(n) n+n+n+n+n+n+n+n+n+n+n+n+n+n+n+n

#ifdef BSD
.data
#else
.bss
#endif

.globl DES_bs_P
DO_ALIGN(6)
DES_bs_P:
DO_SPACE(nvec(64))

.globl DES_bs_all
DO_ALIGN(6)
DES_bs_all:
DES_bs_all_KSp:
DO_SPACE(nptr(0x300))
DES_bs_all_KS_p:
DES_bs_all_KS_v:
DO_SPACE(nvec(0x300))
DES_bs_all_E:
DO_SPACE(nptr(96))
DES_bs_all_K:
DO_SPACE(nvec(56))
DES_bs_all_B:
DO_SPACE(nvec(64))
DES_bs_all_tmp:
DO_SPACE(nvec(16))
DES_bs_all_xkeys:
DO_SPACE(nvec(64))
DES_bs_all_pxkeys:
DO_SPACE(nptr(128))
DES_bs_all_keys_changed:
DO_SPACE(4)
DES_bs_all_salt:
DO_SPACE(4)
DES_bs_all_Ens:
DO_SPACE(nptr(48))

#define E(i) DES_bs_all_E+nptr(i)
#define B(i) DES_bs_all_B+nvec(i)
#define tmp_at(i) DES_bs_all_tmp+nvec(i)
#define P(i) DES_bs_P+nvec(i)
#define pnot tmp_at(0)
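/*
 * Each SN() macro below computes one of the eight DES S-boxes on the six
 * bitsliced inputs held in %xmm0..%xmm5 (aliased a1..a6 further down) and
 * XORs the four results into the out1..out4 destinations.  The tmp_at(N)
 * slots provide spill space for intermediates, and pnot (an all-ones
 * vector set up by DES_bs_init_asm) implements bitwise NOT via PXOR,
 * since SSE2 has no vector NOT instruction.
 */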
#define S1(out1, out2, out3, out4) \
	movdqa %xmm0,tmp_at(1); \
	movdqa %xmm5,%xmm7; \
	movdqa %xmm4,tmp_at(4); \
	movdqa %xmm2,%xmm6; \
	movdqa %xmm1,tmp_at(2); \
	por %xmm2,%xmm7; \
	movdqa %xmm3,tmp_at(3); \
	pxor %xmm0,%xmm6; \
	movdqa %xmm7,tmp_at(5); \
	movdqa %xmm6,%xmm1; \
	pandn %xmm0,%xmm4; \
	pand %xmm7,%xmm1; \
	movdqa %xmm1,%xmm7; \
	por %xmm5,%xmm7; \
	pxor %xmm3,%xmm1; \
	pxor %xmm4,%xmm3; \
	movdqa %xmm1,tmp_at(6); \
	movdqa %xmm3,%xmm1; \
	pandn tmp_at(6),%xmm3; \
	movdqa %xmm3,tmp_at(7); \
	movdqa %xmm5,%xmm3; \
	por %xmm0,%xmm5; \
	pxor tmp_at(4),%xmm3; \
	movdqa %xmm3,tmp_at(8); \
	movdqa %xmm5,%xmm0; \
	pandn %xmm3,%xmm6; \
	pxor %xmm2,%xmm3; \
	pandn %xmm2,%xmm4; \
	pandn %xmm1,%xmm3; \
	pxor %xmm3,%xmm7; \
	movdqa tmp_at(7),%xmm3; \
	pandn tmp_at(3),%xmm5; \
	por %xmm7,%xmm0; \
	pandn %xmm7,%xmm3; \
	movdqa %xmm3,tmp_at(9); \
	pand tmp_at(5),%xmm7; \
	movdqa tmp_at(6),%xmm3; \
	movdqa %xmm0,%xmm2; \
	pxor %xmm1,%xmm2; \
	pandn tmp_at(4),%xmm3; \
	pandn %xmm2,%xmm4; \
	movdqa tmp_at(2),%xmm2; \
	pxor %xmm4,%xmm7; \
	pxor tmp_at(8),%xmm4; \
	pxor %xmm3,%xmm5; \
	por %xmm3,%xmm4; \
	pxor tmp_at(1),%xmm4; \
	pxor %xmm0,%xmm3; \
	pandn %xmm3,%xmm2; \
	pxor tmp_at(5),%xmm0; \
	movdqa tmp_at(7),%xmm3; \
	por tmp_at(2),%xmm3; \
	pxor pnot,%xmm7; \
	pxor out1,%xmm3; \
	pxor %xmm7,%xmm2; \
	pxor tmp_at(5),%xmm4; \
	pxor out3,%xmm2; \
	pxor %xmm4,%xmm7; \
	pxor %xmm7,%xmm3; \
	movdqa %xmm3,out1; \
	por %xmm6,%xmm5; \
	por tmp_at(8),%xmm7; \
	por %xmm5,%xmm0; \
	pxor out2,%xmm7; \
	pxor %xmm4,%xmm0; \
	pxor %xmm0,%xmm7; \
	por tmp_at(4),%xmm1; \
	movdqa tmp_at(2),%xmm3; \
	pand tmp_at(9),%xmm4; \
	pandn %xmm1,%xmm0; \
	pxor %xmm0,%xmm4; \
	por tmp_at(9),%xmm3; \
	por tmp_at(2),%xmm4; \
	movdqa %xmm2,out3; \
	pxor %xmm3,%xmm7; \
	pxor %xmm5,%xmm4; \
	pxor out4,%xmm4; \
	movdqa %xmm7,out2; \
	movdqa %xmm4,out4

#define S2(out1, out2, out3, out4) \
	movdqa %xmm2,tmp_at(2); \
	movdqa %xmm1,tmp_at(1); \
	movdqa %xmm5,%xmm2; \
	movdqa %xmm4,tmp_at(4); \
	pandn %xmm0,%xmm2; \
	movdqa %xmm3,tmp_at(3); \
	pandn %xmm4,%xmm2; \
	movdqa %xmm0,%xmm6; \
	movdqa %xmm2,%xmm7; \
	pxor pnot,%xmm0; \
	por %xmm1,%xmm7; \
	pxor %xmm4,%xmm1; \
	movdqa %xmm7,tmp_at(5); \
	pand %xmm1,%xmm6; \
	movdqa %xmm5,%xmm7; \
	pxor %xmm4,%xmm6; \
	pandn %xmm1,%xmm7; \
	movdqa %xmm3,%xmm4; \
	pxor %xmm7,%xmm2; \
	pandn %xmm6,%xmm7; \
	pxor %xmm5,%xmm1; \
	movdqa %xmm7,tmp_at(7); \
	movdqa %xmm5,%xmm7; \
	pand tmp_at(2),%xmm5; \
	pand tmp_at(5),%xmm2; \
	movdqa %xmm5,tmp_at(8); \
	pandn %xmm2,%xmm5; \
	pand tmp_at(2),%xmm2; \
	movdqa tmp_at(8),%xmm7; \
	pandn tmp_at(3),%xmm5; \
	pandn %xmm1,%xmm7; \
	pxor %xmm2,%xmm0; \
	movdqa %xmm7,%xmm3; \
	pxor %xmm0,%xmm3; \
	pxor out2,%xmm5; \
	pandn tmp_at(1),%xmm7; \
	pxor %xmm6,%xmm7; \
	pxor %xmm3,%xmm5; \
	movdqa %xmm7,%xmm6; \
	movdqa %xmm5,out2; \
	movdqa tmp_at(7),%xmm5; \
	pandn tmp_at(5),%xmm4; \
	pandn %xmm0,%xmm6; \
	pxor tmp_at(5),%xmm3; \
	movdqa %xmm1,%xmm0; \
	pxor %xmm4,%xmm6; \
	pxor tmp_at(2),%xmm0; \
	pxor %xmm0,%xmm6; \
	movdqa %xmm0,%xmm4; \
	pxor out1,%xmm6; \
	pandn tmp_at(1),%xmm0; \
	pxor tmp_at(4),%xmm2; \
	pxor %xmm3,%xmm0; \
	movdqa %xmm6,out1; \
	por %xmm1,%xmm3; \
	por tmp_at(8),%xmm0; \
	pxor %xmm4,%xmm0; \
	movdqa %xmm0,%xmm4; \
	pandn tmp_at(2),%xmm0; \
	movdqa tmp_at(3),%xmm6; \
	pxor tmp_at(7),%xmm0; \
	por %xmm7,%xmm0; \
	por %xmm6,%xmm5; \
	pxor %xmm0,%xmm2; \
	pandn %xmm2,%xmm7; \
	por %xmm2,%xmm6; \
	pxor out4,%xmm7; \
	pxor %xmm4,%xmm6; \
	pxor out3,%xmm6; \
	pxor %xmm5,%xmm7; \
	pxor %xmm3,%xmm7; \
	movdqa %xmm6,out3; \
	movdqa %xmm7,out4

#define S3(out1, out2, out3, out4) \
	movdqa %xmm0,tmp_at(1); \
	movdqa %xmm1,tmp_at(2); \
	movdqa %xmm0,%xmm7; \
	pandn %xmm0,%xmm1; \
	movdqa %xmm2,tmp_at(3); \
	movdqa %xmm5,%xmm0; \
	pxor %xmm2,%xmm0; \
	movdqa %xmm4,tmp_at(4); \
	movdqa %xmm5,%xmm2; \
	por %xmm0,%xmm1; \
	pxor %xmm3,%xmm2; \
	movdqa %xmm0,%xmm4; \
	movdqa %xmm5,%xmm6; \
	pandn %xmm2,%xmm7; \
	pxor tmp_at(2),%xmm4; \
	movdqa %xmm7,tmp_at(5); \
	pxor %xmm1,%xmm7; \
	pandn %xmm4,%xmm6; \
	movdqa %xmm7,tmp_at(6); \
	pxor %xmm6,%xmm1; \
	pand %xmm0,%xmm2; \
	movdqa %xmm1,%xmm6; \
	movdqa %xmm3,%xmm0; \
	pandn %xmm7,%xmm6; \
	pand %xmm5,%xmm7; \
	pand %xmm3,%xmm5; \
	por %xmm3,%xmm7; \
	pand tmp_at(1),%xmm7; \
	movdqa tmp_at(4),%xmm3; \
	pandn tmp_at(6),%xmm3; \
	pxor %xmm4,%xmm7; \
	pxor tmp_at(1),%xmm0; \
	movdqa %xmm7,tmp_at(7); \
	pxor %xmm3,%xmm7; \
	movdqa tmp_at(2),%xmm3; \
	pxor out4,%xmm7; \
	pxor %xmm0,%xmm1; \
	movdqa %xmm7,out4; \
	movdqa tmp_at(3),%xmm7; \
	por tmp_at(3),%xmm1; \
	pandn %xmm1,%xmm2; \
	por tmp_at(5),%xmm0; \
	movdqa %xmm0,%xmm1; \
	pandn %xmm5,%xmm3; \
	pandn tmp_at(7),%xmm1; \
	por %xmm4,%xmm5; \
	pxor %xmm3,%xmm1; \
	por tmp_at(2),%xmm7; \
	movdqa tmp_at(3),%xmm3; \
	pandn %xmm1,%xmm3; \
	pxor %xmm4,%xmm0; \
	pandn %xmm5,%xmm3; \
	movdqa tmp_at(4),%xmm5; \
	pxor tmp_at(1),%xmm3; \
	pand %xmm2,%xmm5; \
	pxor pnot,%xmm0; \
	pxor %xmm5,%xmm3; \
	movdqa %xmm7,%xmm5; \
	pxor out2,%xmm3; \
	pandn tmp_at(4),%xmm6; \
	pandn tmp_at(6),%xmm7; \
	pxor %xmm0,%xmm6; \
	movdqa %xmm3,out2; \
	pxor tmp_at(1),%xmm2; \
	por tmp_at(4),%xmm1; \
	por %xmm2,%xmm0; \
	pxor tmp_at(6),%xmm5; \
	pxor %xmm1,%xmm0; \
	pxor out1,%xmm6; \
	pxor out3,%xmm5; \
	pxor tmp_at(7),%xmm0; \
	pxor %xmm7,%xmm6; \
	pxor %xmm5,%xmm0; \
	movdqa %xmm6,out1; \
	movdqa %xmm0,out3
#define S4(out1, out2, out3, out4) \
	movdqa %xmm1,%xmm7; \
	pxor %xmm2,%xmm0; \
	por %xmm3,%xmm1; \
	pxor %xmm4,%xmm2; \
	movdqa %xmm5,tmp_at(2); \
	pxor %xmm4,%xmm1; \
	movdqa %xmm7,%xmm6; \
	movdqa %xmm7,%xmm5; \
	pandn %xmm2,%xmm7; \
	pandn %xmm2,%xmm1; \
	por %xmm7,%xmm4; \
	pxor %xmm3,%xmm7; \
	movdqa %xmm7,%xmm6; \
	por %xmm0,%xmm7; \
	pxor %xmm5,%xmm3; \
	movdqa %xmm1,tmp_at(3); \
	pandn %xmm7,%xmm1; \
	movdqa %xmm1,%xmm7; \
	pxor %xmm5,%xmm1; \
	pand %xmm1,%xmm6; \
	movdqa %xmm6,%xmm5; \
	pxor %xmm1,%xmm0; \
	pandn %xmm2,%xmm6; \
	pandn %xmm0,%xmm6; \
	pxor %xmm0,%xmm4; \
	movdqa %xmm3,%xmm0; \
	pandn %xmm4,%xmm3; \
	movdqa tmp_at(2),%xmm2; \
	pxor %xmm7,%xmm3; \
	pxor tmp_at(3),%xmm6; \
	movdqa %xmm6,%xmm7; \
	pandn %xmm2,%xmm6; \
	pxor out1,%xmm6; \
	pandn %xmm7,%xmm2; \
	pxor out2,%xmm2; \
	pxor %xmm3,%xmm6; \
	pxor pnot,%xmm3; \
	pxor %xmm3,%xmm2; \
	pxor %xmm7,%xmm3; \
	movdqa %xmm6,out1; \
	pandn %xmm3,%xmm0; \
	por %xmm5,%xmm0; \
	movdqa %xmm2,out2; \
	movdqa tmp_at(2),%xmm3; \
	por %xmm1,%xmm3; \
	pand tmp_at(2),%xmm1; \
	pxor %xmm4,%xmm0; \
	pxor %xmm0,%xmm3; \
	pxor out3,%xmm3; \
	pxor %xmm1,%xmm0; \
	movdqa %xmm3,out3; \
	pxor out4,%xmm0; \
	movdqa %xmm0,out4

#define S5(out1, out2, out3, out4) \
	movdqa %xmm2,tmp_at(3); \
	movdqa %xmm0,tmp_at(1); \
	por %xmm0,%xmm2; \
	movdqa %xmm5,%xmm6; \
	movdqa %xmm2,tmp_at(4); \
	pandn %xmm2,%xmm5; \
	movdqa %xmm2,%xmm7; \
	movdqa %xmm5,%xmm2; \
	pxor %xmm0,%xmm5; \
	movdqa %xmm3,%xmm7; \
	movdqa %xmm5,tmp_at(5); \
	pxor tmp_at(3),%xmm5; \
	movdqa %xmm1,tmp_at(2); \
	por %xmm5,%xmm0; \
	por %xmm3,%xmm5; \
	pandn %xmm2,%xmm3; \
	pxor tmp_at(3),%xmm3; \
	movdqa %xmm3,tmp_at(6); \
	movdqa %xmm0,%xmm1; \
	pand %xmm4,%xmm3; \
	pxor %xmm0,%xmm3; \
	pand %xmm7,%xmm0; \
	pxor %xmm7,%xmm3; \
	movdqa %xmm3,tmp_at(3); \
	pxor %xmm3,%xmm6; \
	movdqa %xmm6,%xmm2; \
	por tmp_at(5),%xmm6; \
	movdqa %xmm6,%xmm3; \
	pand %xmm4,%xmm6; \
	movdqa %xmm6,tmp_at(7); \
	pxor tmp_at(5),%xmm6; \
	pxor %xmm6,%xmm0; \
	movdqa tmp_at(1),%xmm6; \
	movdqa %xmm0,tmp_at(8); \
	pandn %xmm3,%xmm6; \
	movdqa tmp_at(2),%xmm0; \
	movdqa %xmm6,%xmm3; \
	pxor tmp_at(6),%xmm6; \
	pxor %xmm5,%xmm4; \
	pandn %xmm4,%xmm6; \
	pxor pnot,%xmm6; \
	pandn %xmm6,%xmm0; \
	pxor tmp_at(3),%xmm0; \
	movdqa tmp_at(7),%xmm6; \
	pandn tmp_at(6),%xmm6; \
	pxor out3,%xmm0; \
	pxor %xmm4,%xmm3; \
	movdqa %xmm0,out3; \
	por tmp_at(8),%xmm3; \
	movdqa tmp_at(6),%xmm0; \
	pandn %xmm3,%xmm6; \
	pand tmp_at(6),%xmm1; \
	pand %xmm6,%xmm2; \
	movdqa %xmm6,%xmm3; \
	pandn %xmm5,%xmm6; \
	pxor %xmm4,%xmm2; \
	por %xmm2,%xmm1; \
	pxor tmp_at(4),%xmm3; \
	pxor tmp_at(7),%xmm1; \
	pand %xmm2,%xmm7; \
	pand tmp_at(2),%xmm1; \
	pxor tmp_at(1),%xmm7; \
	pxor tmp_at(8),%xmm1; \
	pxor %xmm7,%xmm3; \
	por tmp_at(2),%xmm6; \
	pxor out4,%xmm1; \
	movdqa %xmm1,out4; \
	pxor %xmm5,%xmm0; \
	pxor tmp_at(5),%xmm2; \
	pxor %xmm3,%xmm6; \
	pandn %xmm0,%xmm3; \
	pand tmp_at(2),%xmm5; \
	pxor %xmm2,%xmm3; \
	pxor out2,%xmm5; \
	pxor %xmm5,%xmm3; \
	pxor out1,%xmm6; \
	movdqa %xmm3,out2; \
	movdqa %xmm6,out1

#define S6(out1, out2, out3, out4) \
	movdqa %xmm4,tmp_at(2); \
	pxor %xmm1,%xmm4; \
	movdqa %xmm5,tmp_at(3); \
	por %xmm1,%xmm5; \
	movdqa %xmm2,%xmm7; \
	pand %xmm0,%xmm5; \
	pxor %xmm0,%xmm2; \
	movdqa %xmm0,tmp_at(1); \
	pxor %xmm5,%xmm4; \
	movdqa %xmm4,tmp_at(4); \
	pxor tmp_at(3),%xmm4; \
	movdqa %xmm4,%xmm6; \
	pandn tmp_at(2),%xmm4; \
	pand %xmm0,%xmm6; \
	movdqa %xmm6,tmp_at(5); \
	pxor %xmm1,%xmm6; \
	movdqa %xmm6,tmp_at(6); \
	por %xmm2,%xmm6; \
	movdqa %xmm6,tmp_at(7); \
	pxor tmp_at(4),%xmm6; \
	movdqa %xmm6,%xmm0; \
	pand %xmm7,%xmm6; \
	movdqa %xmm6,tmp_at(8); \
	movdqa tmp_at(3),%xmm6; \
	por %xmm1,%xmm2; \
	pandn tmp_at(8),%xmm6; \
	movdqa %xmm6,tmp_at(9); \
	movdqa tmp_at(6),%xmm6; \
	por %xmm4,%xmm6; \
	movdqa %xmm6,tmp_at(6); \
	pxor tmp_at(9),%xmm6; \
	movdqa %xmm6,tmp_at(10); \
	pand %xmm3,%xmm6; \
	pxor out4,%xmm6; \
	pxor %xmm0,%xmm6; \
	por tmp_at(1),%xmm0; \
	movdqa %xmm6,out4; \
	movdqa tmp_at(7),%xmm6; \
	pxor %xmm1,%xmm6; \
	movdqa %xmm3,%xmm1; \
	movdqa %xmm6,tmp_at(7); \
	pandn tmp_at(3),%xmm6; \
	pxor %xmm7,%xmm6; \
	movdqa tmp_at(8),%xmm7; \
	movdqa %xmm6,tmp_at(12); \
	pandn tmp_at(2),%xmm7; \
	pand tmp_at(6),%xmm0; \
	por %xmm6,%xmm7; \
	pxor %xmm6,%xmm0; \
	movdqa tmp_at(9),%xmm6; \
	por %xmm3,%xmm4; \
	pandn %xmm0,%xmm6; \
	por %xmm7,%xmm5; \
	pxor %xmm4,%xmm6; \
	pxor tmp_at(4),%xmm0; \
	pxor out3,%xmm6; \
	pxor %xmm2,%xmm5; \
	movdqa %xmm6,out3; \
	movdqa tmp_at(5),%xmm6; \
	pandn tmp_at(2),%xmm0; \
	pxor pnot,%xmm2; \
	pxor tmp_at(7),%xmm2; \
	pxor tmp_at(3),%xmm6; \
	pxor out2,%xmm5; \
	movdqa tmp_at(12),%xmm4; \
	pxor %xmm2,%xmm0; \
	pxor tmp_at(1),%xmm4; \
	pxor tmp_at(10),%xmm5; \
	pand %xmm6,%xmm4; \
	pandn %xmm0,%xmm3; \
	pxor out1,%xmm4; \
	pandn %xmm7,%xmm1; \
	pxor tmp_at(8),%xmm4; \
	pxor %xmm2,%xmm1; \
	pxor %xmm3,%xmm5; \
	movdqa %xmm5,out2; \
	pxor %xmm1,%xmm4; \
	movdqa %xmm4,out1
#define S7(out1, out2, out3, out4) \
	movdqa %xmm0,tmp_at(1); \
	movdqa %xmm4,tmp_at(3); \
	movdqa %xmm4,%xmm0; \
	pxor %xmm3,%xmm4; \
	movdqa %xmm5,tmp_at(4); \
	movdqa %xmm4,%xmm7; \
	movdqa %xmm3,tmp_at(2); \
	pxor %xmm2,%xmm4; \
	movdqa %xmm4,tmp_at(5); \
	pand %xmm5,%xmm4; \
	movdqa %xmm7,%xmm5; \
	pxor tmp_at(4),%xmm5; \
	pand %xmm3,%xmm7; \
	movdqa %xmm7,tmp_at(6); \
	movdqa %xmm7,%xmm6; \
	pxor %xmm1,%xmm7; \
	pand tmp_at(4),%xmm6; \
	pxor %xmm2,%xmm6; \
	movdqa %xmm7,tmp_at(7); \
	movdqa tmp_at(1),%xmm3; \
	movdqa %xmm6,%xmm0; \
	por %xmm7,%xmm6; \
	pand %xmm4,%xmm7; \
	pxor %xmm5,%xmm6; \
	pandn %xmm3,%xmm7; \
	pxor %xmm4,%xmm0; \
	pxor out4,%xmm7; \
	pxor %xmm5,%xmm4; \
	pxor %xmm6,%xmm7; \
	movdqa %xmm7,out4; \
	pandn tmp_at(2),%xmm4; \
	por tmp_at(6),%xmm6; \
	movdqa tmp_at(5),%xmm7; \
	pandn tmp_at(3),%xmm7; \
	pandn tmp_at(7),%xmm4; \
	movdqa %xmm7,tmp_at(9); \
	por tmp_at(7),%xmm7; \
	pandn tmp_at(5),%xmm5; \
	pxor %xmm0,%xmm7; \
	pxor tmp_at(3),%xmm0; \
	pxor %xmm4,%xmm0; \
	movdqa tmp_at(1),%xmm4; \
	pand %xmm0,%xmm2; \
	por %xmm2,%xmm6; \
	pxor %xmm5,%xmm6; \
	pandn %xmm6,%xmm3; \
	movdqa %xmm6,%xmm5; \
	pxor %xmm7,%xmm3; \
	pxor %xmm6,%xmm7; \
	por %xmm0,%xmm6; \
	pxor out1,%xmm3; \
	pand tmp_at(4),%xmm6; \
	pxor pnot,%xmm5; \
	pand %xmm6,%xmm1; \
	pxor out3,%xmm0; \
	pxor %xmm7,%xmm1; \
	movdqa %xmm3,out1; \
	movdqa %xmm4,%xmm3; \
	pxor tmp_at(3),%xmm7; \
	por %xmm1,%xmm2; \
	pxor %xmm6,%xmm2; \
	por %xmm2,%xmm7; \
	pand %xmm7,%xmm4; \
	pxor %xmm6,%xmm7; \
	por tmp_at(9),%xmm7; \
	pxor %xmm5,%xmm7; \
	pxor out2,%xmm1; \
	pandn %xmm7,%xmm3; \
	pxor %xmm4,%xmm0; \
	movdqa %xmm0,out3; \
	pxor %xmm3,%xmm1; \
	movdqa %xmm1,out2

#define S8(out1, out2, out3, out4) \
	movdqa %xmm2,%xmm7; \
	movdqa %xmm1,tmp_at(1); \
	pandn %xmm2,%xmm1; \
	movdqa %xmm2,tmp_at(2); \
	pandn %xmm4,%xmm2; \
	movdqa %xmm3,tmp_at(3); \
	pxor %xmm3,%xmm2; \
	movdqa %xmm4,tmp_at(4); \
	movdqa %xmm1,%xmm3; \
	movdqa %xmm5,tmp_at(5); \
	movdqa %xmm2,%xmm4; \
	movdqa %xmm2,%xmm5; \
	pandn tmp_at(1),%xmm4; \
	pand %xmm0,%xmm2; \
	pandn tmp_at(1),%xmm7; \
	pandn %xmm2,%xmm1; \
	pxor tmp_at(4),%xmm7; \
	movdqa %xmm4,%xmm6; \
	por %xmm0,%xmm4; \
	movdqa %xmm7,tmp_at(6); \
	pand %xmm4,%xmm7; \
	pxor pnot,%xmm5; \
	por %xmm7,%xmm2; \
	pxor %xmm7,%xmm5; \
	pandn tmp_at(2),%xmm4; \
	movdqa tmp_at(5),%xmm7; \
	pxor %xmm4,%xmm5; \
	por %xmm1,%xmm7; \
	pxor %xmm5,%xmm3; \
	pxor %xmm3,%xmm7; \
	pxor %xmm0,%xmm3; \
	pxor out2,%xmm7; \
	movdqa %xmm7,out2; \
	pxor tmp_at(1),%xmm5; \
	movdqa %xmm3,%xmm4; \
	pand tmp_at(4),%xmm3; \
	pxor %xmm5,%xmm3; \
	por tmp_at(3),%xmm5; \
	pxor %xmm3,%xmm6; \
	pxor tmp_at(6),%xmm5; \
	pxor %xmm2,%xmm3; \
	pxor %xmm6,%xmm5; \
	por tmp_at(1),%xmm3; \
	pxor %xmm5,%xmm0; \
	pxor %xmm4,%xmm3; \
	por tmp_at(3),%xmm4; \
	pxor tmp_at(4),%xmm3; \
	pand tmp_at(5),%xmm2; \
	pandn %xmm3,%xmm4; \
	pand tmp_at(5),%xmm0; \
	pxor %xmm6,%xmm0; \
	por %xmm1,%xmm4; \
	pxor out4,%xmm0; \
	pxor %xmm4,%xmm5; \
	pxor out3,%xmm2; \
	por tmp_at(5),%xmm5; \
	pxor out1,%xmm5; \
	pxor %xmm3,%xmm2; \
	pxor %xmm6,%xmm5; \
	movdqa %xmm0,out4; \
	movdqa %xmm2,out3; \
	movdqa %xmm5,out1
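/*
 * a1..a6 name the six S-box input registers.  zero aliases %xmm5: the
 * crypt routines clear it with "pxor zero,zero" and then use
 * DES_bs_clear_block to wipe all 64 B() bit vectors before the first
 * round.
 */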
#define a1 %xmm0
#define a2 %xmm1
#define a3 %xmm2
#define a4 %xmm3
#define a5 %xmm4
#define a6 %xmm5

#define zero %xmm5

#define DES_bs_clear_block_8(i) \
	movdqa zero,B(i); \
	movdqa zero,B(i + 1); \
	movdqa zero,B(i + 2); \
	movdqa zero,B(i + 3); \
	movdqa zero,B(i + 4); \
	movdqa zero,B(i + 5); \
	movdqa zero,B(i + 6); \
	movdqa zero,B(i + 7)

#define DES_bs_clear_block \
	DES_bs_clear_block_8(0); \
	DES_bs_clear_block_8(8); \
	DES_bs_clear_block_8(16); \
	DES_bs_clear_block_8(24); \
	DES_bs_clear_block_8(32); \
	DES_bs_clear_block_8(40); \
	DES_bs_clear_block_8(48); \
	DES_bs_clear_block_8(56)

#define k_ptr %edx
#define K(i) nvec(i)(k_ptr)
#define k(i) nptr(i)(k_ptr)

#define tmp1 %ecx
#define tmp2 %esi

#define xor_E(i) \
	movl E(i),tmp1; \
	movdqa K(i),a1; \
	movl E(i + 1),tmp2; \
	movdqa K(i + 1),a2; \
	pxor (tmp1),a1; \
	pxor (tmp2),a2; \
	movl E(i + 2),tmp1; \
	movdqa K(i + 2),a3; \
	movl E(i + 3),tmp2; \
	movdqa K(i + 3),a4; \
	pxor (tmp1),a3; \
	pxor (tmp2),a4; \
	movl E(i + 4),tmp1; \
	movdqa K(i + 4),a5; \
	movl E(i + 5),tmp2; \
	movdqa K(i + 5),a6; \
	pxor (tmp1),a5; \
	pxor (tmp2),a6

#define xor_B(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
	movdqa B(b1),a1; \
	movdqa B(b2),a2; \
	pxor K(k1),a1; \
	movdqa B(b3),a3; \
	pxor K(k2),a2; \
	movdqa B(b4),a4; \
	pxor K(k3),a3; \
	movdqa B(b5),a5; \
	pxor K(k4),a4; \
	movdqa B(b6),a6; \
	pxor K(k5),a5; \
	pxor K(k6),a6
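/*
 * xor_E(i) forms one 6-bit S-box input: it loads the key vectors
 * K(i..i+5) and XORs each with the block bit vector selected by the
 * salt-dependent pointer table at E(i..i+5).  xor_B() is the same
 * operation with the expansion resolved at build time: the salt only
 * transposes expansion bits 0-11 with bits 24-35, so the inputs to
 * S3/S4 (bits 12-23) and S7/S8 (bits 36-47) never depend on the salt
 * and are fetched from B() directly.  The xor_B_KS_p() variants below
 * do the same using the pointer-based key schedule k()/KS_p; the
 * _special form skips loading a6, relying on a6 (%xmm5) still holding
 * the B() vector that the preceding S8 wrote as its out1.
 */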
#define xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6) \
	movl k(k1),tmp1; \
	movl k(k2),tmp2; \
	movdqa B(b1),a1; \
	movdqa B(b2),a2; \
	pxor (tmp1),a1; \
	movl k(k3),tmp1; \
	pxor (tmp2),a2; \
	movl k(k4),tmp2; \
	movdqa B(b3),a3; \
	movdqa B(b4),a4; \
	pxor (tmp1),a3; \
	movl k(k6),tmp1; \
	pxor (tmp2),a4

#define xor_B_KS_p_suffix(b5, k5) \
	movl k(k5),tmp2; \
	movdqa B(b5),a5; \
	pxor (tmp1),a6; \
	pxor (tmp2),a5

#define xor_B_KS_p(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, b6, k6) \
	xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6); \
	movdqa B(b6),a6; \
	xor_B_KS_p_suffix(b5, k5)

#define xor_B_KS_p_special(b1, k1, b2, k2, b3, k3, b4, k4, b5, k5, k6) \
	xor_B_KS_p_prefix(b1, k1, b2, k2, b3, k3, b4, k4, k6); \
	xor_B_KS_p_suffix(b5, k5)

#define mask01 tmp_at(15)

#define v_ptr %eax
#define V(i) nvec(i)(v_ptr)

#if 1
#define SHLB1(reg) paddb reg,reg
#else
#define SHLB1(reg) psllq $1,reg
#endif
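/*
 * SHLB1 shifts every byte left by one bit.  SSE2 has no per-byte shift,
 * so "paddb reg,reg" (each byte added to itself) is used; the psllq
 * alternative is equivalent here because this code never lets a set bit
 * get shifted across a byte boundary.
 *
 * FINALIZE_NEXT_KEY_BITS_0_6 is, in effect, an 8x8 bit transpose done
 * independently in every byte lane: for b = 0..6 it extracts bit b of
 * each of the eight vectors V(0..7) with a walking mask (mask01, 0x01
 * in every byte, shifted left once per step), aligns bit b of V(i) to
 * bit position i, and ORs the pieces together into K(b).  Bit 7 is only
 * needed for LM and is handled separately in DES_bs_crypt_LM.
 */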
#define FINALIZE_NEXT_KEY_BITS_0_6 \
	movdqa mask01,%xmm7; \
\
	movdqa V(0),%xmm0; \
	movdqa V(1),%xmm1; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	SHLB1(%xmm1); \
	psllq $2,%xmm2; \
	psllq $3,%xmm3; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	psllq $4,%xmm4; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	psllq $5,%xmm5; \
	psllq $6,%xmm6; \
	psllq $7,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(0); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $1,%xmm0; \
	SHLB1(%xmm2); \
	psllq $2,%xmm3; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	psllq $3,%xmm4; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	psllq $4,%xmm5; \
	psllq $5,%xmm6; \
	psllq $6,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(1); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $2,%xmm0; \
	psrlq $1,%xmm1; \
	SHLB1(%xmm3); \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	psllq $2,%xmm4; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	psllq $3,%xmm5; \
	psllq $4,%xmm6; \
	psllq $5,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(2); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $3,%xmm0; \
	psrlq $2,%xmm1; \
	psrlq $1,%xmm2; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	SHLB1(%xmm4); \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	psllq $2,%xmm5; \
	psllq $3,%xmm6; \
	psllq $4,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(3); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $4,%xmm0; \
	psrlq $3,%xmm1; \
	psrlq $2,%xmm2; \
	psrlq $1,%xmm3; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	SHLB1(%xmm5); \
	psllq $2,%xmm6; \
	psllq $3,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(4); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $5,%xmm0; \
	psrlq $4,%xmm1; \
	psrlq $3,%xmm2; \
	psrlq $2,%xmm3; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	psrlq $1,%xmm4; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	SHLB1(%xmm6); \
	psllq $2,%xmm0; \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	movdqa V(1),%xmm1; \
	por %xmm3,%xmm0; \
	SHLB1(%xmm7); \
	movdqa %xmm0,K(5); \
\
	movdqa V(0),%xmm0; \
	movdqa V(2),%xmm2; \
	movdqa V(3),%xmm3; \
	pand %xmm7,%xmm0; \
	pand %xmm7,%xmm1; \
	pand %xmm7,%xmm2; \
	pand %xmm7,%xmm3; \
	psrlq $6,%xmm0; \
	psrlq $5,%xmm1; \
	psrlq $4,%xmm2; \
	psrlq $3,%xmm3; \
	por %xmm0,%xmm1; \
	por %xmm2,%xmm3; \
	movdqa V(4),%xmm4; \
	movdqa V(5),%xmm5; \
	por %xmm1,%xmm3; \
	pand %xmm7,%xmm4; \
	pand %xmm7,%xmm5; \
	movdqa V(6),%xmm6; \
	movdqa V(7),%xmm0; \
	psrlq $2,%xmm4; \
	pand %xmm7,%xmm6; \
	pand %xmm7,%xmm0; \
	psrlq $1,%xmm5; \
	SHLB1(%xmm0); \
	por %xmm4,%xmm5; \
	por %xmm6,%xmm3; \
	por %xmm5,%xmm0; \
	por %xmm3,%xmm0; \
	movdqa %xmm0,K(6)

.text

DO_ALIGN(6)
.globl DES_bs_init_asm
DES_bs_init_asm:
	pcmpeqd %xmm0,%xmm0
	movdqa %xmm0,pnot
	paddb %xmm0,%xmm0
	pxor pnot,%xmm0
	movdqa %xmm0,mask01
	ret
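/*
 * rounds_and_swapped counts down the eight round pairs of one DES
 * encryption.  For the second and subsequent outer iterations (crypt(3)
 * iterates DES with the block halves swapped in place), it is reloaded
 * with 0x108: the low 8 counts the pairs, while the extra 0x100 is a
 * sentinel that the cmpl at the top of the loop uses to detect when the
 * pairs entered from the swapped half are done.  The subl adjustments
 * of k_ptr rebase the key schedule accordingly.
 */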
#define rounds_and_swapped %ebp
#define iterations %eax

DO_ALIGN(6)
.globl DES_bs_crypt
DES_bs_crypt:
	cmpl $0,DES_bs_all_keys_changed
	jz DES_bs_crypt_body
	call DES_bs_finalize_keys
DES_bs_crypt_body:
	movl 4(%esp),iterations
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped
DES_bs_crypt_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62))
	xor_E(6)
	S2(B(44), B(59), B(33), B(49))
	xor_E(12)
	S3(B(55), B(47), B(61), B(37))
	xor_E(18)
	S4(B(57), B(51), B(41), B(32))
	xor_E(24)
	S5(B(39), B(45), B(56), B(34))
	xor_E(30)
	S6(B(35), B(60), B(42), B(50))
	xor_E(36)
	S7(B(63), B(43), B(53), B(38))
	xor_E(42)
	S8(B(36), B(58), B(46), B(52))
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_next
DES_bs_crypt_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30))
	xor_E(54)
	S2(B(12), B(27), B(1), B(17))
	xor_E(60)
	S3(B(23), B(15), B(29), B(5))
	xor_E(66)
	S4(B(25), B(19), B(9), B(0))
	xor_E(72)
	S5(B(7), B(13), B(24), B(2))
	xor_E(78)
	S6(B(3), B(28), B(10), B(18))
	xor_E(84)
	S7(B(31), B(11), B(21), B(6))
	xor_E(90)
	addl $nvec(96),k_ptr
	S8(B(4), B(26), B(14), B(20))
	decl rounds_and_swapped
	jnz DES_bs_crypt_start
	subl $nvec(0x300+48),k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_swap
	popl %esi
	popl %ebp
	ret
DES_bs_crypt_next:
	subl $nvec(0x300-48),k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_start
	popl %esi
	popl %ebp
	ret

DO_ALIGN(6)
.globl DES_bs_crypt_25
DES_bs_crypt_25:
	cmpl $0,DES_bs_all_keys_changed
	jnz DES_bs_finalize_keys_25
DES_bs_crypt_25_body:
	pxor zero,zero
	pushl %ebp
	pushl %esi
	movl $DES_bs_all_KS_v,k_ptr
	DES_bs_clear_block
	movl $8,rounds_and_swapped
	movl $25,iterations
DES_bs_crypt_25_start:
	xor_E(0)
	S1(B(40), B(48), B(54), B(62))
	xor_E(6)
	S2(B(44), B(59), B(33), B(49))
	xor_B(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37))
	xor_B(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32))
	xor_E(24)
	S5(B(39), B(45), B(56), B(34))
	xor_E(30)
	S6(B(35), B(60), B(42), B(50))
	xor_B(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38))
	xor_B(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52))
	cmpl $0x100,rounds_and_swapped
	je DES_bs_crypt_25_next
DES_bs_crypt_25_swap:
	xor_E(48)
	S1(B(8), B(16), B(22), B(30))
	xor_E(54)
	S2(B(12), B(27), B(1), B(17))
	xor_B(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5))
	xor_B(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0))
	xor_E(72)
	S5(B(7), B(13), B(24), B(2))
	xor_E(78)
	S6(B(3), B(28), B(10), B(18))
	xor_B(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6))
	xor_B(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
	S8(B(4), B(26), B(14), B(20))
	addl $nvec(96),k_ptr
	decl rounds_and_swapped
	jnz DES_bs_crypt_25_start
	subl $nvec(0x300+48),k_ptr
	movl $0x108,rounds_and_swapped
	decl iterations
	jnz DES_bs_crypt_25_swap
	popl %esi
	popl %ebp
	ret
DES_bs_crypt_25_next:
	subl $nvec(0x300-48),k_ptr
	movl $8,rounds_and_swapped
	decl iterations
	jmp DES_bs_crypt_25_start
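/*
 * DES_bs_finalize_keys transposes the packed key bytes in xkeys into
 * the 56 per-bit vectors at DES_bs_all_K, then expands them into the
 * 768-vector key schedule KS_v by dereferencing the precomputed pointer
 * table KSp.  DES_bs_finalize_keys_25 pushes the address of
 * DES_bs_crypt_25_body so that the shared routine's ret transfers
 * control straight there, saving a call/jmp pair.
 */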
DES_bs_finalize_keys_25:
	pushl $DES_bs_crypt_25_body
DES_bs_finalize_keys:
	movl $DES_bs_all_xkeys,v_ptr
	movl $DES_bs_all_K,k_ptr
	movl $0,DES_bs_all_keys_changed
DES_bs_finalize_keys_main_loop:
	FINALIZE_NEXT_KEY_BITS_0_6
	addl $nvec(7),k_ptr
	addl $nvec(8),v_ptr
	cmpl $DES_bs_all_K+nvec(56),k_ptr
	jb DES_bs_finalize_keys_main_loop
	pushl %esi
	movl $DES_bs_all_KSp,k_ptr
	movl $DES_bs_all_KS_v,v_ptr
DES_bs_finalize_keys_expand_loop:
	movl k(0),tmp1
	movl k(1),tmp2
	movdqa (tmp1),%xmm0
	movdqa (tmp2),%xmm1
	movl k(2),tmp1
	movl k(3),tmp2
	movdqa %xmm0,V(0)
	movdqa %xmm1,V(1)
	movdqa (tmp1),%xmm0
	movdqa (tmp2),%xmm1
	movl k(4),tmp1
	movl k(5),tmp2
	movdqa %xmm0,V(2)
	movdqa %xmm1,V(3)
	movdqa (tmp1),%xmm0
	movdqa (tmp2),%xmm1
	movl k(6),tmp1
	movl k(7),tmp2
	movdqa %xmm0,V(4)
	movdqa %xmm1,V(5)
	movdqa (tmp1),%xmm0
	movdqa (tmp2),%xmm1
	addl $nptr(8),k_ptr
	movdqa %xmm0,V(6)
	movdqa %xmm1,V(7)
	addl $nvec(8),v_ptr
	cmpl $DES_bs_all_KSp+nptr(0x300),k_ptr
	jb DES_bs_finalize_keys_expand_loop
	popl %esi
	ret

#define ones %xmm1

#define rounds %eax

DO_ALIGN(6)
.globl DES_bs_crypt_LM
DES_bs_crypt_LM:
	movl $DES_bs_all_xkeys,v_ptr
	movl $DES_bs_all_K,k_ptr
DES_bs_finalize_keys_LM_loop:
	FINALIZE_NEXT_KEY_BITS_0_6
# bit 7
	SHLB1(%xmm7)
	movdqa V(0),%xmm0
	movdqa V(1),%xmm1
	movdqa V(2),%xmm2
	movdqa V(3),%xmm3
	pand %xmm7,%xmm0
	pand %xmm7,%xmm1
	pand %xmm7,%xmm2
	pand %xmm7,%xmm3
	psrlq $7,%xmm0
	psrlq $6,%xmm1
	psrlq $5,%xmm2
	psrlq $4,%xmm3
	por %xmm0,%xmm1
	por %xmm2,%xmm3
	movdqa V(4),%xmm4
	movdqa V(5),%xmm5
	por %xmm1,%xmm3
	pand %xmm7,%xmm4
	pand %xmm7,%xmm5
	movdqa V(6),%xmm6
	movdqa V(7),%xmm0
	psrlq $3,%xmm4
	pand %xmm7,%xmm6
	pand %xmm7,%xmm0
	psrlq $2,%xmm5
	psrlq $1,%xmm6
	por %xmm4,%xmm5
	por %xmm6,%xmm3
	por %xmm5,%xmm0
	addl $nvec(8),v_ptr
	por %xmm3,%xmm0
	movdqa %xmm0,K(7)
	addl $nvec(8),k_ptr
	cmpl $DES_bs_all_K+nvec(56),k_ptr
	jb DES_bs_finalize_keys_LM_loop

	pxor zero,zero
	pushl %esi
	pcmpeqd ones,ones
	movl $DES_bs_all_KS_p,k_ptr
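/*
 * LM hashes are DES encryptions of the fixed string "KGS!@#$%" under
 * the password-derived key.  The block below loads B() with the
 * bitsliced form of that constant plaintext as arranged by DES's
 * initial permutation: B(i) is set to all ones where bit i of the
 * permuted constant is set, and to all zeroes elsewhere.
 */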
	movdqa zero,B(0)
	movdqa zero,B(1)
	movdqa zero,B(2)
	movdqa zero,B(3)
	movdqa zero,B(4)
	movdqa zero,B(5)
	movdqa zero,B(6)
	movdqa zero,B(7)
	movdqa ones,B(8)
	movdqa ones,B(9)
	movdqa ones,B(10)
	movdqa zero,B(11)
	movdqa ones,B(12)
	movdqa zero,B(13)
	movdqa zero,B(14)
	movdqa zero,B(15)
	movdqa zero,B(16)
	movdqa zero,B(17)
	movdqa zero,B(18)
	movdqa zero,B(19)
	movdqa zero,B(20)
	movdqa zero,B(21)
	movdqa zero,B(22)
	movdqa ones,B(23)
	movdqa zero,B(24)
	movdqa zero,B(25)
	movdqa ones,B(26)
	movdqa zero,B(27)
	movdqa zero,B(28)
	movdqa ones,B(29)
	movdqa ones,B(30)
	movdqa ones,B(31)
	movdqa zero,B(32)
	movdqa zero,B(33)
	movdqa zero,B(34)
	movdqa ones,B(35)
	movdqa zero,B(36)
	movdqa ones,B(37)
	movdqa ones,B(38)
	movdqa ones,B(39)
	movdqa zero,B(40)
	movdqa zero,B(41)
	movdqa zero,B(42)
	movdqa zero,B(43)
	movdqa zero,B(44)
	movdqa ones,B(45)
	movdqa zero,B(46)
	movdqa zero,B(47)
	movdqa ones,B(48)
	movdqa ones,B(49)
	movdqa zero,B(50)
	movdqa zero,B(51)
	movdqa zero,B(52)
	movdqa zero,B(53)
	movdqa ones,B(54)
	movdqa zero,B(55)
	movdqa ones,B(56)
	movdqa zero,B(57)
	movdqa ones,B(58)
	movdqa zero,B(59)
	movdqa ones,B(60)
	movdqa ones,B(61)
	movdqa ones,B(62)
	movdqa ones,B(63)
	movl $8,rounds
DES_bs_crypt_LM_loop:
	xor_B_KS_p_special(31, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5)
	S1(B(40), B(48), B(54), B(62))
	xor_B_KS_p(3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11)
	S2(B(44), B(59), B(33), B(49))
	xor_B_KS_p(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37))
	xor_B_KS_p(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32))
	xor_B_KS_p(15, 24, 16, 25, 17, 26, 18, 27, 19, 28, 20, 29)
	S5(B(39), B(45), B(56), B(34))
	xor_B_KS_p(19, 30, 20, 31, 21, 32, 22, 33, 23, 34, 24, 35)
	S6(B(35), B(60), B(42), B(50))
	xor_B_KS_p(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38))
	xor_B_KS_p(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52))
	xor_B_KS_p_special(63, 48, 32, 49, 33, 50, 34, 51, 35, 52, 53)
	S1(B(8), B(16), B(22), B(30))
	xor_B_KS_p(35, 54, 36, 55, 37, 56, 38, 57, 39, 58, 40, 59)
	S2(B(12), B(27), B(1), B(17))
	xor_B_KS_p(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5))
	xor_B_KS_p(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0))
	xor_B_KS_p(47, 72, 48, 73, 49, 74, 50, 75, 51, 76, 52, 77)
	S5(B(7), B(13), B(24), B(2))
	xor_B_KS_p(51, 78, 52, 79, 53, 80, 54, 81, 55, 82, 56, 83)
	S6(B(3), B(28), B(10), B(18))
	xor_B_KS_p(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6))
	xor_B_KS_p(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
	addl $nptr(96),k_ptr
	S8(B(4), B(26), B(14), B(20))
	decl rounds
	jnz DES_bs_crypt_LM_loop
	movl 8(%esp),%eax
	popl %esi
	movl (%eax),%eax
	ret

#define rounds %eax
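/*
 * DES_bs_crypt_plain performs a single unsalted DES encryption: the
 * plaintext bit vectors in DES_bs_P are copied into B(), the keys are
 * finalized by the same bit-transpose loop as above, and the sixteen
 * rounds then run with the pointer-based key schedule and build-time E
 * indices (with no salt, no E pointer indirection is needed).
 */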
DO_ALIGN(6)
.globl DES_bs_crypt_plain
DES_bs_crypt_plain:
	movl $DES_bs_all_xkeys,v_ptr
	movl $DES_bs_all_K,k_ptr
	movdqa P(0),%xmm4
	movdqa %xmm4,B(0)
	movdqa P(1),%xmm4
	movdqa %xmm4,B(1)
	movdqa P(2),%xmm4
	movdqa %xmm4,B(2)
	movdqa P(3),%xmm4
	movdqa %xmm4,B(3)
	movdqa P(4),%xmm4
	movdqa %xmm4,B(4)
	movdqa P(5),%xmm4
	movdqa %xmm4,B(5)
	movdqa P(6),%xmm4
	movdqa %xmm4,B(6)
	movdqa P(7),%xmm4
	movdqa %xmm4,B(7)
	movdqa P(8),%xmm4
	movdqa %xmm4,B(8)
	movdqa P(9),%xmm4
	movdqa %xmm4,B(9)
	movdqa P(10),%xmm4
	movdqa %xmm4,B(10)
	movdqa P(11),%xmm4
	movdqa %xmm4,B(11)
	movdqa P(12),%xmm4
	movdqa %xmm4,B(12)
	movdqa P(13),%xmm4
	movdqa %xmm4,B(13)
	movdqa P(14),%xmm4
	movdqa %xmm4,B(14)
	movdqa P(15),%xmm4
	movdqa %xmm4,B(15)
	movdqa P(16),%xmm4
	movdqa %xmm4,B(16)
	movdqa P(17),%xmm4
	movdqa %xmm4,B(17)
	movdqa P(18),%xmm4
	movdqa %xmm4,B(18)
	movdqa P(19),%xmm4
	movdqa %xmm4,B(19)
	movdqa P(20),%xmm4
	movdqa %xmm4,B(20)
	movdqa P(21),%xmm4
	movdqa %xmm4,B(21)
	movdqa P(22),%xmm4
	movdqa %xmm4,B(22)
	movdqa P(23),%xmm4
	movdqa %xmm4,B(23)
	movdqa P(24),%xmm4
	movdqa %xmm4,B(24)
	movdqa P(25),%xmm4
	movdqa %xmm4,B(25)
	movdqa P(26),%xmm4
	movdqa %xmm4,B(26)
	movdqa P(27),%xmm4
	movdqa %xmm4,B(27)
	movdqa P(28),%xmm4
	movdqa %xmm4,B(28)
	movdqa P(29),%xmm4
	movdqa %xmm4,B(29)
	movdqa P(30),%xmm4
	movdqa %xmm4,B(30)
	movdqa P(31),%xmm4
	movdqa %xmm4,B(31)
	movdqa P(32),%xmm4
	movdqa %xmm4,B(32)
	movdqa P(33),%xmm4
	movdqa %xmm4,B(33)
	movdqa P(34),%xmm4
	movdqa %xmm4,B(34)
	movdqa P(35),%xmm4
	movdqa %xmm4,B(35)
	movdqa P(36),%xmm4
	movdqa %xmm4,B(36)
	movdqa P(37),%xmm4
	movdqa %xmm4,B(37)
	movdqa P(38),%xmm4
	movdqa %xmm4,B(38)
	movdqa P(39),%xmm4
	movdqa %xmm4,B(39)
	movdqa P(40),%xmm4
	movdqa %xmm4,B(40)
	movdqa P(41),%xmm4
	movdqa %xmm4,B(41)
	movdqa P(42),%xmm4
	movdqa %xmm4,B(42)
	movdqa P(43),%xmm4
	movdqa %xmm4,B(43)
	movdqa P(44),%xmm4
	movdqa %xmm4,B(44)
	movdqa P(45),%xmm4
	movdqa %xmm4,B(45)
	movdqa P(46),%xmm4
	movdqa %xmm4,B(46)
	movdqa P(47),%xmm4
	movdqa %xmm4,B(47)
	movdqa P(48),%xmm4
	movdqa %xmm4,B(48)
	movdqa P(49),%xmm4
	movdqa %xmm4,B(49)
	movdqa P(50),%xmm4
	movdqa %xmm4,B(50)
	movdqa P(51),%xmm4
	movdqa %xmm4,B(51)
	movdqa P(52),%xmm4
	movdqa %xmm4,B(52)
	movdqa P(53),%xmm4
	movdqa %xmm4,B(53)
	movdqa P(54),%xmm4
	movdqa %xmm4,B(54)
	movdqa P(55),%xmm4
	movdqa %xmm4,B(55)
	movdqa P(56),%xmm4
	movdqa %xmm4,B(56)
	movdqa P(57),%xmm4
	movdqa %xmm4,B(57)
	movdqa P(58),%xmm4
	movdqa %xmm4,B(58)
	movdqa P(59),%xmm4
	movdqa %xmm4,B(59)
	movdqa P(60),%xmm4
	movdqa %xmm4,B(60)
	movdqa P(61),%xmm4
	movdqa %xmm4,B(61)
	movdqa P(62),%xmm4
	movdqa %xmm4,B(62)
	movdqa P(63),%xmm4
	movdqa %xmm4,B(63)

DES_bs_finalize_keys_plain_loop:
	FINALIZE_NEXT_KEY_BITS_0_6
	addl $nvec(7),k_ptr
	addl $nvec(8),v_ptr
	cmpl $DES_bs_all_K+nvec(56),k_ptr
	jb DES_bs_finalize_keys_plain_loop
	pushl %esi
	movl $DES_bs_all_KS_p,k_ptr
	movl $DES_bs_all_KS_v,v_ptr
	movl $8,rounds
DES_bs_crypt_plain_loop:
	xor_B_KS_p(31, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5)
	S1(B(40), B(48), B(54), B(62))
	xor_B_KS_p(3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11)
	S2(B(44), B(59), B(33), B(49))
	xor_B_KS_p(7, 12, 8, 13, 9, 14, 10, 15, 11, 16, 12, 17)
	S3(B(55), B(47), B(61), B(37))
	xor_B_KS_p(11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23)
	S4(B(57), B(51), B(41), B(32))
	xor_B_KS_p(15, 24, 16, 25, 17, 26, 18, 27, 19, 28, 20, 29)
	S5(B(39), B(45), B(56), B(34))
	xor_B_KS_p(19, 30, 20, 31, 21, 32, 22, 33, 23, 34, 24, 35)
	S6(B(35), B(60), B(42), B(50))
	xor_B_KS_p(23, 36, 24, 37, 25, 38, 26, 39, 27, 40, 28, 41)
	S7(B(63), B(43), B(53), B(38))
	xor_B_KS_p(27, 42, 28, 43, 29, 44, 30, 45, 31, 46, 0, 47)
	S8(B(36), B(58), B(46), B(52))
	xor_B_KS_p(63, 48, 32, 49, 33, 50, 34, 51, 35, 52, 36, 53)
	S1(B(8), B(16), B(22), B(30))
	xor_B_KS_p(35, 54, 36, 55, 37, 56, 38, 57, 39, 58, 40, 59)
	S2(B(12), B(27), B(1), B(17))
	xor_B_KS_p(39, 60, 40, 61, 41, 62, 42, 63, 43, 64, 44, 65)
	S3(B(23), B(15), B(29), B(5))
	xor_B_KS_p(43, 66, 44, 67, 45, 68, 46, 69, 47, 70, 48, 71)
	S4(B(25), B(19), B(9), B(0))
	xor_B_KS_p(47, 72, 48, 73, 49, 74, 50, 75, 51, 76, 52, 77)
	S5(B(7), B(13), B(24), B(2))
	xor_B_KS_p(51, 78, 52, 79, 53, 80, 54, 81, 55, 82, 56, 83)
	S6(B(3), B(28), B(10), B(18))
	xor_B_KS_p(55, 84, 56, 85, 57, 86, 58, 87, 59, 88, 60, 89)
	S7(B(31), B(11), B(21), B(6))
	xor_B_KS_p(59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 32, 95)
	addl $nptr(96),k_ptr
	S8(B(4), B(26), B(14), B(20))
	decl rounds
	jnz DES_bs_crypt_plain_loop
	popl %esi
	ret

#endif

#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",@progbits
#endif