1! des_enc.m4 2! des_enc.S (generated from des_enc.m4) 3! 4! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. 5! 6! Version 1.0. 32-bit version. 7! 8! June 8, 2000. 9! 10! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation 11! by Andy Polyakov. 12! 13! January 1, 2003. 14! 15! Assembler version: Copyright Svend Olaf Mikkelsen. 16! 17! Original C code: Copyright Eric A. Young. 18! 19! This code can be freely used by LibDES/SSLeay/OpenSSL users. 20! 21! The LibDES/SSLeay/OpenSSL copyright notices must be respected. 22! 23! This version can be redistributed. 24! 25! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S 26! 27! Global registers 1 to 5 are used. This is the same as done by the 28! cc compiler. The UltraSPARC load/store little endian feature is used. 29! 30! Instruction grouping often refers to one CPU cycle. 31! 32! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S 33! 34! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S 35! 36! Performance improvement according to './apps/openssl speed des' 37! 38! 32-bit build: 39! 23% faster than cc-5.2 -xarch=v8plus -xO5 40! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 41! 64-bit build: 42! 50% faster than cc-5.2 -xarch=v9 -xO5 43! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 44! 45 46.ident "des_enc.m4 2.1" 47 48#if defined(__SUNPRO_C) && defined(__sparcv9) 49# define ABI64 /* They've said -xarch=v9 at command line */ 50#elif defined(__GNUC__) && defined(__arch64__) 51# define ABI64 /* They've said -m64 at command line */ 52#endif 53 54#ifdef ABI64 55 .register %g2,#scratch 56 .register %g3,#scratch 57# define FRAME -192 58# define BIAS 2047 59# define LDPTR ldx 60# define STPTR stx 61# define ARG0 128 62# define ARGSZ 8 63# ifndef OPENSSL_SYSNAME_ULTRASPARC 64# define OPENSSL_SYSNAME_ULTRASPARC 65# endif 66#else 67# define FRAME -96 68# define BIAS 0 69# define LDPTR ld 70# define STPTR st 71# define ARG0 68 72# define ARGSZ 4 73#endif 74 75#define LOOPS 7 76 77#define global0 %g0 78#define global1 %g1 79#define global2 %g2 80#define global3 %g3 81#define global4 %g4 82#define global5 %g5 83 84#define local0 %l0 85#define local1 %l1 86#define local2 %l2 87#define local3 %l3 88#define local4 %l4 89#define local5 %l5 90#define local7 %l6 91#define local6 %l7 92 93#define in0 %i0 94#define in1 %i1 95#define in2 %i2 96#define in3 %i3 97#define in4 %i4 98#define in5 %i5 99#define in6 %i6 100#define in7 %i7 101 102#define out0 %o0 103#define out1 %o1 104#define out2 %o2 105#define out3 %o3 106#define out4 %o4 107#define out5 %o5 108#define out6 %o6 109#define out7 %o7 110 111#define stub stb 112 113changequote({,}) 114 115 116! Macro definitions: 117 118 119! {ip_macro} 120! 121! The logic used in initial and final permutations is the same as in 122! the C code. The permutations are done with a clever shift, xor, and 123! technique. 124! 125! The macro also loads address sbox 1 to 5 to global 1 to 5, address 126! sbox 6 to local6, and addres sbox 8 to out3. 127! 128! Rotates the halfs 3 left to bring the sbox bits in convenient positions. 129! 130! Loads key first round from address in parameter 5 to out0, out1. 131! 132! After the the original LibDES initial permutation, the resulting left 133! is in the variable initially used for right and vice versa. The macro 134! implements the possibility to keep the halfs in the original registers. 135! 136! parameter 1 left 137! parameter 2 right 138! parameter 3 result left (modify in first round) 139! parameter 4 result right (use in first round) 140! parameter 5 key address 141! parameter 6 1/2 for include encryption/decryption 142! parameter 7 1 for move in1 to in3 143! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 144! parameter 9 1 for load ks3 and ks2 to in4 and in3 145 146define(ip_macro, { 147 148! {ip_macro} 149! $1 $2 $4 $3 $5 $6 $7 $8 $9 150 151 ld [out2+256], local1 152 srl $2, 4, local4 153 154 xor local4, $1, local4 155 ifelse($7,1,{mov in1, in3},{nop}) 156 157 ld [out2+260], local2 158 and local4, local1, local4 159 ifelse($8,1,{mov in3, in4},{}) 160 ifelse($8,2,{mov in4, in3},{}) 161 162 ld [out2+280], out4 ! loop counter 163 sll local4, 4, local1 164 xor $1, local4, $1 165 166 ld [out2+264], local3 167 srl $1, 16, local4 168 xor $2, local1, $2 169 170 ifelse($9,1,{LDPTR KS3, in4},{}) 171 xor local4, $2, local4 172 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr 173 174 ifelse($9,1,{LDPTR KS2, in3},{}) 175 and local4, local2, local4 176 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr 177 178 sll local4, 16, local1 179 xor $2, local4, $2 180 181 srl $2, 2, local4 182 xor $1, local1, $1 183 184 sethi %hi(16711680), local5 185 xor local4, $1, local4 186 187 and local4, local3, local4 188 or local5, 255, local5 189 190 sll local4, 2, local2 191 xor $1, local4, $1 192 193 srl $1, 8, local4 194 xor $2, local2, $2 195 196 xor local4, $2, local4 197 add global1, 768, global4 198 199 and local4, local5, local4 200 add global1, 1024, global5 201 202 ld [out2+272], local7 203 sll local4, 8, local1 204 xor $2, local4, $2 205 206 srl $2, 1, local4 207 xor $1, local1, $1 208 209 ld [$5], out0 ! key 7531 210 xor local4, $1, local4 211 add global1, 256, global2 212 213 ld [$5+4], out1 ! key 8642 214 and local4, local7, local4 215 add global1, 512, global3 216 217 sll local4, 1, local1 218 xor $1, local4, $1 219 220 sll $1, 3, local3 221 xor $2, local1, $2 222 223 sll $2, 3, local2 224 add global1, 1280, local6 ! address sbox 8 225 226 srl $1, 29, local4 227 add global1, 1792, out3 ! address sbox 8 228 229 srl $2, 29, local1 230 or local4, local3, $4 231 232 or local2, local1, $3 233 234 ifelse($6, 1, { 235 236 ld [out2+284], local5 ! 0x0000FC00 used in the rounds 237 or local2, local1, $3 238 xor $4, out0, local1 239 240 call .des_enc.1 241 and local1, 252, local1 242 243 },{}) 244 245 ifelse($6, 2, { 246 247 ld [out2+284], local5 ! 0x0000FC00 used in the rounds 248 or local2, local1, $3 249 xor $4, out0, local1 250 251 call .des_dec.1 252 and local1, 252, local1 253 254 },{}) 255}) 256 257 258! {rounds_macro} 259! 260! The logic used in the DES rounds is the same as in the C code, 261! except that calculations for sbox 1 and sbox 5 begin before 262! the previous round is finished. 263! 264! In each round one half (work) is modified based on key and the 265! other half (use). 266! 267! In this version we do two rounds in a loop repeated 7 times 268! and two rounds seperately. 269! 270! One half has the bits for the sboxes in the following positions: 271! 272! 777777xx555555xx333333xx111111xx 273! 274! 88xx666666xx444444xx222222xx8888 275! 276! The bits for each sbox are xor-ed with the key bits for that box. 277! The above xx bits are cleared, and the result used for lookup in 278! the sbox table. Each sbox entry contains the 4 output bits permuted 279! into 32 bits according to the P permutation. 280! 281! In the description of DES, left and right are switched after 282! each round, except after last round. In this code the original 283! left and right are kept in the same register in all rounds, meaning 284! that after the 16 rounds the result for right is in the register 285! originally used for left. 286! 287! parameter 1 first work (left in first round) 288! parameter 2 first use (right in first round) 289! parameter 3 enc/dec 1/-1 290! parameter 4 loop label 291! parameter 5 key address register 292! parameter 6 optional address for key next encryption/decryption 293! parameter 7 not empty for include retl 294! 295! also compares in2 to 8 296 297define(rounds_macro, { 298 299! {rounds_macro} 300! $1 $2 $3 $4 $5 $6 $7 $8 $9 301 302 xor $2, out0, local1 303 304 ld [out2+284], local5 ! 0x0000FC00 305 ba $4 306 and local1, 252, local1 307 308 .align 32 309 310$4: 311 ! local6 is address sbox 6 312 ! out3 is address sbox 8 313 ! out4 is loop counter 314 315 ld [global1+local1], local1 316 xor $2, out1, out1 ! 8642 317 xor $2, out0, out0 ! 7531 318 fmovs %f0, %f0 ! fxor used for alignment 319 320 srl out1, 4, local0 ! rotate 4 right 321 and out0, local5, local3 ! 3 322 fmovs %f0, %f0 323 324 ld [$5+$3*8], local7 ! key 7531 next round 325 srl local3, 8, local3 ! 3 326 and local0, 252, local2 ! 2 327 fmovs %f0, %f0 328 329 ld [global3+local3],local3 ! 3 330 sll out1, 28, out1 ! rotate 331 xor $1, local1, $1 ! 1 finished, local1 now sbox 7 332 333 ld [global2+local2], local2 ! 2 334 srl out0, 24, local1 ! 7 335 or out1, local0, out1 ! rotate 336 337 ldub [out2+local1], local1 ! 7 (and 0xFC) 338 srl out1, 24, local0 ! 8 339 and out1, local5, local4 ! 4 340 341 ldub [out2+local0], local0 ! 8 (and 0xFC) 342 srl local4, 8, local4 ! 4 343 xor $1, local2, $1 ! 2 finished local2 now sbox 6 344 345 ld [global4+local4],local4 ! 4 346 srl out1, 16, local2 ! 6 347 xor $1, local3, $1 ! 3 finished local3 now sbox 5 348 349 ld [out3+local0],local0 ! 8 350 and local2, 252, local2 ! 6 351 add global1, 1536, local5 ! address sbox 7 352 353 ld [local6+local2], local2 ! 6 354 srl out0, 16, local3 ! 5 355 xor $1, local4, $1 ! 4 finished 356 357 ld [local5+local1],local1 ! 7 358 and local3, 252, local3 ! 5 359 xor $1, local0, $1 ! 8 finished 360 361 ld [global5+local3],local3 ! 5 362 xor $1, local2, $1 ! 6 finished 363 subcc out4, 1, out4 364 365 ld [$5+$3*8+4], out0 ! key 8642 next round 366 xor $1, local7, local2 ! sbox 5 next round 367 xor $1, local1, $1 ! 7 finished 368 369 srl local2, 16, local2 ! sbox 5 next round 370 xor $1, local3, $1 ! 5 finished 371 372 ld [$5+$3*16+4], out1 ! key 8642 next round again 373 and local2, 252, local2 ! sbox5 next round 374! next round 375 xor $1, local7, local7 ! 7531 376 377 ld [global5+local2], local2 ! 5 378 srl local7, 24, local3 ! 7 379 xor $1, out0, out0 ! 8642 380 381 ldub [out2+local3], local3 ! 7 (and 0xFC) 382 srl out0, 4, local0 ! rotate 4 right 383 and local7, 252, local1 ! 1 384 385 sll out0, 28, out0 ! rotate 386 xor $2, local2, $2 ! 5 finished local2 used 387 388 srl local0, 8, local4 ! 4 389 and local0, 252, local2 ! 2 390 ld [local5+local3], local3 ! 7 391 392 srl local0, 16, local5 ! 6 393 or out0, local0, out0 ! rotate 394 ld [global2+local2], local2 ! 2 395 396 srl out0, 24, local0 397 ld [$5+$3*16], out0 ! key 7531 next round 398 and local4, 252, local4 ! 4 399 400 and local5, 252, local5 ! 6 401 ld [global4+local4], local4 ! 4 402 xor $2, local3, $2 ! 7 finished local3 used 403 404 and local0, 252, local0 ! 8 405 ld [local6+local5], local5 ! 6 406 xor $2, local2, $2 ! 2 finished local2 now sbox 3 407 408 srl local7, 8, local2 ! 3 start 409 ld [out3+local0], local0 ! 8 410 xor $2, local4, $2 ! 4 finished 411 412 and local2, 252, local2 ! 3 413 ld [global1+local1], local1 ! 1 414 xor $2, local5, $2 ! 6 finished local5 used 415 416 ld [global3+local2], local2 ! 3 417 xor $2, local0, $2 ! 8 finished 418 add $5, $3*16, $5 ! enc add 8, dec add -8 to key pointer 419 420 ld [out2+284], local5 ! 0x0000FC00 421 xor $2, out0, local4 ! sbox 1 next round 422 xor $2, local1, $2 ! 1 finished 423 424 xor $2, local2, $2 ! 3 finished 425#ifdef OPENSSL_SYSNAME_ULTRASPARC 426 bne,pt %icc, $4 427#else 428 bne $4 429#endif 430 and local4, 252, local1 ! sbox 1 next round 431 432! two rounds more: 433 434 ld [global1+local1], local1 435 xor $2, out1, out1 436 xor $2, out0, out0 437 438 srl out1, 4, local0 ! rotate 439 and out0, local5, local3 440 441 ld [$5+$3*8], local7 ! key 7531 442 srl local3, 8, local3 443 and local0, 252, local2 444 445 ld [global3+local3],local3 446 sll out1, 28, out1 ! rotate 447 xor $1, local1, $1 ! 1 finished, local1 now sbox 7 448 449 ld [global2+local2], local2 450 srl out0, 24, local1 451 or out1, local0, out1 ! rotate 452 453 ldub [out2+local1], local1 454 srl out1, 24, local0 455 and out1, local5, local4 456 457 ldub [out2+local0], local0 458 srl local4, 8, local4 459 xor $1, local2, $1 ! 2 finished local2 now sbox 6 460 461 ld [global4+local4],local4 462 srl out1, 16, local2 463 xor $1, local3, $1 ! 3 finished local3 now sbox 5 464 465 ld [out3+local0],local0 466 and local2, 252, local2 467 add global1, 1536, local5 ! address sbox 7 468 469 ld [local6+local2], local2 470 srl out0, 16, local3 471 xor $1, local4, $1 ! 4 finished 472 473 ld [local5+local1],local1 474 and local3, 252, local3 475 xor $1, local0, $1 476 477 ld [global5+local3],local3 478 xor $1, local2, $1 ! 6 finished 479 cmp in2, 8 480 481 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter 482 xor $1, local7, local2 ! sbox 5 next round 483 xor $1, local1, $1 ! 7 finished 484 485 ld [$5+$3*8+4], out0 486 srl local2, 16, local2 ! sbox 5 next round 487 xor $1, local3, $1 ! 5 finished 488 489 and local2, 252, local2 490! next round (two rounds more) 491 xor $1, local7, local7 ! 7531 492 493 ld [global5+local2], local2 494 srl local7, 24, local3 495 xor $1, out0, out0 ! 8642 496 497 ldub [out2+local3], local3 498 srl out0, 4, local0 ! rotate 499 and local7, 252, local1 500 501 sll out0, 28, out0 ! rotate 502 xor $2, local2, $2 ! 5 finished local2 used 503 504 srl local0, 8, local4 505 and local0, 252, local2 506 ld [local5+local3], local3 507 508 srl local0, 16, local5 509 or out0, local0, out0 ! rotate 510 ld [global2+local2], local2 511 512 srl out0, 24, local0 513 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption 514 and local4, 252, local4 515 516 and local5, 252, local5 517 ld [global4+local4], local4 518 xor $2, local3, $2 ! 7 finished local3 used 519 520 and local0, 252, local0 521 ld [local6+local5], local5 522 xor $2, local2, $2 ! 2 finished local2 now sbox 3 523 524 srl local7, 8, local2 ! 3 start 525 ld [out3+local0], local0 526 xor $2, local4, $2 527 528 and local2, 252, local2 529 ld [global1+local1], local1 530 xor $2, local5, $2 ! 6 finished local5 used 531 532 ld [global3+local2], local2 533 srl $1, 3, local3 534 xor $2, local0, $2 535 536 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption 537 sll $1, 29, local4 538 xor $2, local1, $2 539 540 ifelse($7,{}, {}, {retl}) 541 xor $2, local2, $2 542}) 543 544 545! {fp_macro} 546! 547! parameter 1 right (original left) 548! parameter 2 left (original right) 549! parameter 3 1 for optional store to [in0] 550! parameter 4 1 for load input/output address to local5/7 551! 552! The final permutation logic switches the halfes, meaning that 553! left and right ends up the the registers originally used. 554 555define(fp_macro, { 556 557! {fp_macro} 558! $1 $2 $3 $4 $5 $6 $7 $8 $9 559 560 ! initially undo the rotate 3 left done after initial permutation 561 ! original left is received shifted 3 right and 29 left in local3/4 562 563 sll $2, 29, local1 564 or local3, local4, $1 565 566 srl $2, 3, $2 567 sethi %hi(0x55555555), local2 568 569 or $2, local1, $2 570 or local2, %lo(0x55555555), local2 571 572 srl $2, 1, local3 573 sethi %hi(0x00ff00ff), local1 574 xor local3, $1, local3 575 or local1, %lo(0x00ff00ff), local1 576 and local3, local2, local3 577 sethi %hi(0x33333333), local4 578 sll local3, 1, local2 579 580 xor $1, local3, $1 581 582 srl $1, 8, local3 583 xor $2, local2, $2 584 xor local3, $2, local3 585 or local4, %lo(0x33333333), local4 586 and local3, local1, local3 587 sethi %hi(0x0000ffff), local1 588 sll local3, 8, local2 589 590 xor $2, local3, $2 591 592 srl $2, 2, local3 593 xor $1, local2, $1 594 xor local3, $1, local3 595 or local1, %lo(0x0000ffff), local1 596 and local3, local4, local3 597 sethi %hi(0x0f0f0f0f), local4 598 sll local3, 2, local2 599 600 ifelse($4,1, {LDPTR INPUT, local5}) 601 xor $1, local3, $1 602 603 ifelse($4,1, {LDPTR OUTPUT, local7}) 604 srl $1, 16, local3 605 xor $2, local2, $2 606 xor local3, $2, local3 607 or local4, %lo(0x0f0f0f0f), local4 608 and local3, local1, local3 609 sll local3, 16, local2 610 611 xor $2, local3, local1 612 613 srl local1, 4, local3 614 xor $1, local2, $1 615 xor local3, $1, local3 616 and local3, local4, local3 617 sll local3, 4, local2 618 619 xor $1, local3, $1 620 621 ! optional store: 622 623 ifelse($3,1, {st $1, [in0]}) 624 625 xor local1, local2, $2 626 627 ifelse($3,1, {st $2, [in0+4]}) 628 629}) 630 631 632! {fp_ip_macro} 633! 634! Does initial permutation for next block mixed with 635! final permutation for current block. 636! 637! parameter 1 original left 638! parameter 2 original right 639! parameter 3 left ip 640! parameter 4 right ip 641! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 642! 2: mov in4 to in3 643! 644! also adds -8 to length in2 and loads loop counter to out4 645 646define(fp_ip_macro, { 647 648! {fp_ip_macro} 649! $1 $2 $3 $4 $5 $6 $7 $8 $9 650 651 define({temp1},{out4}) 652 define({temp2},{local3}) 653 654 define({ip1},{local1}) 655 define({ip2},{local2}) 656 define({ip4},{local4}) 657 define({ip5},{local5}) 658 659 ! $1 in local3, local4 660 661 ld [out2+256], ip1 662 sll out5, 29, temp1 663 or local3, local4, $1 664 665 srl out5, 3, $2 666 ifelse($5,2,{mov in4, in3}) 667 668 ld [out2+272], ip5 669 srl $4, 4, local0 670 or $2, temp1, $2 671 672 srl $2, 1, temp1 673 xor temp1, $1, temp1 674 675 and temp1, ip5, temp1 676 xor local0, $3, local0 677 678 sll temp1, 1, temp2 679 xor $1, temp1, $1 680 681 and local0, ip1, local0 682 add in2, -8, in2 683 684 sll local0, 4, local7 685 xor $3, local0, $3 686 687 ld [out2+268], ip4 688 srl $1, 8, temp1 689 xor $2, temp2, $2 690 ld [out2+260], ip2 691 srl $3, 16, local0 692 xor $4, local7, $4 693 xor temp1, $2, temp1 694 xor local0, $4, local0 695 and temp1, ip4, temp1 696 and local0, ip2, local0 697 sll temp1, 8, temp2 698 xor $2, temp1, $2 699 sll local0, 16, local7 700 xor $4, local0, $4 701 702 srl $2, 2, temp1 703 xor $1, temp2, $1 704 705 ld [out2+264], temp2 ! ip3 706 srl $4, 2, local0 707 xor $3, local7, $3 708 xor temp1, $1, temp1 709 xor local0, $3, local0 710 and temp1, temp2, temp1 711 and local0, temp2, local0 712 sll temp1, 2, temp2 713 xor $1, temp1, $1 714 sll local0, 2, local7 715 xor $3, local0, $3 716 717 srl $1, 16, temp1 718 xor $2, temp2, $2 719 srl $3, 8, local0 720 xor $4, local7, $4 721 xor temp1, $2, temp1 722 xor local0, $4, local0 723 and temp1, ip2, temp1 724 and local0, ip4, local0 725 sll temp1, 16, temp2 726 xor $2, temp1, local4 727 sll local0, 8, local7 728 xor $4, local0, $4 729 730 srl $4, 1, local0 731 xor $3, local7, $3 732 733 srl local4, 4, temp1 734 xor local0, $3, local0 735 736 xor $1, temp2, $1 737 and local0, ip5, local0 738 739 sll local0, 1, local7 740 xor temp1, $1, temp1 741 742 xor $3, local0, $3 743 xor $4, local7, $4 744 745 sll $3, 3, local5 746 and temp1, ip1, temp1 747 748 sll temp1, 4, temp2 749 xor $1, temp1, $1 750 751 ifelse($5,1,{LDPTR KS2, in4}) 752 sll $4, 3, local2 753 xor local4, temp2, $2 754 755 ! reload since used as temporar: 756 757 ld [out2+280], out4 ! loop counter 758 759 srl $3, 29, local0 760 ifelse($5,1,{add in4, 120, in4}) 761 762 ifelse($5,1,{LDPTR KS1, in3}) 763 srl $4, 29, local7 764 765 or local0, local5, $4 766 or local2, local7, $3 767 768}) 769 770 771 772! {load_little_endian} 773! 774! parameter 1 address 775! parameter 2 destination left 776! parameter 3 destination right 777! parameter 4 temporar 778! parameter 5 label 779 780define(load_little_endian, { 781 782! {load_little_endian} 783! $1 $2 $3 $4 $5 $6 $7 $8 $9 784 785 ! first in memory to rightmost in register 786 787#ifdef OPENSSL_SYSNAME_ULTRASPARC 788 andcc $1, 3, global0 789 bne,pn %icc, $5 790 nop 791 792 lda [$1] 0x88, $2 793 add $1, 4, $4 794 795 ba,pt %icc, $5a 796 lda [$4] 0x88, $3 797#endif 798 799$5: 800 ldub [$1+3], $2 801 802 ldub [$1+2], $4 803 sll $2, 8, $2 804 or $2, $4, $2 805 806 ldub [$1+1], $4 807 sll $2, 8, $2 808 or $2, $4, $2 809 810 ldub [$1+0], $4 811 sll $2, 8, $2 812 or $2, $4, $2 813 814 815 ldub [$1+3+4], $3 816 817 ldub [$1+2+4], $4 818 sll $3, 8, $3 819 or $3, $4, $3 820 821 ldub [$1+1+4], $4 822 sll $3, 8, $3 823 or $3, $4, $3 824 825 ldub [$1+0+4], $4 826 sll $3, 8, $3 827 or $3, $4, $3 828$5a: 829 830}) 831 832 833! {load_little_endian_inc} 834! 835! parameter 1 address 836! parameter 2 destination left 837! parameter 3 destination right 838! parameter 4 temporar 839! parameter 4 label 840! 841! adds 8 to address 842 843define(load_little_endian_inc, { 844 845! {load_little_endian_inc} 846! $1 $2 $3 $4 $5 $6 $7 $8 $9 847 848 ! first in memory to rightmost in register 849 850#ifdef OPENSSL_SYSNAME_ULTRASPARC 851 andcc $1, 3, global0 852 bne,pn %icc, $5 853 nop 854 855 lda [$1] 0x88, $2 856 add $1, 4, $1 857 858 lda [$1] 0x88, $3 859 ba,pt %icc, $5a 860 add $1, 4, $1 861#endif 862 863$5: 864 ldub [$1+3], $2 865 866 ldub [$1+2], $4 867 sll $2, 8, $2 868 or $2, $4, $2 869 870 ldub [$1+1], $4 871 sll $2, 8, $2 872 or $2, $4, $2 873 874 ldub [$1+0], $4 875 sll $2, 8, $2 876 or $2, $4, $2 877 878 ldub [$1+3+4], $3 879 add $1, 8, $1 880 881 ldub [$1+2+4-8], $4 882 sll $3, 8, $3 883 or $3, $4, $3 884 885 ldub [$1+1+4-8], $4 886 sll $3, 8, $3 887 or $3, $4, $3 888 889 ldub [$1+0+4-8], $4 890 sll $3, 8, $3 891 or $3, $4, $3 892$5a: 893 894}) 895 896 897! {load_n_bytes} 898! 899! Loads 1 to 7 bytes little endian 900! Remaining bytes are zeroed. 901! 902! parameter 1 address 903! parameter 2 length 904! parameter 3 destination register left 905! parameter 4 destination register right 906! parameter 5 temp 907! parameter 6 temp2 908! parameter 7 label 909! parameter 8 return label 910 911define(load_n_bytes, { 912 913! {load_n_bytes} 914! $1 $2 $5 $6 $7 $8 $7 $8 $9 915 916$7.0: call .+8 917 sll $2, 2, $6 918 919 add %o7,$7.jmp.table-$7.0,$5 920 921 add $5, $6, $5 922 mov 0, $4 923 924 ld [$5], $5 925 926 jmp %o7+$5 927 mov 0, $3 928 929$7.7: 930 ldub [$1+6], $5 931 sll $5, 16, $5 932 or $3, $5, $3 933$7.6: 934 ldub [$1+5], $5 935 sll $5, 8, $5 936 or $3, $5, $3 937$7.5: 938 ldub [$1+4], $5 939 or $3, $5, $3 940$7.4: 941 ldub [$1+3], $5 942 sll $5, 24, $5 943 or $4, $5, $4 944$7.3: 945 ldub [$1+2], $5 946 sll $5, 16, $5 947 or $4, $5, $4 948$7.2: 949 ldub [$1+1], $5 950 sll $5, 8, $5 951 or $4, $5, $4 952$7.1: 953 ldub [$1+0], $5 954 ba $8 955 or $4, $5, $4 956 957 .align 4 958 959$7.jmp.table: 960 .word 0 961 .word $7.1-$7.0 962 .word $7.2-$7.0 963 .word $7.3-$7.0 964 .word $7.4-$7.0 965 .word $7.5-$7.0 966 .word $7.6-$7.0 967 .word $7.7-$7.0 968}) 969 970 971! {store_little_endian} 972! 973! parameter 1 address 974! parameter 2 source left 975! parameter 3 source right 976! parameter 4 temporar 977 978define(store_little_endian, { 979 980! {store_little_endian} 981! $1 $2 $3 $4 $5 $6 $7 $8 $9 982 983 ! rightmost in register to first in memory 984 985#ifdef OPENSSL_SYSNAME_ULTRASPARC 986 andcc $1, 3, global0 987 bne,pn %icc, $5 988 nop 989 990 sta $2, [$1] 0x88 991 add $1, 4, $4 992 993 ba,pt %icc, $5a 994 sta $3, [$4] 0x88 995#endif 996 997$5: 998 and $2, 255, $4 999 stub $4, [$1+0] 1000 1001 srl $2, 8, $4 1002 and $4, 255, $4 1003 stub $4, [$1+1] 1004 1005 srl $2, 16, $4 1006 and $4, 255, $4 1007 stub $4, [$1+2] 1008 1009 srl $2, 24, $4 1010 stub $4, [$1+3] 1011 1012 1013 and $3, 255, $4 1014 stub $4, [$1+0+4] 1015 1016 srl $3, 8, $4 1017 and $4, 255, $4 1018 stub $4, [$1+1+4] 1019 1020 srl $3, 16, $4 1021 and $4, 255, $4 1022 stub $4, [$1+2+4] 1023 1024 srl $3, 24, $4 1025 stub $4, [$1+3+4] 1026 1027$5a: 1028 1029}) 1030 1031 1032! {store_n_bytes} 1033! 1034! Stores 1 to 7 bytes little endian 1035! 1036! parameter 1 address 1037! parameter 2 length 1038! parameter 3 source register left 1039! parameter 4 source register right 1040! parameter 5 temp 1041! parameter 6 temp2 1042! parameter 7 label 1043! parameter 8 return label 1044 1045define(store_n_bytes, { 1046 1047! {store_n_bytes} 1048! $1 $2 $5 $6 $7 $8 $7 $8 $9 1049 1050$7.0: call .+8 1051 sll $2, 2, $6 1052 1053 add %o7,$7.jmp.table-$7.0,$5 1054 1055 add $5, $6, $5 1056 1057 ld [$5], $5 1058 1059 jmp %o7+$5 1060 nop 1061 1062$7.7: 1063 srl $3, 16, $5 1064 and $5, 0xff, $5 1065 stub $5, [$1+6] 1066$7.6: 1067 srl $3, 8, $5 1068 and $5, 0xff, $5 1069 stub $5, [$1+5] 1070$7.5: 1071 and $3, 0xff, $5 1072 stub $5, [$1+4] 1073$7.4: 1074 srl $4, 24, $5 1075 stub $5, [$1+3] 1076$7.3: 1077 srl $4, 16, $5 1078 and $5, 0xff, $5 1079 stub $5, [$1+2] 1080$7.2: 1081 srl $4, 8, $5 1082 and $5, 0xff, $5 1083 stub $5, [$1+1] 1084$7.1: 1085 and $4, 0xff, $5 1086 1087 1088 ba $8 1089 stub $5, [$1] 1090 1091 .align 4 1092 1093$7.jmp.table: 1094 1095 .word 0 1096 .word $7.1-$7.0 1097 .word $7.2-$7.0 1098 .word $7.3-$7.0 1099 .word $7.4-$7.0 1100 .word $7.5-$7.0 1101 .word $7.6-$7.0 1102 .word $7.7-$7.0 1103}) 1104 1105 1106define(testvalue,{1}) 1107 1108define(register_init, { 1109 1110! For test purposes: 1111 1112 sethi %hi(testvalue), local0 1113 or local0, %lo(testvalue), local0 1114 1115 ifelse($1,{},{}, {mov local0, $1}) 1116 ifelse($2,{},{}, {mov local0, $2}) 1117 ifelse($3,{},{}, {mov local0, $3}) 1118 ifelse($4,{},{}, {mov local0, $4}) 1119 ifelse($5,{},{}, {mov local0, $5}) 1120 ifelse($6,{},{}, {mov local0, $6}) 1121 ifelse($7,{},{}, {mov local0, $7}) 1122 ifelse($8,{},{}, {mov local0, $8}) 1123 1124 mov local0, local1 1125 mov local0, local2 1126 mov local0, local3 1127 mov local0, local4 1128 mov local0, local5 1129 mov local0, local7 1130 mov local0, local6 1131 mov local0, out0 1132 mov local0, out1 1133 mov local0, out2 1134 mov local0, out3 1135 mov local0, out4 1136 mov local0, out5 1137 mov local0, global1 1138 mov local0, global2 1139 mov local0, global3 1140 mov local0, global4 1141 mov local0, global5 1142 1143}) 1144 1145.section ".text" 1146 1147 .align 32 1148 1149.des_enc: 1150 1151 ! key address in3 1152 ! loads key next encryption/decryption first round from [in4] 1153 1154 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) 1155 1156 1157 .align 32 1158 1159.des_dec: 1160 1161 ! implemented with out5 as first parameter to avoid 1162 ! register exchange in ede modes 1163 1164 ! key address in4 1165 ! loads key next encryption/decryption first round from [in3] 1166 1167 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) 1168 1169 1170 1171! void DES_encrypt1(data, ks, enc) 1172! ******************************* 1173 1174 .align 32 1175 .global DES_encrypt1 1176 .type DES_encrypt1,#function 1177 1178DES_encrypt1: 1179 1180 save %sp, FRAME, %sp 1181 1182 call .PIC.me.up 1183 mov .PIC.me.up-(.-4),out0 1184 1185 ld [in0], in5 ! left 1186 cmp in2, 0 ! enc 1187 1188#ifdef OPENSSL_SYSNAME_ULTRASPARC 1189 be,pn %icc, .encrypt.dec ! enc/dec 1190#else 1191 be .encrypt.dec 1192#endif 1193 ld [in0+4], out5 ! right 1194 1195 ! parameter 6 1/2 for include encryption/decryption 1196 ! parameter 7 1 for move in1 to in3 1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1198 1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) 1200 1201 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used 1202 1203 fp_macro(in5, out5, 1) ! 1 for store to [in0] 1204 1205 ret 1206 restore 1207 1208.encrypt.dec: 1209 1210 add in1, 120, in3 ! use last subkey for first round 1211 1212 ! parameter 6 1/2 for include encryption/decryption 1213 ! parameter 7 1 for move in1 to in3 1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1215 1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 1217 1218 fp_macro(out5, in5, 1) ! 1 for store to [in0] 1219 1220 ret 1221 restore 1222 1223.DES_encrypt1.end: 1224 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 1225 1226 1227! void DES_encrypt2(data, ks, enc) 1228!********************************* 1229 1230 ! encrypts/decrypts without initial/final permutation 1231 1232 .align 32 1233 .global DES_encrypt2 1234 .type DES_encrypt2,#function 1235 1236DES_encrypt2: 1237 1238 save %sp, FRAME, %sp 1239 1240 call .PIC.me.up 1241 mov .PIC.me.up-(.-4),out0 1242 1243 ! Set sbox address 1 to 6 and rotate halfs 3 left 1244 ! Errors caught by destest? Yes. Still? *NO* 1245 1246 !sethi %hi(DES_SPtrans), global1 ! address sbox 1 1247 1248 !or global1, %lo(DES_SPtrans), global1 ! sbox 1 1249 1250 add global1, 256, global2 ! sbox 2 1251 add global1, 512, global3 ! sbox 3 1252 1253 ld [in0], out5 ! right 1254 add global1, 768, global4 ! sbox 4 1255 add global1, 1024, global5 ! sbox 5 1256 1257 ld [in0+4], in5 ! left 1258 add global1, 1280, local6 ! sbox 6 1259 add global1, 1792, out3 ! sbox 8 1260 1261 ! rotate 1262 1263 sll in5, 3, local5 1264 mov in1, in3 ! key address to in3 1265 1266 sll out5, 3, local7 1267 srl in5, 29, in5 1268 1269 srl out5, 29, out5 1270 add in5, local5, in5 1271 1272 add out5, local7, out5 1273 cmp in2, 0 1274 1275 ! we use our own stackframe 1276 1277#ifdef OPENSSL_SYSNAME_ULTRASPARC 1278 be,pn %icc, .encrypt2.dec ! decryption 1279#else 1280 be .encrypt2.dec 1281#endif 1282 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] 1283 1284 ld [in3], out0 ! key 7531 first round 1285 mov LOOPS, out4 ! loop counter 1286 1287 ld [in3+4], out1 ! key 8642 first round 1288 sethi %hi(0x0000FC00), local5 1289 1290 call .des_enc 1291 mov in3, in4 1292 1293 ! rotate 1294 sll in5, 29, in0 1295 srl in5, 3, in5 1296 sll out5, 29, in1 1297 add in5, in0, in5 1298 srl out5, 3, out5 1299 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 1300 add out5, in1, out5 1301 st in5, [in0] 1302 st out5, [in0+4] 1303 1304 ret 1305 restore 1306 1307 1308.encrypt2.dec: 1309 1310 add in3, 120, in4 1311 1312 ld [in4], out0 ! key 7531 first round 1313 mov LOOPS, out4 ! loop counter 1314 1315 ld [in4+4], out1 ! key 8642 first round 1316 sethi %hi(0x0000FC00), local5 1317 1318 mov in5, local1 ! left expected in out5 1319 mov out5, in5 1320 1321 call .des_dec 1322 mov local1, out5 1323 1324.encrypt2.finish: 1325 1326 ! rotate 1327 sll in5, 29, in0 1328 srl in5, 3, in5 1329 sll out5, 29, in1 1330 add in5, in0, in5 1331 srl out5, 3, out5 1332 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 1333 add out5, in1, out5 1334 st out5, [in0] 1335 st in5, [in0+4] 1336 1337 ret 1338 restore 1339 1340.DES_encrypt2.end: 1341 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 1342 1343 1344! void DES_encrypt3(data, ks1, ks2, ks3) 1345! ************************************** 1346 1347 .align 32 1348 .global DES_encrypt3 1349 .type DES_encrypt3,#function 1350 1351DES_encrypt3: 1352 1353 save %sp, FRAME, %sp 1354 1355 call .PIC.me.up 1356 mov .PIC.me.up-(.-4),out0 1357 1358 ld [in0], in5 ! left 1359 add in2, 120, in4 ! ks2 1360 1361 ld [in0+4], out5 ! right 1362 mov in3, in2 ! save ks3 1363 1364 ! parameter 6 1/2 for include encryption/decryption 1365 ! parameter 7 1 for mov in1 to in3 1366 ! parameter 8 1 for mov in3 to in4 1367 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1368 1369 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) 1370 1371 call .des_dec 1372 mov in2, in3 ! preload ks3 1373 1374 call .des_enc 1375 nop 1376 1377 fp_macro(in5, out5, 1) 1378 1379 ret 1380 restore 1381 1382.DES_encrypt3.end: 1383 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 1384 1385 1386! void DES_decrypt3(data, ks1, ks2, ks3) 1387! ************************************** 1388 1389 .align 32 1390 .global DES_decrypt3 1391 .type DES_decrypt3,#function 1392 1393DES_decrypt3: 1394 1395 save %sp, FRAME, %sp 1396 1397 call .PIC.me.up 1398 mov .PIC.me.up-(.-4),out0 1399 1400 ld [in0], in5 ! left 1401 add in3, 120, in4 ! ks3 1402 1403 ld [in0+4], out5 ! right 1404 mov in2, in3 ! ks2 1405 1406 ! parameter 6 1/2 for include encryption/decryption 1407 ! parameter 7 1 for mov in1 to in3 1408 ! parameter 8 1 for mov in3 to in4 1409 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1410 1411 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) 1412 1413 call .des_enc 1414 add in1, 120, in4 ! preload ks1 1415 1416 call .des_dec 1417 nop 1418 1419 fp_macro(out5, in5, 1) 1420 1421 ret 1422 restore 1423 1424.DES_decrypt3.end: 1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 1426 1427 .align 256 1428 .type .des_and,#object 1429 .size .des_and,284 1430 1431.des_and: 1432 1433! This table is used for AND 0xFC when it is known that register 1434! bits 8-31 are zero. Makes it possible to do three arithmetic 1435! operations in one cycle. 1436 1437 .byte 0, 0, 0, 0, 4, 4, 4, 4 1438 .byte 8, 8, 8, 8, 12, 12, 12, 12 1439 .byte 16, 16, 16, 16, 20, 20, 20, 20 1440 .byte 24, 24, 24, 24, 28, 28, 28, 28 1441 .byte 32, 32, 32, 32, 36, 36, 36, 36 1442 .byte 40, 40, 40, 40, 44, 44, 44, 44 1443 .byte 48, 48, 48, 48, 52, 52, 52, 52 1444 .byte 56, 56, 56, 56, 60, 60, 60, 60 1445 .byte 64, 64, 64, 64, 68, 68, 68, 68 1446 .byte 72, 72, 72, 72, 76, 76, 76, 76 1447 .byte 80, 80, 80, 80, 84, 84, 84, 84 1448 .byte 88, 88, 88, 88, 92, 92, 92, 92 1449 .byte 96, 96, 96, 96, 100, 100, 100, 100 1450 .byte 104, 104, 104, 104, 108, 108, 108, 108 1451 .byte 112, 112, 112, 112, 116, 116, 116, 116 1452 .byte 120, 120, 120, 120, 124, 124, 124, 124 1453 .byte 128, 128, 128, 128, 132, 132, 132, 132 1454 .byte 136, 136, 136, 136, 140, 140, 140, 140 1455 .byte 144, 144, 144, 144, 148, 148, 148, 148 1456 .byte 152, 152, 152, 152, 156, 156, 156, 156 1457 .byte 160, 160, 160, 160, 164, 164, 164, 164 1458 .byte 168, 168, 168, 168, 172, 172, 172, 172 1459 .byte 176, 176, 176, 176, 180, 180, 180, 180 1460 .byte 184, 184, 184, 184, 188, 188, 188, 188 1461 .byte 192, 192, 192, 192, 196, 196, 196, 196 1462 .byte 200, 200, 200, 200, 204, 204, 204, 204 1463 .byte 208, 208, 208, 208, 212, 212, 212, 212 1464 .byte 216, 216, 216, 216, 220, 220, 220, 220 1465 .byte 224, 224, 224, 224, 228, 228, 228, 228 1466 .byte 232, 232, 232, 232, 236, 236, 236, 236 1467 .byte 240, 240, 240, 240, 244, 244, 244, 244 1468 .byte 248, 248, 248, 248, 252, 252, 252, 252 1469 1470 ! 5 numbers for initil/final permutation 1471 1472 .word 0x0f0f0f0f ! offset 256 1473 .word 0x0000ffff ! 260 1474 .word 0x33333333 ! 264 1475 .word 0x00ff00ff ! 268 1476 .word 0x55555555 ! 272 1477 1478 .word 0 ! 276 1479 .word LOOPS ! 280 1480 .word 0x0000FC00 ! 284 1481.PIC.DES_SPtrans: 1482 .word %r_disp32(DES_SPtrans) 1483 1484! input: out0 offset between .PIC.me.up and caller 1485! output: out0 pointer to .PIC.me.up 1486! out2 pointer to .des_and 1487! global1 pointer to DES_SPtrans 1488 .align 32 1489.PIC.me.up: 1490 add out0,%o7,out0 ! pointer to .PIC.me.up 1491#if 1 1492 ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1 1493 add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1 1494 add global1,out0,global1 1495#else 1496# ifdef OPENSSL_PIC 1497 ! In case anybody wonders why this code is same for both ABI. 1498 ! To start with it is not. Do note LDPTR below. But of course 1499 ! you must be wondering why the rest of it does not contain 1500 ! things like %hh, %hm and %lm. Well, those are needed only 1501 ! if OpenSSL library *itself* will become larger than 4GB, 1502 ! which is not going to happen any time soon. 1503 sethi %hi(DES_SPtrans),global1 1504 or global1,%lo(DES_SPtrans),global1 1505 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 1506 add global1,out0,global1 1507 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 1508 LDPTR [out2+global1],global1 1509# elif 0 1510 setn DES_SPtrans,out2,global1 ! synthetic instruction ! 1511# elif defined(ABI64) 1512 sethi %hh(DES_SPtrans),out2 1513 or out2,%hm(DES_SPtrans),out2 1514 sethi %lm(DES_SPtrans),global1 1515 or global1,%lo(DES_SPtrans),global1 1516 sllx out2,32,out2 1517 or out2,global1,global1 1518# else 1519 sethi %hi(DES_SPtrans),global1 1520 or global1,%lo(DES_SPtrans),global1 1521# endif 1522#endif 1523 retl 1524 add out0,.des_and-.PIC.me.up,out2 1525 1526! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) 1527! ***************************************************************** 1528 1529 1530 .align 32 1531 .global DES_ncbc_encrypt 1532 .type DES_ncbc_encrypt,#function 1533 1534DES_ncbc_encrypt: 1535 1536 save %sp, FRAME, %sp 1537 1538 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) 1539 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) 1540 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1541 1542 call .PIC.me.up 1543 mov .PIC.me.up-(.-4),out0 1544 1545 cmp in5, 0 ! enc 1546 1547#ifdef OPENSSL_SYSNAME_ULTRASPARC 1548 be,pn %icc, .ncbc.dec 1549#else 1550 be .ncbc.dec 1551#endif 1552 STPTR in4, IVEC 1553 1554 ! addr left right temp label 1555 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv 1556 1557 addcc in2, -8, in2 ! bytes missing when first block done 1558 1559#ifdef OPENSSL_SYSNAME_ULTRASPARC 1560 bl,pn %icc, .ncbc.enc.seven.or.less 1561#else 1562 bl .ncbc.enc.seven.or.less 1563#endif 1564 mov in3, in4 ! schedule 1565 1566.ncbc.enc.next.block: 1567 1568 load_little_endian(in0, out4, global4, local3, .LLE2) ! block 1569 1570.ncbc.enc.next.block_1: 1571 1572 xor in5, out4, in5 ! iv xor 1573 xor out5, global4, out5 ! iv xor 1574 1575 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 1576 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) 1577 1578.ncbc.enc.next.block_2: 1579 1580!// call .des_enc ! compares in2 to 8 1581! rounds inlined for alignment purposes 1582 1583 add global1, 768, global4 ! address sbox 4 since register used below 1584 1585 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 1586 1587#ifdef OPENSSL_SYSNAME_ULTRASPARC 1588 bl,pn %icc, .ncbc.enc.next.block_fp 1589#else 1590 bl .ncbc.enc.next.block_fp 1591#endif 1592 add in0, 8, in0 ! input address 1593 1594 ! If 8 or more bytes are to be encrypted after this block, 1595 ! we combine final permutation for this block with initial 1596 ! permutation for next block. Load next block: 1597 1598 load_little_endian(in0, global3, global4, local5, .LLE12) 1599 1600 ! parameter 1 original left 1601 ! parameter 2 original right 1602 ! parameter 3 left ip 1603 ! parameter 4 right ip 1604 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1605 ! 2: mov in4 to in3 1606 ! 1607 ! also adds -8 to length in2 and loads loop counter to out4 1608 1609 fp_ip_macro(out0, out1, global3, global4, 2) 1610 1611 store_little_endian(in1, out0, out1, local3, .SLE10) ! block 1612 1613 ld [in3], out0 ! key 7531 first round next block 1614 mov in5, local1 1615 xor global3, out5, in5 ! iv xor next block 1616 1617 ld [in3+4], out1 ! key 8642 1618 add global1, 512, global3 ! address sbox 3 since register used 1619 xor global4, local1, out5 ! iv xor next block 1620 1621 ba .ncbc.enc.next.block_2 1622 add in1, 8, in1 ! output adress 1623 1624.ncbc.enc.next.block_fp: 1625 1626 fp_macro(in5, out5) 1627 1628 store_little_endian(in1, in5, out5, local3, .SLE1) ! block 1629 1630 addcc in2, -8, in2 ! bytes missing when next block done 1631 1632#ifdef OPENSSL_SYSNAME_ULTRASPARC 1633 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 1634#else 1635 bpos .ncbc.enc.next.block 1636#endif 1637 add in1, 8, in1 1638 1639.ncbc.enc.seven.or.less: 1640 1641 cmp in2, -8 1642 1643#ifdef OPENSSL_SYSNAME_ULTRASPARC 1644 ble,pt %icc, .ncbc.enc.finish 1645#else 1646 ble .ncbc.enc.finish 1647#endif 1648 nop 1649 1650 add in2, 8, local1 ! bytes to load 1651 1652 ! addr, length, dest left, dest right, temp, temp2, label, ret label 1653 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) 1654 1655 ! Loads 1 to 7 bytes little endian to global4, out4 1656 1657 1658.ncbc.enc.finish: 1659 1660 LDPTR IVEC, local4 1661 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec 1662 1663 ret 1664 restore 1665 1666 1667.ncbc.dec: 1668 1669 STPTR in0, INPUT 1670 cmp in2, 0 ! length 1671 add in3, 120, in3 1672 1673 LDPTR IVEC, local7 ! ivec 1674#ifdef OPENSSL_SYSNAME_ULTRASPARC 1675 ble,pn %icc, .ncbc.dec.finish 1676#else 1677 ble .ncbc.dec.finish 1678#endif 1679 mov in3, in4 ! schedule 1680 1681 STPTR in1, OUTPUT 1682 mov in0, local5 ! input 1683 1684 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec 1685 1686.ncbc.dec.next.block: 1687 1688 load_little_endian(local5, in5, out5, local3, .LLE4) ! block 1689 1690 ! parameter 6 1/2 for include encryption/decryption 1691 ! parameter 7 1 for mov in1 to in3 1692 ! parameter 8 1 for mov in3 to in4 1693 1694 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4 1695 1696 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 1697 1698 ! in2 is bytes left to be stored 1699 ! in2 is compared to 8 in the rounds 1700 1701 xor out5, in0, out4 ! iv xor 1702#ifdef OPENSSL_SYSNAME_ULTRASPARC 1703 bl,pn %icc, .ncbc.dec.seven.or.less 1704#else 1705 bl .ncbc.dec.seven.or.less 1706#endif 1707 xor in5, in1, global4 ! iv xor 1708 1709 ! Load ivec next block now, since input and output address might be the same. 1710 1711 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv 1712 1713 store_little_endian(local7, out4, global4, local3, .SLE3) 1714 1715 STPTR local5, INPUT 1716 add local7, 8, local7 1717 addcc in2, -8, in2 1718 1719#ifdef OPENSSL_SYSNAME_ULTRASPARC 1720 bg,pt %icc, .ncbc.dec.next.block 1721#else 1722 bg .ncbc.dec.next.block 1723#endif 1724 STPTR local7, OUTPUT 1725 1726 1727.ncbc.dec.store.iv: 1728 1729 LDPTR IVEC, local4 ! ivec 1730 store_little_endian(local4, in0, in1, local5, .SLE4) 1731 1732.ncbc.dec.finish: 1733 1734 ret 1735 restore 1736 1737.ncbc.dec.seven.or.less: 1738 1739 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec 1740 1741 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) 1742 1743 1744.DES_ncbc_encrypt.end: 1745 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt 1746 1747 1748! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc) 1749! ************************************************************************** 1750 1751 1752 .align 32 1753 .global DES_ede3_cbc_encrypt 1754 .type DES_ede3_cbc_encrypt,#function 1755 1756DES_ede3_cbc_encrypt: 1757 1758 save %sp, FRAME, %sp 1759 1760 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) 1761 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1762 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) 1763 1764 call .PIC.me.up 1765 mov .PIC.me.up-(.-4),out0 1766 1767 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc 1768 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1769 cmp local3, 0 ! enc 1770 1771#ifdef OPENSSL_SYSNAME_ULTRASPARC 1772 be,pn %icc, .ede3.dec 1773#else 1774 be .ede3.dec 1775#endif 1776 STPTR in4, KS2 1777 1778 STPTR in5, KS3 1779 1780 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec 1781 1782 addcc in2, -8, in2 ! bytes missing after next block 1783 1784#ifdef OPENSSL_SYSNAME_ULTRASPARC 1785 bl,pn %icc, .ede3.enc.seven.or.less 1786#else 1787 bl .ede3.enc.seven.or.less 1788#endif 1789 STPTR in3, KS1 1790 1791.ede3.enc.next.block: 1792 1793 load_little_endian(in0, out4, global4, local3, .LLE7) 1794 1795.ede3.enc.next.block_1: 1796 1797 LDPTR KS2, in4 1798 xor in5, out4, in5 ! iv xor 1799 xor out5, global4, out5 ! iv xor 1800 1801 LDPTR KS1, in3 1802 add in4, 120, in4 ! for decryption we use last subkey first 1803 nop 1804 1805 ip_macro(in5, out5, in5, out5, in3) 1806 1807.ede3.enc.next.block_2: 1808 1809 call .des_enc ! ks1 in3 1810 nop 1811 1812 call .des_dec ! ks2 in4 1813 LDPTR KS3, in3 1814 1815 call .des_enc ! ks3 in3 compares in2 to 8 1816 nop 1817 1818#ifdef OPENSSL_SYSNAME_ULTRASPARC 1819 bl,pn %icc, .ede3.enc.next.block_fp 1820#else 1821 bl .ede3.enc.next.block_fp 1822#endif 1823 add in0, 8, in0 1824 1825 ! If 8 or more bytes are to be encrypted after this block, 1826 ! we combine final permutation for this block with initial 1827 ! permutation for next block. Load next block: 1828 1829 load_little_endian(in0, global3, global4, local5, .LLE11) 1830 1831 ! parameter 1 original left 1832 ! parameter 2 original right 1833 ! parameter 3 left ip 1834 ! parameter 4 right ip 1835 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1836 ! 2: mov in4 to in3 1837 ! 1838 ! also adds -8 to length in2 and loads loop counter to out4 1839 1840 fp_ip_macro(out0, out1, global3, global4, 1) 1841 1842 store_little_endian(in1, out0, out1, local3, .SLE9) ! block 1843 1844 mov in5, local1 1845 xor global3, out5, in5 ! iv xor next block 1846 1847 ld [in3], out0 ! key 7531 1848 add global1, 512, global3 ! address sbox 3 1849 xor global4, local1, out5 ! iv xor next block 1850 1851 ld [in3+4], out1 ! key 8642 1852 add global1, 768, global4 ! address sbox 4 1853 ba .ede3.enc.next.block_2 1854 add in1, 8, in1 1855 1856.ede3.enc.next.block_fp: 1857 1858 fp_macro(in5, out5) 1859 1860 store_little_endian(in1, in5, out5, local3, .SLE5) ! block 1861 1862 addcc in2, -8, in2 ! bytes missing when next block done 1863 1864#ifdef OPENSSL_SYSNAME_ULTRASPARC 1865 bpos,pt %icc, .ede3.enc.next.block 1866#else 1867 bpos .ede3.enc.next.block 1868#endif 1869 add in1, 8, in1 1870 1871.ede3.enc.seven.or.less: 1872 1873 cmp in2, -8 1874 1875#ifdef OPENSSL_SYSNAME_ULTRASPARC 1876 ble,pt %icc, .ede3.enc.finish 1877#else 1878 ble .ede3.enc.finish 1879#endif 1880 nop 1881 1882 add in2, 8, local1 ! bytes to load 1883 1884 ! addr, length, dest left, dest right, temp, temp2, label, ret label 1885 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) 1886 1887.ede3.enc.finish: 1888 1889 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1890 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec 1891 1892 ret 1893 restore 1894 1895.ede3.dec: 1896 1897 STPTR in0, INPUT 1898 add in5, 120, in5 1899 1900 STPTR in1, OUTPUT 1901 mov in0, local5 1902 add in3, 120, in3 1903 1904 STPTR in3, KS1 1905 cmp in2, 0 1906 1907#ifdef OPENSSL_SYSNAME_ULTRASPARC 1908 ble %icc, .ede3.dec.finish 1909#else 1910 ble .ede3.dec.finish 1911#endif 1912 STPTR in5, KS3 1913 1914 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv 1915 load_little_endian(local7, in0, in1, local3, .LLE8) 1916 1917.ede3.dec.next.block: 1918 1919 load_little_endian(local5, in5, out5, local3, .LLE9) 1920 1921 ! parameter 6 1/2 for include encryption/decryption 1922 ! parameter 7 1 for mov in1 to in3 1923 ! parameter 8 1 for mov in3 to in4 1924 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1925 1926 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 1927 1928 call .des_enc ! ks2 in3 1929 LDPTR KS1, in4 1930 1931 call .des_dec ! ks1 in4 1932 nop 1933 1934 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 1935 1936 ! in2 is bytes left to be stored 1937 ! in2 is compared to 8 in the rounds 1938 1939 xor out5, in0, out4 1940#ifdef OPENSSL_SYSNAME_ULTRASPARC 1941 bl,pn %icc, .ede3.dec.seven.or.less 1942#else 1943 bl .ede3.dec.seven.or.less 1944#endif 1945 xor in5, in1, global4 1946 1947 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block 1948 1949 store_little_endian(local7, out4, global4, local3, .SLE7) ! block 1950 1951 STPTR local5, INPUT 1952 addcc in2, -8, in2 1953 add local7, 8, local7 1954 1955#ifdef OPENSSL_SYSNAME_ULTRASPARC 1956 bg,pt %icc, .ede3.dec.next.block 1957#else 1958 bg .ede3.dec.next.block 1959#endif 1960 STPTR local7, OUTPUT 1961 1962.ede3.dec.store.iv: 1963 1964 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1965 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec 1966 1967.ede3.dec.finish: 1968 1969 ret 1970 restore 1971 1972.ede3.dec.seven.or.less: 1973 1974 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv 1975 1976 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) 1977 1978 1979.DES_ede3_cbc_encrypt.end: 1980 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt 1981