1;/*---------------------------------------------------------------------* 2; * The following (piece of) code, (part of) the 2xSaI engine, * 3; * copyright (c) 2001 by Derek Liauw Kie Fa. * 4; * Non-Commercial use of the engine is allowed and is encouraged, * 5; * provided that appropriate credit be given and that this copyright * 6; * notice will not be removed under any circumstance. * 7; * You may freely modify this code, but I request * 8; * that any improvements to the engine be submitted to me, so * 9; * that I can implement these improvements in newer versions of * 10; * the engine. * 11; * If you need more information, have any comments or suggestions, * 12; * you can e-mail me. My e-mail: DerekL666@yahoo.com * 13; *---------------------------------------------------------------------*/ 14; modified by Spacy to compile with yasm [2006-06-20] 15 16%include "macros.mac" 17 18;---------------------- 19; 2xSaI, Super2xSaI, SuperEagle .. FINAL. no versioning anymore.. 20;---------------------- 21 22BITS 32 23 24SECTION .text 25 26srcPtr equ 8 27deltaPtr equ 12 28srcPitch equ 16 29width equ 20 30dstOffset equ 24 31dstPitch equ 28 32dstSegment equ 32 33 34colorB0 equ -2 35colorB1 equ 0 36colorB2 equ 2 37colorB3 equ 4 38 39color7 equ -2 40color8 equ 0 41color9 equ 2 42 43color4 equ -2 44color5 equ 0 45color6 equ 2 46colorS2 equ 4 47 48color1 equ -2 49color2 equ 0 50color3 equ 2 51colorS1 equ 4 52 53colorA0 equ -2 54colorA1 equ 0 55colorA2 equ 2 56colorA3 equ 4 57 58 59NEWSYM _2xSaISuper2xSaILine 60; Store some stuff 61 push ebp 62 mov ebp, esp 63 pushad 64 65; Prepare the destination 66%ifdef __DJGPP__ 67 ; Set the selector 68 mov eax, [ebp+dstSegment] 69 mov fs, ax 70%endif 71 mov edx, [ebp+dstOffset] ; edx points to the screen 72; Prepare the source 73 ; eax points to colorA 74 mov eax, [ebp+srcPtr] ;eax points to colorA 75 mov ebx, [ebp+srcPitch] ;ebx contains the source pitch 76 mov ecx, [ebp+width] ;ecx contains the number of pixels to process 77 ; eax now points to colorB1 78 sub eax, ebx ;eax points to B1 which is the base 79 80; Main Loop 81.Loop: push ecx 82 83 ;-----Check Delta------------------ 84 mov ecx, [ebp+deltaPtr] 85 86 87 ;load source img 88 movq mm0, [eax+colorB0] 89 movq mm1, [eax+colorB3] 90 movq mm2, [eax+ebx+color4] 91 movq mm3, [eax+ebx+colorS2] 92 movq mm4, [eax+ebx+ebx+color1] 93 movq mm5, [eax+ebx+ebx+colorS1] 94 push eax 95 add eax, ebx 96 movq mm6, [eax+ebx+ebx+colorA0] 97 movq mm7, [eax+ebx+ebx+colorA3] 98 pop eax 99 100 ;compare to delta 101 pcmpeqw mm0, [ecx+2+colorB0] 102 pcmpeqw mm1, [ecx+2+colorB3] 103 pcmpeqw mm2, [ecx+ebx+2+color4] 104 pcmpeqw mm3, [ecx+ebx+2+colorS2] 105 pcmpeqw mm4, [ecx+ebx+ebx+2+color1] 106 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] 107 add ecx, ebx 108 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] 109 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] 110 sub ecx, ebx 111 112 113 ;compose results 114 pand mm0, mm1 115 pand mm2, mm3 116 pand mm4, mm5 117 pand mm6, mm7 118 pand mm0, mm2 119 pand mm4, mm6 120 pxor mm7, mm7 121 pand mm0, mm4 122 movq mm6, [eax+colorB0] 123 pcmpeqw mm7, mm0 ;did any compare give us a zero ? 124 125 movq [ecx+2+colorB0], mm6 126 127 packsswb mm7, mm7 128 movd ecx, mm7 129 test ecx, ecx 130 jz near .SKIP_PROCESS ;no, so we can skip 131 132 ;End Delta 133 134 ;--------------------------------- 135 movq mm0, [eax+ebx+color5] 136 movq mm1, [eax+ebx+color6] 137 movq mm2, mm0 138 movq mm3, mm1 139 movq mm4, mm0 140 movq mm5, mm1 141 142 pand mm0, [colorMask] 143 pand mm1, [colorMask] 144 145 psrlw mm0, 1 146 psrlw mm1, 1 147 148 pand mm3, [lowPixelMask] 149 paddw mm0, mm1 150 151 pand mm3, mm2 152 paddw mm0, mm3 ;mm0 contains the interpolated values 153 movq [I56Pixel], mm0 154 movq mm7, mm0 155 156 ;------------------- 157 movq mm0, mm7 158 movq mm1, mm4 ;5,5,5,6 159 movq mm2, mm0 160 movq mm3, mm1 161 162 pand mm0, [colorMask] 163 pand mm1, [colorMask] 164 165 psrlw mm0, 1 166 psrlw mm1, 1 167 168 pand mm3, [lowPixelMask] 169 paddw mm0, mm1 170 171 pand mm3, mm2 172 paddw mm0, mm3 ;mm0 contains the interpolated values 173 movq [I5556Pixel], mm0 174 ;-------------------- 175 176 movq mm0, mm7 177 movq mm1, mm5 ;6,6,6,5 178 movq mm2, mm0 179 movq mm3, mm1 180 181 pand mm0, [colorMask] 182 pand mm1, [colorMask] 183 184 psrlw mm0, 1 185 psrlw mm1, 1 186 187 pand mm3, [lowPixelMask] 188 paddw mm0, mm1 189 190 pand mm3, mm2 191 paddw mm0, mm3 192 movq [I5666Pixel], mm0 193 194 ;------------------------- 195 ;------------------------- 196 movq mm0, [eax+ebx+ebx+color2] 197 movq mm1, [eax+ebx+ebx+color3] 198 movq mm2, mm0 199 movq mm3, mm1 200 movq mm4, mm0 201 movq mm5, mm1 202 203 pand mm0, [colorMask] 204 pand mm1, [colorMask] 205 206 psrlw mm0, 1 207 psrlw mm1, 1 208 209 pand mm3, [lowPixelMask] 210 paddw mm0, mm1 211 212 pand mm3, mm2 213 paddw mm0, mm3 214 movq [I23Pixel], mm0 215 movq mm7, mm0 216 217 ;--------------------- 218 movq mm0, mm7 219 movq mm1, mm4 ;2,2,2,3 220 movq mm2, mm0 221 movq mm3, mm1 222 223 pand mm0, [colorMask] 224 pand mm1, [colorMask] 225 226 psrlw mm0, 1 227 psrlw mm1, 1 228 229 pand mm3, [lowPixelMask] 230 paddw mm0, mm1 231 232 pand mm3, mm2 233 paddw mm0, mm3 234 movq [I2223Pixel], mm0 235 236 ;---------------------- 237 movq mm0, mm7 238 movq mm1, mm5 ;3,3,3,2 239 movq mm2, mm0 240 movq mm3, mm1 241 242 pand mm0, [colorMask] 243 pand mm1, [colorMask] 244 245 psrlw mm0, 1 246 psrlw mm1, 1 247 248 pand mm3, [lowPixelMask] 249 paddw mm0, mm1 250 251 pand mm3, mm2 252 paddw mm0, mm3 253 movq [I2333Pixel], mm0 254 255 256 ;-------------------- 257;//////////////////////////////// 258; Decide which "branch" to take 259;-------------------------------- 260 movq mm0, [eax+ebx+color5] 261 movq mm1, [eax+ebx+color6] 262 movq mm6, mm0 263 movq mm7, mm1 264 pcmpeqw mm0, [eax+ebx+ebx+color3] 265 pcmpeqw mm1, [eax+ebx+ebx+color2] 266 pcmpeqw mm6, mm7 267 268 movq mm2, mm0 269 movq mm3, mm0 270 271 pand mm0, mm1 ;colorA == colorD && colorB == colorC 272 pxor mm7, mm7 273 274 pcmpeqw mm2, mm7 275 pand mm6, mm0 276 pand mm2, mm1 ;colorA != colorD && colorB == colorC 277 278 pcmpeqw mm1, mm7 279 280 pand mm1, mm3 ;colorA == colorD && colorB != colorC 281 pxor mm0, mm6 282 por mm1, mm6 283 movq mm7, mm0 284 movq [Mask26], mm2 285 packsswb mm7, mm7 286 movq [Mask35], mm1 287 288 movd ecx, mm7 289 test ecx, ecx 290 jz near .SKIP_GUESS 291 292;--------------------------------------------- 293 movq mm6, mm0 294 movq mm4, [eax+ebx+colorA] 295 movq mm5, [eax+ebx+colorB] 296 pxor mm7, mm7 297 pand mm6, [ONE] 298 299 movq mm0, [eax+colorE] 300 movq mm1, [eax+ebx+colorG] 301 movq mm2, mm0 302 movq mm3, mm1 303 pcmpeqw mm0, mm4 304 pcmpeqw mm1, mm4 305 pcmpeqw mm2, mm5 306 pcmpeqw mm3, mm5 307 pand mm0, mm6 308 pand mm1, mm6 309 pand mm2, mm6 310 pand mm3, mm6 311 paddw mm0, mm1 312 paddw mm2, mm3 313 314 pxor mm3, mm3 315 pcmpgtw mm0, mm6 316 pcmpgtw mm2, mm6 317 pcmpeqw mm0, mm3 318 pcmpeqw mm2, mm3 319 pand mm0, mm6 320 pand mm2, mm6 321 paddw mm7, mm0 322 psubw mm7, mm2 323 324 movq mm0, [eax+colorF] 325 movq mm1, [eax+ebx+colorK] 326 movq mm2, mm0 327 movq mm3, mm1 328 pcmpeqw mm0, mm4 329 pcmpeqw mm1, mm4 330 pcmpeqw mm2, mm5 331 pcmpeqw mm3, mm5 332 pand mm0, mm6 333 pand mm1, mm6 334 pand mm2, mm6 335 pand mm3, mm6 336 paddw mm0, mm1 337 paddw mm2, mm3 338 339 pxor mm3, mm3 340 pcmpgtw mm0, mm6 341 pcmpgtw mm2, mm6 342 pcmpeqw mm0, mm3 343 pcmpeqw mm2, mm3 344 pand mm0, mm6 345 pand mm2, mm6 346 paddw mm7, mm0 347 psubw mm7, mm2 348 349 push eax 350 add eax, ebx 351 movq mm0, [eax+ebx+colorH] 352 movq mm1, [eax+ebx+ebx+colorN] 353 movq mm2, mm0 354 movq mm3, mm1 355 pcmpeqw mm0, mm4 356 pcmpeqw mm1, mm4 357 pcmpeqw mm2, mm5 358 pcmpeqw mm3, mm5 359 pand mm0, mm6 360 pand mm1, mm6 361 pand mm2, mm6 362 pand mm3, mm6 363 paddw mm0, mm1 364 paddw mm2, mm3 365 366 pxor mm3, mm3 367 pcmpgtw mm0, mm6 368 pcmpgtw mm2, mm6 369 pcmpeqw mm0, mm3 370 pcmpeqw mm2, mm3 371 pand mm0, mm6 372 pand mm2, mm6 373 paddw mm7, mm0 374 psubw mm7, mm2 375 376 movq mm0, [eax+ebx+colorL] 377 movq mm1, [eax+ebx+ebx+colorO] 378 movq mm2, mm0 379 movq mm3, mm1 380 pcmpeqw mm0, mm4 381 pcmpeqw mm1, mm4 382 pcmpeqw mm2, mm5 383 pcmpeqw mm3, mm5 384 pand mm0, mm6 385 pand mm1, mm6 386 pand mm2, mm6 387 pand mm3, mm6 388 paddw mm0, mm1 389 paddw mm2, mm3 390 391 pxor mm3, mm3 392 pcmpgtw mm0, mm6 393 pcmpgtw mm2, mm6 394 pcmpeqw mm0, mm3 395 pcmpeqw mm2, mm3 396 pand mm0, mm6 397 pand mm2, mm6 398 paddw mm7, mm0 399 psubw mm7, mm2 400 401 pop eax 402 movq mm1, mm7 403 pxor mm0, mm0 404 pcmpgtw mm7, mm0 405 pcmpgtw mm0, mm1 406 407 por mm7, [Mask35] 408 por mm0, [Mask26] 409 movq [Mask35], mm7 410 movq [Mask26], mm0 411 412.SKIP_GUESS: 413 414 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image... 415 416 417 movq mm0, [eax+ebx+color5] 418 movq mm1, [eax+ebx+ebx+color2] 419 movq mm2, mm0 420 movq mm3, mm1 421 movq mm4, mm0 422 movq mm5, mm1 423 424 pand mm0, [colorMask] 425 pand mm1, [colorMask] 426 427 psrlw mm0, 1 428 psrlw mm1, 1 429 430 pand mm3, [lowPixelMask] 431 paddw mm0, mm1 432 433 pand mm3, mm2 434 paddw mm0, mm3 ;mm0 contains the interpolated values 435 ;--------------------------- 436 437 438 439%ifdef dfhsdfhsdahdsfhdsfh 440 441 if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2) 442 product2a = INTERPOLATE (color2, color5); 443 else 444 if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) 445 product2a = INTERPOLATE(color2, color5); 446 else 447 product2a = color2; 448 449 if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) 450 product1a = INTERPOLATE (color2, color5); 451 else 452 if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) 453 product1a = INTERPOLATE(color2, color5); 454 else 455 product1a = color5; 456 457%endif 458 459 460 movq mm7, [Mask26] 461 movq mm6, [eax+colorB2] 462 movq mm5, [eax+ebx+ebx+color2] 463 movq mm4, [eax+ebx+ebx+color1] 464 pcmpeqw mm4, mm5 465 pcmpeqw mm6, mm5 466 pxor mm5, mm5 467 pand mm7, mm4 468 pcmpeqw mm6, mm5 469 pand mm7, mm6 470 471 472 473 movq mm6, [eax+ebx+ebx+color3] 474 movq mm5, [eax+ebx+ebx+color2] 475 movq mm4, [eax+ebx+ebx+color1] 476 movq mm2, [eax+ebx+color5] 477 movq mm1, [eax+ebx+color4] 478 movq mm3, [eax+colorB0] 479 480 pcmpeqw mm2, mm4 481 pcmpeqw mm6, mm5 482 pcmpeqw mm1, mm5 483 pcmpeqw mm3, mm5 484 pxor mm5, mm5 485 pcmpeqw mm2, mm5 486 pcmpeqw mm3, mm5 487 pand mm6, mm1 488 pand mm2, mm3 489 pand mm6, mm2 490 por mm7, mm6 491 492 493 movq mm6, mm7 494 pcmpeqw mm6, mm5 495 pand mm7, mm0 496 497 movq mm1, [eax+ebx+color5] 498 pand mm6, mm1 499 por mm7, mm6 500 movq [final1a], mm7 ;finished 1a 501 502 503 504 ;-------------------------------- 505 506 movq mm7, [Mask35] 507 push eax 508 add eax, ebx 509 movq mm6, [eax+ebx+ebx+colorA2] 510 pop eax 511 movq mm5, [eax+ebx+color5] 512 movq mm4, [eax+ebx+color4] 513 pcmpeqw mm4, mm5 514 pcmpeqw mm6, mm5 515 pxor mm5, mm5 516 pand mm7, mm4 517 pcmpeqw mm6, mm5 518 pand mm7, mm6 519 520 521 522 movq mm6, [eax+ebx+color6] 523 movq mm5, [eax+ebx+color5] 524 movq mm4, [eax+ebx+color4] 525 movq mm2, [eax+ebx+ebx+color2] 526 movq mm1, [eax+ebx+ebx+color1] 527 push eax 528 add eax, ebx 529 movq mm3, [eax+ebx+ebx+colorA0] 530 pop eax 531 532 pcmpeqw mm2, mm4 533 pcmpeqw mm6, mm5 534 pcmpeqw mm1, mm5 535 pcmpeqw mm3, mm5 536 pxor mm5, mm5 537 pcmpeqw mm2, mm5 538 pcmpeqw mm3, mm5 539 pand mm6, mm1 540 pand mm2, mm3 541 pand mm6, mm2 542 por mm7, mm6 543 544 545 movq mm6, mm7 546 pcmpeqw mm6, mm5 547 pand mm7, mm0 548 549 movq mm1, [eax+ebx+ebx+color2] 550 pand mm6, mm1 551 por mm7, mm6 552 movq [final2a], mm7 ;finished 2a 553 554 555 ;-------------------------------------------- 556 557 558%ifdef dfhsdfhsdahdsfhdsfh 559 if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) 560 product2b = Q_INTERPOLATE (color3, color3, color3, color2); 561 else 562 if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) 563 product2b = Q_INTERPOLATE (color2, color2, color2, color3); 564 else 565 product2b = INTERPOLATE (color2, color3); 566 567 if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) 568 product1b = Q_INTERPOLATE (color6, color6, color6, color5); 569 else 570 if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) 571 product1b = Q_INTERPOLATE (color6, color5, color5, color5); 572 else 573 product1b = INTERPOLATE (color5, color6); 574%endif 575 576 push eax 577 add eax, ebx 578 pxor mm7, mm7 579 movq mm0, [eax+ebx+ebx+colorA0] 580 movq mm1, [eax+ebx+ebx+colorA1] 581 movq mm2, [eax+ebx+ebx+colorA2] 582 movq mm3, [eax+ebx+ebx+colorA3] 583 pop eax 584 movq mm4, [eax+ebx+ebx+color2] 585 movq mm5, [eax+ebx+ebx+color3] 586 movq mm6, [eax+ebx+color6] 587 588 pcmpeqw mm6, mm5 589 pcmpeqw mm1, mm5 590 pcmpeqw mm4, mm2 591 pcmpeqw mm0, mm5 592 pcmpeqw mm4, mm7 593 pcmpeqw mm0, mm7 594 pand mm0, mm4 595 pand mm6, mm1 596 pand mm0, mm6 597 598 movq mm4, [eax+ebx+color2] 599 movq mm5, [eax+ebx+ebx+color5] 600 movq mm6, [eax+ebx+ebx+color3] 601 602 pcmpeqw mm5, mm4 603 pcmpeqw mm2, mm4 604 pcmpeqw mm1, mm6 605 pcmpeqw mm3, mm4 606 pcmpeqw mm1, mm7 607 pcmpeqw mm3, mm7 608 pand mm2, mm5 609 pand mm1, mm3 610 pand mm1, mm2 611 612 movq mm2, mm0 613 movq mm7, [I2333Pixel] 614 movq mm6, [I2223Pixel] 615 movq mm5, [I23Pixel] 616 movq mm4, [Mask35] 617 movq mm3, [Mask26] 618 619 por mm2, mm4 620 pand mm4, [eax+ebx+ebx+color3] 621 por mm2, mm3 622 pand mm3, [eax+ebx+ebx+color2] 623 por mm2, mm1 624 pand mm0, mm7 625 pand mm1, mm6 626 pxor mm7, mm7 627 pcmpeqw mm2, mm7 628 por mm0, mm1 629 por mm3, mm4 630 pand mm2, mm5 631 por mm0, mm3 632 por mm0, mm2 633 movq [final2b], mm0 634 635 ;----------------------------------- 636 637 638 pxor mm7, mm7 639 movq mm0, [eax+colorB0] 640 movq mm1, [eax+colorB1] 641 movq mm2, [eax+colorB2] 642 movq mm3, [eax+colorB3] 643 movq mm4, [eax+ebx+color5] 644 movq mm5, [eax+ebx+color6] 645 movq mm6, [eax+ebx+ebx+color3] 646 647 pcmpeqw mm6, mm5 648 pcmpeqw mm1, mm5 649 pcmpeqw mm4, mm2 650 pcmpeqw mm0, mm5 651 pcmpeqw mm4, mm7 652 pcmpeqw mm0, mm7 653 pand mm0, mm4 654 pand mm6, mm1 655 pand mm0, mm6 656 657 movq mm4, [eax+ebx+color5] 658 movq mm5, [eax+ebx+ebx+color2] 659 movq mm6, [eax+ebx+color6] 660 661 pcmpeqw mm5, mm4 662 pcmpeqw mm2, mm4 663 pcmpeqw mm1, mm6 664 pcmpeqw mm3, mm4 665 pcmpeqw mm1, mm7 666 pcmpeqw mm3, mm7 667 pand mm2, mm5 668 pand mm1, mm3 669 pand mm1, mm2 670 671 movq mm2, mm0 672 movq mm7, [I5666Pixel] 673 movq mm6, [I5556Pixel] 674 movq mm5, [I56Pixel] 675 movq mm4, [Mask35] 676 movq mm3, [Mask26] 677 678 por mm2, mm4 679 pand mm4, [eax+ebx+color5] 680 por mm2, mm3 681 pand mm3, [eax+ebx+color6] 682 por mm2, mm1 683 pand mm0, mm7 684 pand mm1, mm6 685 pxor mm7, mm7 686 pcmpeqw mm2, mm7 687 por mm0, mm1 688 por mm3, mm4 689 pand mm2, mm5 690 por mm0, mm3 691 por mm0, mm2 692 movq [final1b], mm0 693 694 ;--------- 695 696 movq mm0, [final1a] 697 movq mm4, [final2a] 698 movq mm2, [final1b] 699 movq mm6, [final2b] 700 701 movq mm1, mm0 702 movq mm5, mm4 703 704 705 punpcklwd mm0, mm2 706 punpckhwd mm1, mm2 707 708 punpcklwd mm4, mm6 709 punpckhwd mm5, mm6 710 711 712%ifdef FAR_POINTER 713 movq [fs:edx], mm0 714 movq [fs:edx+8], mm1 715 push edx 716 add edx, [ebp+dstPitch] 717 movq [fs:edx], mm4 718 movq [fs:edx+8], mm5 719 pop edx 720%else 721 movq [es:edx], mm0 722 movq [es:edx+8], mm1 723 push edx 724 add edx, [ebp+dstPitch] 725 movq [es:edx], mm4 726 movq [es:edx+8], mm5 727 pop edx 728%endif 729.SKIP_PROCESS: 730 mov ecx, [ebp+deltaPtr] 731 add ecx, 8 732 mov [ebp+deltaPtr], ecx 733 add edx, 16 734 add eax, 8 735 736 pop ecx 737 sub ecx, 4 738 cmp ecx, 0 739 jg near .Loop 740 741; Restore some stuff 742 popad 743 mov esp, ebp 744 pop ebp 745 emms 746 ret 747 748 749;------------------------------------------------------------------------- 750;------------------------------------------------------------------------- 751;------------------------------------------------------------------------- 752;------------------------------------------------------------------------- 753;------------------------------------------------------------------------- 754;------------------------------------------------------------------------- 755;------------------------------------------------------------------------- 756 757 758 759 760 761NEWSYM _2xSaISuperEagleLine 762; Store some stuff 763 push ebp 764 mov ebp, esp 765 pushad 766 767; Prepare the destination 768%ifdef __DJGPP__ 769 ; Set the selector 770 mov eax, [ebp+dstSegment] 771 mov fs, ax 772%endif 773 mov edx, [ebp+dstOffset] ; edx points to the screen 774; Prepare the source 775 ; eax points to colorA 776 mov eax, [ebp+srcPtr] 777 mov ebx, [ebp+srcPitch] 778 mov ecx, [ebp+width] 779 ; eax now points to colorB1 780 sub eax, ebx 781 782; Main Loop 783.Loop: push ecx 784 785 ;-----Check Delta------------------ 786 mov ecx, [ebp+deltaPtr] 787 788 movq mm0, [eax+colorB0] 789 movq mm1, [eax+colorB3] 790 movq mm2, [eax+ebx+color4] 791 movq mm3, [eax+ebx+colorS2] 792 movq mm4, [eax+ebx+ebx+color1] 793 movq mm5, [eax+ebx+ebx+colorS1] 794 push eax 795 add eax, ebx 796 movq mm6, [eax+ebx+ebx+colorA0] 797 movq mm7, [eax+ebx+ebx+colorA3] 798 pop eax 799 800 pcmpeqw mm0, [ecx+2+colorB0] 801 pcmpeqw mm1, [ecx+2+colorB3] 802 pcmpeqw mm2, [ecx+ebx+2+color4] 803 pcmpeqw mm3, [ecx+ebx+2+colorS2] 804 pcmpeqw mm4, [ecx+ebx+ebx+2+color1] 805 pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1] 806 add ecx, ebx 807 pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0] 808 pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3] 809 sub ecx, ebx 810 811 812 pand mm0, mm1 813 pand mm2, mm3 814 pand mm4, mm5 815 pand mm6, mm7 816 pand mm0, mm2 817 pand mm4, mm6 818 pxor mm7, mm7 819 pand mm0, mm4 820 movq mm6, [eax+colorB0] 821 pcmpeqw mm7, mm0 822 823 movq [ecx+2+colorB0], mm6 824 825 packsswb mm7, mm7 826 movd ecx, mm7 827 test ecx, ecx 828 jz near .SKIP_PROCESS 829 830 ;End Delta 831 832 ;--------------------------------- 833 movq mm0, [eax+ebx+color5] 834 movq mm1, [eax+ebx+color6] 835 movq mm2, mm0 836 movq mm3, mm1 837 movq mm4, mm0 838 movq mm5, mm1 839 840 pand mm0, [colorMask] 841 pand mm1, [colorMask] 842 843 psrlw mm0, 1 844 psrlw mm1, 1 845 846 pand mm3, [lowPixelMask] 847 paddw mm0, mm1 848 849 pand mm3, mm2 850 paddw mm0, mm3 ;mm0 contains the interpolated values 851 movq [I56Pixel], mm0 852 movq mm7, mm0 853 854 ;------------------- 855 movq mm0, mm7 856 movq mm1, mm4 ;5,5,5,6 857 movq mm2, mm0 858 movq mm3, mm1 859 860 pand mm0, [colorMask] 861 pand mm1, [colorMask] 862 863 psrlw mm0, 1 864 psrlw mm1, 1 865 866 pand mm3, [lowPixelMask] 867 paddw mm0, mm1 868 869 pand mm3, mm2 870 paddw mm0, mm3 ;mm0 contains the interpolated values 871 movq [product1a], mm0 872 ;-------------------- 873 874 movq mm0, mm7 875 movq mm1, mm5 ;6,6,6,5 876 movq mm2, mm0 877 movq mm3, mm1 878 879 pand mm0, [colorMask] 880 pand mm1, [colorMask] 881 882 psrlw mm0, 1 883 psrlw mm1, 1 884 885 pand mm3, [lowPixelMask] 886 paddw mm0, mm1 887 888 pand mm3, mm2 889 paddw mm0, mm3 890 movq [product1b], mm0 891 892 ;------------------------- 893 ;------------------------- 894 movq mm0, [eax+ebx+ebx+color2] 895 movq mm1, [eax+ebx+ebx+color3] 896 movq mm2, mm0 897 movq mm3, mm1 898 movq mm4, mm0 899 movq mm5, mm1 900 901 pand mm0, [colorMask] 902 pand mm1, [colorMask] 903 904 psrlw mm0, 1 905 psrlw mm1, 1 906 907 pand mm3, [lowPixelMask] 908 paddw mm0, mm1 909 910 pand mm3, mm2 911 paddw mm0, mm3 912 movq [I23Pixel], mm0 913 movq mm7, mm0 914 915 ;--------------------- 916 movq mm0, mm7 917 movq mm1, mm4 ;2,2,2,3 918 movq mm2, mm0 919 movq mm3, mm1 920 921 pand mm0, [colorMask] 922 pand mm1, [colorMask] 923 924 psrlw mm0, 1 925 psrlw mm1, 1 926 927 pand mm3, [lowPixelMask] 928 paddw mm0, mm1 929 930 pand mm3, mm2 931 paddw mm0, mm3 932 movq [product2a], mm0 933 934 ;---------------------- 935 movq mm0, mm7 936 movq mm1, mm5 ;3,3,3,2 937 movq mm2, mm0 938 movq mm3, mm1 939 940 pand mm0, [colorMask] 941 pand mm1, [colorMask] 942 943 psrlw mm0, 1 944 psrlw mm1, 1 945 946 pand mm3, [lowPixelMask] 947 paddw mm0, mm1 948 949 pand mm3, mm2 950 paddw mm0, mm3 951 movq [product2b], mm0 952 953 954 ;//////////////////////////////// 955 ; Decide which "branch" to take 956 ;-------------------------------- 957 movq mm4, [eax+ebx+color5] 958 movq mm5, [eax+ebx+color6] 959 movq mm6, [eax+ebx+ebx+color3] 960 movq mm7, [eax+ebx+ebx+color2] 961 962 pxor mm3, mm3 963 movq mm0, mm4 964 movq mm1, mm5 965 966 pcmpeqw mm0, mm6 967 pcmpeqw mm1, mm7 968 pcmpeqw mm1, mm3 969 pand mm0, mm1 970 movq [Mask35], mm0 971 972 movq mm0, [eax+ebx+ebx+colorS1] 973 movq mm1, [eax+ebx+color4] 974 push eax 975 add eax, ebx 976 movq mm2, [eax+ebx+ebx+colorA2] 977 pop eax 978 movq mm3, [eax+colorB1] 979 pcmpeqw mm0, mm4 980 pcmpeqw mm1, mm4 981 pcmpeqw mm2, mm4 982 pcmpeqw mm3, mm4 983 pand mm0, mm1 984 pand mm2, mm3 985 por mm0, mm2 986 pand mm0, [Mask35] 987 movq [Mask35b], mm0 988 989 ;----------- 990 pxor mm3, mm3 991 movq mm0, mm4 992 movq mm1, mm5 993 994 pcmpeqw mm0, mm6 995 pcmpeqw mm1, mm7 996 pcmpeqw mm0, mm3 997 pand mm0, mm1 998 movq [Mask26], mm0 999 1000 movq mm0, [eax+ebx+ebx+color1] 1001 movq mm1, [eax+ebx+colorS2] 1002 push eax 1003 add eax, ebx 1004 movq mm2, [eax+ebx+ebx+colorA1] 1005 pop eax 1006 movq mm3, [eax+colorB2] 1007 pcmpeqw mm0, mm5 1008 pcmpeqw mm1, mm5 1009 pcmpeqw mm2, mm5 1010 pcmpeqw mm3, mm5 1011 pand mm0, mm1 1012 pand mm2, mm3 1013 por mm0, mm2 1014 pand mm0, [Mask26] 1015 movq [Mask26b], mm0 1016 1017 ;-------------------- 1018 movq mm0, mm4 1019 movq mm1, mm5 1020 movq mm2, mm0 1021 1022 pcmpeqw mm2, mm1 1023 pcmpeqw mm0, mm6 1024 pcmpeqw mm1, mm7 1025 pand mm0, mm1 1026 pand mm2, mm0 1027 pxor mm0, mm2 1028 movq mm7, mm0 1029 1030 ;------------------ 1031 packsswb mm7, mm7 1032 movd ecx, mm7 1033 test ecx, ecx 1034 jz near .SKIP_GUESS 1035 1036;--------------------------------------------- 1037; Map of the pixels: I|E F|J 1038; G|A B|K 1039; H|C D|L 1040; M|N O|P 1041 movq mm6, mm0 1042 movq mm4, [eax+ebx+color5] 1043 movq mm5, [eax+ebx+color6] 1044 pxor mm7, mm7 1045 pand mm6, [ONE] 1046 1047 movq mm0, [eax+colorB1] 1048 movq mm1, [eax+ebx+color4] 1049 movq mm2, mm0 1050 movq mm3, mm1 1051 pcmpeqw mm0, mm4 1052 pcmpeqw mm1, mm4 1053 pcmpeqw mm2, mm5 1054 pcmpeqw mm3, mm5 1055 pand mm0, mm6 1056 pand mm1, mm6 1057 pand mm2, mm6 1058 pand mm3, mm6 1059 paddw mm0, mm1 1060 paddw mm2, mm3 1061 1062 pxor mm3, mm3 1063 pcmpgtw mm0, mm6 1064 pcmpgtw mm2, mm6 1065 pcmpeqw mm0, mm3 1066 pcmpeqw mm2, mm3 1067 pand mm0, mm6 1068 pand mm2, mm6 1069 paddw mm7, mm0 1070 psubw mm7, mm2 1071 1072 movq mm0, [eax+colorB2] 1073 movq mm1, [eax+ebx+colorS2] 1074 movq mm2, mm0 1075 movq mm3, mm1 1076 pcmpeqw mm0, mm4 1077 pcmpeqw mm1, mm4 1078 pcmpeqw mm2, mm5 1079 pcmpeqw mm3, mm5 1080 pand mm0, mm6 1081 pand mm1, mm6 1082 pand mm2, mm6 1083 pand mm3, mm6 1084 paddw mm0, mm1 1085 paddw mm2, mm3 1086 1087 pxor mm3, mm3 1088 pcmpgtw mm0, mm6 1089 pcmpgtw mm2, mm6 1090 pcmpeqw mm0, mm3 1091 pcmpeqw mm2, mm3 1092 pand mm0, mm6 1093 pand mm2, mm6 1094 paddw mm7, mm0 1095 psubw mm7, mm2 1096 1097 push eax 1098 add eax, ebx 1099 movq mm0, [eax+ebx+color1] 1100 movq mm1, [eax+ebx+ebx+colorA1] 1101 movq mm2, mm0 1102 movq mm3, mm1 1103 pcmpeqw mm0, mm4 1104 pcmpeqw mm1, mm4 1105 pcmpeqw mm2, mm5 1106 pcmpeqw mm3, mm5 1107 pand mm0, mm6 1108 pand mm1, mm6 1109 pand mm2, mm6 1110 pand mm3, mm6 1111 paddw mm0, mm1 1112 paddw mm2, mm3 1113 1114 pxor mm3, mm3 1115 pcmpgtw mm0, mm6 1116 pcmpgtw mm2, mm6 1117 pcmpeqw mm0, mm3 1118 pcmpeqw mm2, mm3 1119 pand mm0, mm6 1120 pand mm2, mm6 1121 paddw mm7, mm0 1122 psubw mm7, mm2 1123 1124 movq mm0, [eax+ebx+colorS1] 1125 movq mm1, [eax+ebx+ebx+colorA2] 1126 movq mm2, mm0 1127 movq mm3, mm1 1128 pcmpeqw mm0, mm4 1129 pcmpeqw mm1, mm4 1130 pcmpeqw mm2, mm5 1131 pcmpeqw mm3, mm5 1132 pand mm0, mm6 1133 pand mm1, mm6 1134 pand mm2, mm6 1135 pand mm3, mm6 1136 paddw mm0, mm1 1137 paddw mm2, mm3 1138 1139 pxor mm3, mm3 1140 pcmpgtw mm0, mm6 1141 pcmpgtw mm2, mm6 1142 pcmpeqw mm0, mm3 1143 pcmpeqw mm2, mm3 1144 pand mm0, mm6 1145 pand mm2, mm6 1146 paddw mm7, mm0 1147 psubw mm7, mm2 1148 1149 pop eax 1150 movq mm1, mm7 1151 pxor mm0, mm0 1152 pcmpgtw mm7, mm0 1153 pcmpgtw mm0, mm1 1154 1155 por mm7, [Mask35] 1156 por mm1, [Mask26] 1157 movq [Mask35], mm7 1158 movq [Mask26], mm1 1159 1160.SKIP_GUESS: 1161 ;Start the ASSEMBLY !!! 1162 1163 movq mm4, [Mask35] 1164 movq mm5, [Mask26] 1165 movq mm6, [Mask35b] 1166 movq mm7, [Mask26b] 1167 1168 movq mm0, [eax+ebx+color5] 1169 movq mm1, [eax+ebx+color6] 1170 movq mm2, [eax+ebx+ebx+color2] 1171 movq mm3, [eax+ebx+ebx+color3] 1172 pcmpeqw mm0, mm2 1173 pcmpeqw mm1, mm3 1174 movq mm2, mm4 1175 movq mm3, mm5 1176 por mm0, mm1 1177 por mm2, mm3 1178 pand mm2, mm0 1179 pxor mm0, mm2 1180 movq mm3, mm0 1181 1182 movq mm2, mm0 1183 pxor mm0, mm0 1184 por mm2, mm4 1185 pxor mm4, mm6 1186 por mm2, mm5 1187 pxor mm5, mm7 1188 pcmpeqw mm2, mm0 1189 ;---------------- 1190 1191 movq mm0, [eax+ebx+color5] 1192 movq mm1, mm3 1193 por mm1, mm4 1194 por mm1, mm6 1195 pand mm0, mm1 1196 movq mm1, mm5 1197 pand mm1, [I56Pixel] 1198 por mm0, mm1 1199 movq mm1, mm7 1200 pand mm1, [product1b] 1201 por mm0, mm1 1202 movq mm1, mm2 1203 pand mm1, [product1a] 1204 por mm0, mm1 1205 movq [final1a], mm0 1206 1207 movq mm0, [eax+ebx+color6] 1208 movq mm1, mm3 1209 por mm1, mm5 1210 por mm1, mm7 1211 pand mm0, mm1 1212 movq mm1, mm4 1213 pand mm1, [I56Pixel] 1214 por mm0, mm1 1215 movq mm1, mm6 1216 pand mm1, [product1a] 1217 por mm0, mm1 1218 movq mm1, mm2 1219 pand mm1, [product1b] 1220 por mm0, mm1 1221 movq [final1b], mm0 1222 1223 movq mm0, [eax+ebx+ebx+color2] 1224 movq mm1, mm3 1225 por mm1, mm5 1226 por mm1, mm7 1227 pand mm0, mm1 1228 movq mm1, mm4 1229 pand mm1, [I23Pixel] 1230 por mm0, mm1 1231 movq mm1, mm6 1232 pand mm1, [product2b] 1233 por mm0, mm1 1234 movq mm1, mm2 1235 pand mm1, [product2a] 1236 por mm0, mm1 1237 movq [final2a], mm0 1238 1239 movq mm0, [eax+ebx+ebx+color3] 1240 movq mm1, mm3 1241 por mm1, mm4 1242 por mm1, mm6 1243 pand mm0, mm1 1244 movq mm1, mm5 1245 pand mm1, [I23Pixel] 1246 por mm0, mm1 1247 movq mm1, mm7 1248 pand mm1, [product2a] 1249 por mm0, mm1 1250 movq mm1, mm2 1251 pand mm1, [product2b] 1252 por mm0, mm1 1253 movq [final2b], mm0 1254 1255 1256 movq mm0, [final1a] 1257 movq mm2, [final1b] 1258 movq mm1, mm0 1259 movq mm4, [final2a] 1260 movq mm6, [final2b] 1261 movq mm5, mm4 1262 punpcklwd mm0, mm2 1263 punpckhwd mm1, mm2 1264 punpcklwd mm4, mm6 1265 punpckhwd mm5, mm6 1266 1267 1268 1269 1270%ifdef __DJGPP__ 1271 movq [fs:edx], mm0 1272 movq [fs:edx+8], mm1 1273 push edx 1274 add edx, [ebp+dstPitch] 1275 movq [fs:edx], mm4 1276 movq [fs:edx+8], mm5 1277 pop edx 1278%else 1279 movq [es:edx], mm0 1280 movq [es:edx+8], mm1 1281 push edx 1282 add edx, [ebp+dstPitch] 1283 movq [es:edx], mm4 1284 movq [es:edx+8], mm5 1285 pop edx 1286%endif 1287.SKIP_PROCESS: 1288 mov ecx, [ebp+deltaPtr] 1289 add ecx, 8 1290 mov [ebp+deltaPtr], ecx 1291 add edx, 16 1292 add eax, 8 1293 1294 pop ecx 1295 sub ecx, 4 1296 cmp ecx, 0 1297 jg near .Loop 1298 1299; Restore some stuff 1300 popad 1301 mov esp, ebp 1302 pop ebp 1303 emms 1304 ret 1305 1306 1307;------------------------------------------------------------------------- 1308;------------------------------------------------------------------------- 1309;------------------------------------------------------------------------- 1310;------------------------------------------------------------------------- 1311;------------------------------------------------------------------------- 1312;------------------------------------------------------------------------- 1313;------------------------------------------------------------------------- 1314 1315 1316;This is 2xSaI 1317colorI equ -2 1318colorE equ 0 1319colorF equ 2 1320colorJ equ 4 1321 1322colorG equ -2 1323colorA equ 0 1324colorB equ 2 1325colorK equ 4 1326 1327colorH equ -2 1328colorC equ 0 1329colorD equ 2 1330colorL equ 4 1331 1332colorM equ -2 1333colorN equ 0 1334colorO equ 2 1335colorP equ 4 1336 1337NEWSYM _2xSaILine 1338; Store some stuff 1339 push ebp 1340 mov ebp, esp 1341 pushad 1342 1343; Prepare the destination 1344%ifdef __DJGPP__ 1345 ; Set the selector 1346 mov eax, [ebp+dstSegment] 1347 mov fs, ax 1348%endif 1349 mov edx, [ebp+dstOffset] ; edx points to the screen 1350; Prepare the source 1351 ; eax points to colorA 1352 mov eax, [ebp+srcPtr] 1353 mov ebx, [ebp+srcPitch] 1354 mov ecx, [ebp+width] 1355 ; eax now points to colorE 1356 sub eax, ebx 1357 1358 1359; Main Loop 1360.Loop: push ecx 1361 1362 ;-----Check Delta------------------ 1363 mov ecx, [ebp+deltaPtr] 1364 1365 movq mm0, [eax+colorI] 1366 movq mm1, [eax+colorJ] 1367 movq mm2, [eax+ebx+colorG] 1368 movq mm3, [eax+ebx+colorK] 1369 movq mm4, [eax+ebx+ebx+colorH] 1370 movq mm5, [eax+ebx+ebx+colorL] 1371 push eax 1372 add eax, ebx 1373 movq mm6, [eax+ebx+ebx+colorM] 1374 movq mm7, [eax+ebx+ebx+colorP] 1375 pop eax 1376 1377 pcmpeqw mm0, [ecx+2+colorI] 1378 pcmpeqw mm1, [ecx+2+colorK] 1379 pcmpeqw mm2, [ecx+ebx+2+colorG] 1380 pcmpeqw mm3, [ecx+ebx+2+colorK] 1381 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH] 1382 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL] 1383 add ecx, ebx 1384 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM] 1385 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP] 1386 sub ecx, ebx 1387 1388 1389 pand mm0, mm1 1390 pand mm2, mm3 1391 pand mm4, mm5 1392 pand mm6, mm7 1393 pand mm0, mm2 1394 pand mm4, mm6 1395 pxor mm7, mm7 1396 pand mm0, mm4 1397 movq mm6, [eax+colorI] 1398 pcmpeqw mm7, mm0 1399 1400 movq [ecx+2+colorI], mm6 1401 1402 packsswb mm7, mm7 1403 movd ecx, mm7 1404 test ecx, ecx 1405 jz near .SKIP_PROCESS 1406 1407 ;End Delta 1408 1409 ;--------------------------------- 1410 1411 1412;1 1413 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL) 1414 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA 1415 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB 1416 1417 movq mm1, mm0 1418 movq mm3, mm2 1419 1420 pcmpeqw mm0, [eax+ebx+ebx+colorD] 1421 pcmpeqw mm1, [eax+colorE] 1422 pcmpeqw mm2, [eax+ebx+ebx+colorL] 1423 pcmpeqw mm3, [eax+ebx+ebx+colorC] 1424 1425 pand mm0, mm1 1426 pxor mm1, mm1 1427 pand mm0, mm2 1428 pcmpeqw mm3, mm1 1429 pand mm0, mm3 ;result in mm0 1430 1431 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ) 1432 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA 1433 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB 1434 movq mm5, mm4 1435 movq mm7, mm6 1436 1437 pcmpeqw mm4, [eax+ebx+ebx+colorC] 1438 pcmpeqw mm5, [eax+colorF] 1439 pcmpeqw mm6, [eax+colorJ] 1440 pcmpeqw mm7, [eax+colorE] 1441 1442 pand mm4, mm5 1443 pxor mm5, mm5 1444 pand mm4, mm6 1445 pcmpeqw mm7, mm5 1446 pand mm4, mm7 ;result in mm4 1447 1448 por mm0, mm4 ;combine the masks 1449 movq [Mask1], mm0 1450 1451 ;-------------------------------------------- 1452 1453;2 1454 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH) 1455 movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB 1456 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA 1457 movq mm1, mm0 1458 movq mm3, mm2 1459 1460 pcmpeqw mm0, [eax+ebx+ebx+colorC] 1461 pcmpeqw mm1, [eax+colorF] 1462 pcmpeqw mm2, [eax+ebx+ebx+colorH] 1463 pcmpeqw mm3, [eax+ebx+ebx+colorD] 1464 1465 pand mm0, mm1 1466 pxor mm1, mm1 1467 pand mm0, mm2 1468 pcmpeqw mm3, mm1 1469 pand mm0, mm3 ;result in mm0 1470 1471 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI) 1472 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB 1473 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA 1474 movq mm5, mm4 1475 movq mm7, mm6 1476 1477 pcmpeqw mm4, [eax+ebx+ebx+colorD] 1478 pcmpeqw mm5, [eax+colorE] 1479 pcmpeqw mm6, [eax+colorI] 1480 pcmpeqw mm7, [eax+colorF] 1481 1482 pand mm4, mm5 1483 pxor mm5, mm5 1484 pand mm4, mm6 1485 pcmpeqw mm7, mm5 1486 pand mm4, mm7 ;result in mm4 1487 1488 por mm0, mm4 ;combine the masks 1489 movq [Mask2], mm0 1490 1491 1492;interpolate colorA and colorB 1493 movq mm0, [eax+ebx+colorA] 1494 movq mm1, [eax+ebx+colorB] 1495 1496 movq mm2, mm0 1497 movq mm3, mm1 1498 1499 pand mm0, [colorMask] 1500 pand mm1, [colorMask] 1501 1502 psrlw mm0, 1 1503 psrlw mm1, 1 1504 1505 pand mm3, [lowPixelMask] 1506 paddw mm0, mm1 1507 1508 pand mm3, mm2 1509 paddw mm0, mm3 ;mm0 contains the interpolated values 1510 1511 ;assemble the pixels 1512 movq mm1, [eax+ebx+colorA] 1513 movq mm2, [eax+ebx+colorB] 1514 1515 movq mm3, [Mask1] 1516 movq mm5, mm1 1517 movq mm4, [Mask2] 1518 movq mm6, mm1 1519 1520 pand mm1, mm3 1521 por mm3, mm4 1522 pxor mm7, mm7 1523 pand mm2, mm4 1524 1525 pcmpeqw mm3, mm7 1526 por mm1, mm2 1527 pand mm0, mm3 1528 1529 por mm0, mm1 1530 1531 punpcklwd mm5, mm0 1532 punpckhwd mm6, mm0 1533 1534%ifdef __DJGPP__ 1535 movq [fs:edx], mm5 1536 movq [fs:edx+8], mm6 1537%else 1538 movq [es:edx], mm5 1539 movq [es:edx+8], mm6 1540%endif 1541 1542;------------------------------------------------ 1543; Create the Nextline 1544;------------------------------------------------ 1545;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO) 1546 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA 1547 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC 1548 movq mm1, mm0 1549 movq mm3, mm2 1550 1551 push eax 1552 add eax, ebx 1553 pcmpeqw mm0, [eax+ebx+colorD] 1554 pcmpeqw mm1, [eax+colorG] 1555 pcmpeqw mm2, [eax+ebx+ebx+colorO] 1556 pcmpeqw mm3, [eax+colorB] 1557 pop eax 1558 1559 pand mm0, mm1 1560 pxor mm1, mm1 1561 pand mm0, mm2 1562 pcmpeqw mm3, mm1 1563 pand mm0, mm3 ;result in mm0 1564 1565 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM) 1566 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA 1567 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC 1568 movq mm5, mm4 1569 movq mm7, mm6 1570 1571 push eax 1572 add eax, ebx 1573 pcmpeqw mm4, [eax+ebx+colorH] 1574 pcmpeqw mm5, [eax+colorB] 1575 pcmpeqw mm6, [eax+ebx+ebx+colorM] 1576 pcmpeqw mm7, [eax+colorG] 1577 pop eax 1578 1579 pand mm4, mm5 1580 pxor mm5, mm5 1581 pand mm4, mm6 1582 pcmpeqw mm7, mm5 1583 pand mm4, mm7 ;result in mm4 1584 1585 por mm0, mm4 ;combine the masks 1586 movq [Mask1], mm0 1587 ;-------------------------------------------- 1588 1589;4 1590 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF) 1591 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC 1592 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA 1593 movq mm1, mm0 1594 movq mm3, mm2 1595 1596 pcmpeqw mm0, [eax+ebx+colorB] 1597 pcmpeqw mm1, [eax+ebx+ebx+colorH] 1598 pcmpeqw mm2, [eax+colorF] 1599 pcmpeqw mm3, [eax+ebx+ebx+colorD] 1600 1601 pand mm0, mm1 1602 pxor mm1, mm1 1603 pand mm0, mm2 1604 pcmpeqw mm3, mm1 1605 pand mm0, mm3 ;result in mm0 1606 1607 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI) 1608 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC 1609 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA 1610 movq mm5, mm4 1611 movq mm7, mm6 1612 1613 pcmpeqw mm4, [eax+ebx+ebx+colorD] 1614 pcmpeqw mm5, [eax+ebx+colorG] 1615 pcmpeqw mm6, [eax+colorI] 1616 pcmpeqw mm7, [eax+ebx+ebx+colorH] 1617 1618 pand mm4, mm5 1619 pxor mm5, mm5 1620 pand mm4, mm6 1621 pcmpeqw mm7, mm5 1622 pand mm4, mm7 ;result in mm4 1623 1624 por mm0, mm4 ;combine the masks 1625 movq [Mask2], mm0 1626 ;---------------------------------------------- 1627 1628;interpolate colorA and colorC 1629 movq mm0, [eax+ebx+colorA] 1630 movq mm1, [eax+ebx+ebx+colorC] 1631 1632 movq mm2, mm0 1633 movq mm3, mm1 1634 1635 pand mm0, [colorMask] 1636 pand mm1, [colorMask] 1637 1638 psrlw mm0, 1 1639 psrlw mm1, 1 1640 1641 pand mm3, [lowPixelMask] 1642 paddw mm0, mm1 1643 1644 pand mm3, mm2 1645 paddw mm0, mm3 ;mm0 contains the interpolated values 1646 ;------------- 1647 1648 ;assemble the pixels 1649 movq mm1, [eax+ebx+colorA] 1650 movq mm2, [eax+ebx+ebx+colorC] 1651 1652 movq mm3, [Mask1] 1653 movq mm4, [Mask2] 1654 1655 pand mm1, mm3 1656 pand mm2, mm4 1657 1658 por mm3, mm4 1659 pxor mm7, mm7 1660 por mm1, mm2 1661 1662 pcmpeqw mm3, mm7 1663 pand mm0, mm3 1664 por mm0, mm1 1665 movq [ACPixel], mm0 1666 1667;//////////////////////////////// 1668; Decide which "branch" to take 1669;-------------------------------- 1670 movq mm0, [eax+ebx+colorA] 1671 movq mm1, [eax+ebx+colorB] 1672 movq mm6, mm0 1673 movq mm7, mm1 1674 pcmpeqw mm0, [eax+ebx+ebx+colorD] 1675 pcmpeqw mm1, [eax+ebx+ebx+colorC] 1676 pcmpeqw mm6, mm7 1677 1678 movq mm2, mm0 1679 movq mm3, mm0 1680 1681 pand mm0, mm1 ;colorA == colorD && colorB == colorC 1682 pxor mm7, mm7 1683 1684 pcmpeqw mm2, mm7 1685 pand mm6, mm0 1686 pand mm2, mm1 ;colorA != colorD && colorB == colorC 1687 1688 pcmpeqw mm1, mm7 1689 1690 pand mm1, mm3 ;colorA == colorD && colorB != colorC 1691 pxor mm0, mm6 1692 por mm1, mm6 1693 movq mm7, mm0 1694 movq [Mask2], mm2 1695 packsswb mm7, mm7 1696 movq [Mask1], mm1 1697 1698 movd ecx, mm7 1699 test ecx, ecx 1700 jz near .SKIP_GUESS 1701;--------------------------------------------- 1702; Map of the pixels: I|E F|J 1703; G|A B|K 1704; H|C D|L 1705; M|N O|P 1706 movq mm6, mm0 1707 movq mm4, [eax+ebx+colorA] 1708 movq mm5, [eax+ebx+colorB] 1709 pxor mm7, mm7 1710 pand mm6, [ONE] 1711 1712 movq mm0, [eax+colorE] 1713 movq mm1, [eax+ebx+colorG] 1714 movq mm2, mm0 1715 movq mm3, mm1 1716 pcmpeqw mm0, mm4 1717 pcmpeqw mm1, mm4 1718 pcmpeqw mm2, mm5 1719 pcmpeqw mm3, mm5 1720 pand mm0, mm6 1721 pand mm1, mm6 1722 pand mm2, mm6 1723 pand mm3, mm6 1724 paddw mm0, mm1 1725 paddw mm2, mm3 1726 1727 pxor mm3, mm3 1728 pcmpgtw mm0, mm6 1729 pcmpgtw mm2, mm6 1730 pcmpeqw mm0, mm3 1731 pcmpeqw mm2, mm3 1732 pand mm0, mm6 1733 pand mm2, mm6 1734 paddw mm7, mm0 1735 psubw mm7, mm2 1736 1737 movq mm0, [eax+colorF] 1738 movq mm1, [eax+ebx+colorK] 1739 movq mm2, mm0 1740 movq mm3, mm1 1741 pcmpeqw mm0, mm4 1742 pcmpeqw mm1, mm4 1743 pcmpeqw mm2, mm5 1744 pcmpeqw mm3, mm5 1745 pand mm0, mm6 1746 pand mm1, mm6 1747 pand mm2, mm6 1748 pand mm3, mm6 1749 paddw mm0, mm1 1750 paddw mm2, mm3 1751 1752 pxor mm3, mm3 1753 pcmpgtw mm0, mm6 1754 pcmpgtw mm2, mm6 1755 pcmpeqw mm0, mm3 1756 pcmpeqw mm2, mm3 1757 pand mm0, mm6 1758 pand mm2, mm6 1759 paddw mm7, mm0 1760 psubw mm7, mm2 1761 1762 push eax 1763 add eax, ebx 1764 movq mm0, [eax+ebx+colorH] 1765 movq mm1, [eax+ebx+ebx+colorN] 1766 movq mm2, mm0 1767 movq mm3, mm1 1768 pcmpeqw mm0, mm4 1769 pcmpeqw mm1, mm4 1770 pcmpeqw mm2, mm5 1771 pcmpeqw mm3, mm5 1772 pand mm0, mm6 1773 pand mm1, mm6 1774 pand mm2, mm6 1775 pand mm3, mm6 1776 paddw mm0, mm1 1777 paddw mm2, mm3 1778 1779 pxor mm3, mm3 1780 pcmpgtw mm0, mm6 1781 pcmpgtw mm2, mm6 1782 pcmpeqw mm0, mm3 1783 pcmpeqw mm2, mm3 1784 pand mm0, mm6 1785 pand mm2, mm6 1786 paddw mm7, mm0 1787 psubw mm7, mm2 1788 1789 movq mm0, [eax+ebx+colorL] 1790 movq mm1, [eax+ebx+ebx+colorO] 1791 movq mm2, mm0 1792 movq mm3, mm1 1793 pcmpeqw mm0, mm4 1794 pcmpeqw mm1, mm4 1795 pcmpeqw mm2, mm5 1796 pcmpeqw mm3, mm5 1797 pand mm0, mm6 1798 pand mm1, mm6 1799 pand mm2, mm6 1800 pand mm3, mm6 1801 paddw mm0, mm1 1802 paddw mm2, mm3 1803 1804 pxor mm3, mm3 1805 pcmpgtw mm0, mm6 1806 pcmpgtw mm2, mm6 1807 pcmpeqw mm0, mm3 1808 pcmpeqw mm2, mm3 1809 pand mm0, mm6 1810 pand mm2, mm6 1811 paddw mm7, mm0 1812 psubw mm7, mm2 1813 1814 pop eax 1815 movq mm1, mm7 1816 pxor mm0, mm0 1817 pcmpgtw mm7, mm0 1818 pcmpgtw mm0, mm1 1819 1820 por mm7, [Mask1] 1821 por mm1, [Mask2] 1822 movq [Mask1], mm7 1823 movq [Mask2], mm1 1824 1825.SKIP_GUESS: 1826 ;---------------------------- 1827 ;interpolate A, B, C and D 1828 movq mm0, [eax+ebx+colorA] 1829 movq mm1, [eax+ebx+colorB] 1830 movq mm4, mm0 1831 movq mm2, [eax+ebx+ebx+colorC] 1832 movq mm5, mm1 1833 movq mm3, [qcolorMask] 1834 movq mm6, mm2 1835 movq mm7, [qlowpixelMask] 1836 1837 pand mm0, mm3 1838 pand mm1, mm3 1839 pand mm2, mm3 1840 pand mm3, [eax+ebx+ebx+colorD] 1841 1842 psrlw mm0, 2 1843 pand mm4, mm7 1844 psrlw mm1, 2 1845 pand mm5, mm7 1846 psrlw mm2, 2 1847 pand mm6, mm7 1848 psrlw mm3, 2 1849 pand mm7, [eax+ebx+ebx+colorD] 1850 1851 paddw mm0, mm1 1852 paddw mm2, mm3 1853 1854 paddw mm4, mm5 1855 paddw mm6, mm7 1856 1857 paddw mm4, mm6 1858 paddw mm0, mm2 1859 psrlw mm4, 2 1860 pand mm4, [qlowpixelMask] 1861 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D 1862 1863;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\ 1864 ;assemble the pixels 1865 movq mm1, [Mask1] 1866 movq mm2, [Mask2] 1867 movq mm4, [eax+ebx+colorA] 1868 movq mm5, [eax+ebx+colorB] 1869 pand mm4, mm1 1870 pand mm5, mm2 1871 1872 pxor mm7, mm7 1873 por mm1, mm2 1874 por mm4, mm5 1875 pcmpeqw mm1, mm7 1876 pand mm0, mm1 1877 por mm4, mm0 ;mm4 contains the diagonal pixels 1878 1879 movq mm0, [ACPixel] 1880 movq mm1, mm0 1881 punpcklwd mm0, mm4 1882 punpckhwd mm1, mm4 1883 1884 push edx 1885 add edx, [ebp+dstPitch] 1886 1887%ifdef __DJGPP__ 1888 movq [fs:edx], mm0 1889 movq [fs:edx+8], mm1 1890%else 1891 movq [es:edx], mm0 1892 movq [es:edx+8], mm1 1893%endif 1894 pop edx 1895 1896.SKIP_PROCESS: 1897 mov ecx, [ebp+deltaPtr] 1898 add ecx, 8 1899 mov [ebp+deltaPtr], ecx 1900 add edx, 16 1901 add eax, 8 1902 1903 pop ecx 1904 sub ecx, 4 1905 cmp ecx, 0 1906 jg near .Loop 1907 1908; Restore some stuff 1909 popad 1910 mov esp, ebp 1911 pop ebp 1912 emms 1913 ret 1914 1915;------------------------------------------------------------------------- 1916;------------------------------------------------------------------------- 1917;------------------------------------------------------------------------- 1918;------------------------------------------------------------------------- 1919;------------------------------------------------------------------------- 1920;------------------------------------------------------------------------- 1921;------------------------------------------------------------------------- 1922 1923NEWSYM _Init_2xSaIMMX 1924; Store some stuff 1925 push ebp 1926 mov ebp, esp 1927 push edx 1928 1929 1930;Damn thing doesn't work 1931; mov eax,1 1932; cpuid 1933; test edx, 0x00800000 ;test bit 23 1934; jz end2 ;bit not set => no MMX detected 1935 1936 mov eax, [ebp+8] ;PixelFormat 1937 cmp eax, 555 1938 jz Bits555 1939 cmp eax, 565 1940 jz Bits565 1941end2: 1942 mov eax, 1 1943 jmp end 1944Bits555: 1945 mov edx, 0x7BDE7BDE 1946 mov eax, colorMask 1947 mov [eax], edx 1948 mov [eax+4], edx 1949 mov edx, 0x04210421 1950 mov eax, lowPixelMask 1951 mov [eax], edx 1952 mov [eax+4], edx 1953 mov edx, 0x739C739C 1954 mov eax, qcolorMask 1955 mov [eax], edx 1956 mov [eax+4], edx 1957 mov edx, 0x0C630C63 1958 mov eax, qlowpixelMask 1959 mov [eax], edx 1960 mov [eax+4], edx 1961 mov eax, 0 1962 jmp end 1963Bits565: 1964 mov edx, 0xF7DEF7DE 1965 mov eax, colorMask 1966 mov [eax], edx 1967 mov [eax+4], edx 1968 mov edx, 0x08210821 1969 mov eax, lowPixelMask 1970 mov [eax], edx 1971 mov [eax+4], edx 1972 mov edx, 0xE79CE79C 1973 mov eax, qcolorMask 1974 mov [eax], edx 1975 mov [eax+4], edx 1976 mov edx, 0x18631863 1977 mov eax, qlowpixelMask 1978 mov [eax], edx 1979 mov [eax+4], edx 1980 mov eax, 0 1981 jmp end 1982end: 1983 pop edx 1984 mov esp, ebp 1985 pop ebp 1986 ret 1987 1988 1989;------------------------------------------------------------------------- 1990;------------------------------------------------------------------------- 1991;------------------------------------------------------------------------- 1992;------------------------------------------------------------------------- 1993;------------------------------------------------------------------------- 1994;------------------------------------------------------------------------- 1995;------------------------------------------------------------------------- 1996 1997SECTION .data 1998;Some constants 1999colorMask dd 0xF7DEF7DE, 0xF7DEF7DE 2000lowPixelMask dd 0x08210821, 0x08210821 2001 2002qcolorMask dd 0xE79CE79C, 0xE79CE79C 2003qlowpixelMask dd 0x18631863, 0x18631863 2004 2005FALSE dd 0x00000000, 0x00000000 2006TRUE dd 0xffffffff, 0xffffffff 2007ONE dd 0x00010001, 0x00010001 2008 2009 2010SECTION .bss 2011ACPixel resb 8 2012Mask1 resb 8 2013Mask2 resb 8 2014 2015I56Pixel resb 8 2016I23Pixel resb 8 2017I5556Pixel resb 8 2018I2223Pixel resb 8 2019I5666Pixel resb 8 2020I2333Pixel resb 8 2021Mask26 resb 8 2022Mask35 resb 8 2023Mask26b resb 8 2024Mask35b resb 8 2025product1a resb 8 2026product1b resb 8 2027product2a resb 8 2028product2b resb 8 2029final1a resb 8 2030final1b resb 8 2031final2a resb 8 2032final2b resb 8 2033