1; $Id: tmap_per.asm,v 1.3 2003/02/18 20:15:48 btb Exp $ 2;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX 3;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO 4;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A 5;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS 6;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS 7;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE 8;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE 9;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS 10;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE. 11;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED. 12; 13; 14; Perspective texture mapper inner loop. 15; 16; Old Log: 17; Revision 1.26 1995/02/20 18:22:55 john 18; Put all the externs in the assembly modules into tmap_inc.asm. 19; Also, moved all the C versions of the inner loops into a new module, 20; scanline.c. 21; 22; Revision 1.25 1995/02/20 17:09:08 john 23; Added code so that you can build the tmapper with no assembly! 24; 25; Revision 1.24 1995/01/10 09:32:07 mike 26; mostly fix garbage at end of scanline, but slow down by 1-4%. 27; 28; Revision 1.23 1994/12/02 23:29:57 mike 29; optimizations. 30; 31; Revision 1.22 1994/11/30 00:57:00 mike 32; optimization. 33; 34; Revision 1.21 1994/11/21 13:57:42 mike 35; fix right side shear bug 36; 37; Revision 1.20 1994/11/12 16:41:09 mike 38; jae -> ja. 39; 40; Revision 1.19 1994/10/27 19:40:00 john 41; Made lighting table lookup be _gr_fade_table[eax] instead 42; of fs:[eax], which gets rig of a segment override that 43; supposedly costs 1 clock on a 486. Mainly, I wanted to verify 44; that the only reason we need selectors is for the source texture 45; data . 46; 47; Revision 1.18 1994/05/03 11:08:32 mike 48; Trap divide overflows. 
49; 50; Revision 1.17 1994/04/21 15:03:41 mike 51; make faster. 52; 53; Revision 1.16 1994/04/08 16:46:57 john 54; Made 32 fade levels. Hacked. 55; 56; Revision 1.15 1994/03/31 08:35:18 mike 57; Fix quantized-by-4 bug in inner loop. 58; 59; Revision 1.14 1994/03/14 17:41:14 mike 60; Fix bug in unlighted version. 61; 62; Revision 1.13 1994/03/14 15:45:14 mike 63; streamline code. 64; 65; Revision 1.12 1994/01/14 14:01:58 mike 66; *** empty log message *** 67; 68; Revision 1.11 1993/12/18 14:43:44 john 69; Messed around with doing 1/z, the u*(1/z) and v*(1/z) 70; (Went from 23 fps to 21 fps... not good! ) 71; 72; Revision 1.10 1993/12/17 16:14:17 john 73; Split lighted/nonlighted, so there is no cmp lighting 74; in the inner loop. 75; 76; Revision 1.9 1993/12/17 12:34:29 john 77; Made leftover bytes use linear approx instead of correct... 78; should save about 8 divides per scanline on average. 79; Also, took out anti-aliasing code and rearranged to 80; order of some instructions to help on 486 pipelining. 81; (The anti-aliasing code did *not* look good, so I 82; figure there was no reason to keep it in. ) 83; 84; Revision 1.8 1993/12/16 18:37:52 mike 85; Align some stuff on 4 byte boundaries. 86; 87; Revision 1.7 1993/11/30 08:44:18 john 88; Made selector set check for < 64*64 bitmaps. 89; 90; Revision 1.6 1993/11/23 17:25:26 john 91; Added safety "and eax, 0fffh" in lighting lookup. 92; 93; Revision 1.5 1993/11/23 15:08:52 mike 94; Fixed lighting bug. 95; 96; Revision 1.4 1993/11/23 14:38:50 john 97; optimized NORMAL code by switching EBX and ESI, so BH can be used in 98; the lighting process. 99; 100; Revision 1.3 1993/11/23 14:30:53 john 101; Made the perspective tmapper do 1/8 divides; added lighting. 
;
; Revision 1.2 1993/11/22 10:24:59 mike
; *** empty log message ***
;
; Revision 1.1 1993/09/08 17:29:53 mike
; Initial revision
;
;
;

[BITS 32]

; The scanline routine is exported under both the underscore-prefixed and
; the plain name.
global  _asm_tmap_scanline_per
global  asm_tmap_scanline_per

%include "tmap_inc.asm"

[SECTION .data]
align 4
        ;extern _per2_flag;:dword
%ifdef __linux__
; Cater for linux ELF compilers...
; The %defines below rename the underscore-prefixed data symbols to their
; plain names, so the "global" statements further down export them without
; the leading underscore.  _x is not renamed; the extra label "x" placed in
; front of it provides the plain name instead.
global x
%define _loop_count loop_count
%define _new_end new_end
%define _scan_doubling_flag scan_doubling_flag
%define _linear_if_far_flag linear_if_far_flag
%endif

        global _x
        global _loop_count
        global _new_end
        global _scan_doubling_flag
        global _linear_if_far_flag

;       global _max_ecx
;       global _min_ecx

        mem_edx dd 0                    ; only referenced from commented-out code in this file
        x:
        _x dd 0                         ; not referenced by the code in this file
        _loop_count dd 0                ; per-pixel loops: pixels remaining - 1;
                                        ; chunked loops: number of 2^NBITS-pixel chunks remaining

;       _max_ecx dd 0
;       _min_ecx dd 55555555h
        _new_end dd 1                   ; if set, use new, but slower, way of finishing off extra pixels on scanline, 01/10/95 --MK

        _scan_doubling_flag dd 0        ; not referenced by the code in this file
        _linear_if_far_flag dd 0        ; not referenced by the code in this file

;---------- local variables
align 4
        req_base dd 0                   ; not referenced by the code in this file
        req_size dd 0                   ; not referenced by the code in this file
        U0 dd 0                         ; u/z at the start of the current chunk (16.16 result of the PDIV sequence)
        U1 dd 0                         ; not referenced by the code in this file
        V0 dd 0                         ; v/z at the start of the current chunk (16.16 result of the PDIV sequence)
        V1 dd 0                         ; not referenced by the code in this file
        num_left_over dd 0              ; pixels remaining after the last full 2^NBITS-pixel chunk
        DU1 dd 0                        ; _fx_du_dx << NBITS  (u step per chunk)
        DV1 dd 0                        ; _fx_dv_dx << NBITS  (v step per chunk)
        DZ1 dd 0                        ; _fx_dz_dx << NBITS  (z step per chunk)

[SECTION .text]

; --------------------------------------------------------------------------------------------------
; Enter (all inputs are globals; the routine reads no register or stack arguments):
;       _fx_xleft       left x coordinate  (integer, see the 12/01/94 note in the routine)
;       _fx_xright      right x coordinate (integer, see the 12/01/94 note in the routine)
;       _fx_y           y coordinate, used as an index into _y_pointers
;       _pixptr         address of source pixel map
;       _fx_u           fixed point initial u coordinate
;       _fx_v           fixed point initial v coordinate
;       _fx_z           fixed point initial z coordinate
;       _fx_du_dx       fixed point du/dx
;       _fx_dv_dx       fixed point dv/dx
;       _fx_dz_dx       fixed point dz/dx
;       _fx_l           fixed point initial light value
;       _fx_dl_dx       fixed point dl/dx

; Reference algorithm (one divide pair per pixel):
;       for (x = (int) xleft; x <= (int) xright; x++) {
;               _setcolor(read_pixel_from_tmap(srcb,((int) (u/z)) & 63,((int) (v/z)) & 63));
;               _setpixel(x,y);
;
;               u += du_dx;
;               v += dv_dx;
;               z += dz_dx;
;       }


align 16
;-----------------------------------------------------------------------
; asm_tmap_scanline_per  (also exported as _asm_tmap_scanline_per)
;
; Draws one horizontal span of a perspective texture-mapped polygon.
; Takes no register or stack arguments; everything is passed in globals.
;
; In (memory):
;   [_fx_y]                     row number, index into _y_pointers
;   [_fx_xleft], [_fx_xright]   first / last x of the span (integers)
;   [_fx_u], [_fx_v], [_fx_z]   u, v, z at the left edge
;   [_fx_du_dx], [_fx_dv_dx], [_fx_dz_dx]   per-pixel steps
;   [_fx_l], [_fx_dl_dx]        light value and its per-pixel step
;   [_pixptr]                   base address of the source texture
;                               (addressed as v*64+u, u and v masked to 0..63)
;   [_write_buffer]             added to the row offset to form the destination
;   [_per2_flag], [_Lighting_on], [_Transparency_on], [_new_end]  mode switches
; Out:   nothing in registers; pixels are stored through edi.
; Clobb: flags.  General registers are preserved (pusha/popa).
;        [_fx_l] and [_fx_dl_dx] are shifted right by 8 IN PLACE and
;        [_fx_l] is advanced as pixels are lit.  The scratch variables
;        _loop_count, num_left_over, U0, V0, DU1, DV1, DZ1 are overwritten.
;
; Dispatch: _per2_flag == 0  -> tmap_loop (two divides per pixel)
;           _Lighting_on != 0 -> _tmap_loop_fast
;           otherwise         -> _tmap_loop_fast_nolight
;-----------------------------------------------------------------------
_asm_tmap_scanline_per:
asm_tmap_scanline_per:
;       push    es
        pusha

;---------------------------- setup for loop ---------------------------------
; Setup for loop:       _loop_count iterations = (int) xright - (int) xleft
;       esi     source pixel pointer = pixptr
;       edi     initial row pointer = y*320+x
; NOTE: fx_xright and fx_xleft changed from fix to int by mk on 12/01/94.

; set esi = pointer to start of texture map data

; set edi = address of first pixel to modify
        mov     edi,[_fx_y]
;       mov     es,[_pixel_data_selector]       ; selector[0*2]

        mov     edi,[_y_pointers+edi*4]

        mov     ebx,[_fx_xleft]
        test    ebx, ebx
        jns     ebx_ok
        xor     ebx, ebx                ; negative left x is clamped to 0
ebx_ok: add     edi,[_write_buffer]
        add     edi,ebx

; set _loop_count = # of iterations
        mov     eax,[_fx_xright]
        sub     eax,ebx
        js      near _none_to_do        ; right edge is left of the (clamped) left edge
        mov     [_loop_count],eax       ; pixel count - 1; the loops run while this stays >= 0

; lighting values are passed in fixed point, but need to be in 8 bit integer, 8 bit fraction so we can easily
; get the integer by reading %bh
        sar     dword [_fx_l], 8
        sar     dword [_fx_dl_dx],8
        jns     dl_dx_ok
        inc     dword [_fx_dl_dx]       ; round towards 0 for negative deltas
dl_dx_ok:

; set initial values
        mov     ebx,[_fx_u]
        mov     ebp,[_fx_v]
        mov     ecx,[_fx_z]

        test    dword [_per2_flag],-1
        je      tmap_loop

        test    dword [_Lighting_on], -1
        je      near _tmap_loop_fast_nolight
        jmp     _tmap_loop_fast
;tmap_loop_fast_nolight_jumper:
;       jmp     tmap_loop_fast_nolight

;================ PERSPECTIVE TEXTURE MAP INNER LOOPS ========================
;
; Usage in loop:        eax     division, pixel value
;                       ebx     u
;                       ecx     z
;                       edx     division
;                       ebp     v
;                       esi     source pixel pointer
;                       edi     destination pixel pointer

;-------------------- NORMAL PERSPECTIVE TEXTURE MAP LOOP -----------------
; One v/z and one u/z divide per pixel.  Also used by the fast paths when
; the span holds less than one full 2^NBITS-pixel chunk.
tmap_loop:
        mov     esi, ebx                ; esi becomes u coordinate

        align   4
tmap_loop0:

; compute v coordinate
        mov     eax, ebp                ; get v
        mov     edx, eax
        sar     edx, 31                 ; sign-extend eax into edx:eax for idiv
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26                  ; keep only the low 6 bits of u, moved to the top of eax
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        add     ebx, [_pixptr]
        xor     eax, eax
        test    dword [_Lighting_on], -1
        mov     al, [ebx]               ; get pixel from source bitmap (mov leaves the flags from test intact)
        je      NoLight1

; LIGHTING CODE
; NOTE(review): here the fade-table lookup happens before the 255 test,
; whereas the other per-pixel loops in this file test the raw texel
; first -- confirm which is intended.
        mov     ebx, [_fx_l]            ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, [_fx_dl_dx]        ; update lighting value
        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables
        mov     [_fx_l], ebx            ; save temp copy of lighting value

; transparency check
NoLight1:       cmp     al,255
        je      skip1

        mov     [edi],al
skip1:  inc     edi

; update deltas
        add     ebp,[_fx_dv_dx]
        add     esi,[_fx_du_dx]
        add     ecx,[_fx_dz_dx]
        je      _div_0_abort            ; would be dividing by 0, so abort

        dec     dword [_loop_count]
        jns     tmap_loop0

; Common exit for every path in this routine.
_none_to_do:
        popa
;       pop     es
        ret

; We detected a z=0 condition, which seems pretty bogus, don't you think?
; So, we abort, but maybe we want to know about it.
_div_0_abort:
        jmp     _none_to_do

;-------------------------- PER/4 TMAPPER ----------------
; (The pseudo-code below shows the 4-pixel form; the code steps by
;  2^NBITS pixels per divide pair.)
;
;       x = x1
;       U0 = u/w; V0 = v/w;
;       while ( 1 )
;               u += du_dx*4; v+= dv_dx*4
;               U1 = u/w; V1 = v/w;
;               DUDX = (U1-U0)/4; DVDX = (V1-V0)/4;
;
;       ; Pixel 0
;               pixels = texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 1
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 2
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;               U0 += DUDX; V0 += DVDX
;       ; Pixel 3
;               pixels = (pixels<<8)+texmap[V0*64+U0];
;
;               screen[x] = pixel
;               x += 4;
;               U0 = U1; V0 = V1

NBITS   equ     4       ; 2^NBITS pixels plotted per divide
ZSHIFT  equ     4       ; precision used in PDIV macro


;PDIV MACRO
; Returns EAX/ECX in 16.16 format in EAX. Trashes EDX
; sig bits 6.3
;       mov     edx,eax
;       shl     eax,ZSHIFT
;       sar     edx,32-ZSHIFT
;       idiv    ecx     ; eax = (v/z)
;       shl     eax, 16-ZSHIFT
;ENDM
; (The PDIV sequence is written out inline wherever it is needed below.)

global _tmap_loop_fast

; -------------------------------------- Start of Getting Dword Aligned ----------------------------------------------
;       ebx     fx_u
; Lit fast path.  Pixels are drawn one at a time, with a full divide pair
; each, until edi is a multiple of 4.

_tmap_loop_fast:
        mov     esi,ebx

        align   4
NotDwordAligned1:
        test    edi, 11b
        jz      DwordAligned1

; compute v coordinate
        mov     eax, ebp                ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        add     ebx,[_pixptr]
        xor     eax, eax
        mov     al, [ebx]               ; get pixel from source bitmap

; lighting code
        mov     ebx, [_fx_l]            ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, [_fx_dl_dx]        ; update lighting value
        mov     [_fx_l], ebx            ; save temp copy of lighting value

; transparency check
        cmp     al,255
        je      skip2                   ; this pixel is transparent, so don't write it (or light it)

        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables

; write 1 pixel
        mov     [edi],al
skip2:  inc     edi

; update deltas
        add     ebp,[_fx_dv_dx]
        add     esi,[_fx_du_dx]
        add     ecx,[_fx_dz_dx]
        je      _div_0_abort            ; would be dividing by 0, so abort

        dec     dword [_loop_count]
        jns     NotDwordAligned1

        jmp     _none_to_do

; -------------------------------------- End of Getting Dword Aligned ----------------------------------------------

DwordAligned1:

        mov     eax, [_loop_count]
        mov     ebx, esi                ; get fx_u [pentium pipelining]
        inc     eax                     ; eax = pixels still to draw
        mov     esi, eax
        and     esi, (1 << NBITS) - 1   ; esi = pixels beyond the last full chunk
        sar     eax, NBITS              ; eax = number of full chunks; sets ZF for the je below
        mov     [num_left_over], esi    ; (mov does not disturb the flags)
        je      near tmap_loop          ; there are no 2^NBITS chunks, do divide/pixel for whole scanline
        mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS

; compute initial v coordinate
        mov     eax,ebp                 ; get v
        mov     edx,ebp
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT
        mov     [V0], eax

; compute initial u coordinate
        mov     eax,ebx                 ; get u
        mov     edx,ebx
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     [U0], eax

; Set deltas to NPIXS pixel increments
        mov     eax, [_fx_du_dx]
        shl     eax, NBITS
        mov     [DU1], eax
        mov     eax, [_fx_dv_dx]
        shl     eax, NBITS
        mov     [DV1], eax
        mov     eax, [_fx_dz_dx]
        shl     eax, NBITS
        mov     [DZ1], eax

; One iteration = one 2^NBITS-pixel chunk: divide once at the far end of
; the chunk and step u/z, v/z linearly across it.
        align   4
TopOfLoop4:
        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort       ; would be dividing by 0, so abort

; Done with ebx, ebp, ecx until next iteration
        push    ebx
        push    ecx
        push    ebp
        push    edi

; Find fixed U1
        mov     eax, ebx
        mov     edx,ebx
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        mov     edx, ebp
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

        shl     eax, 16-ZSHIFT
        mov     ebp, eax                ; ebp = V1 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

; Save the U1 and V1 so we don't have to divide on the next iteration
        mov     [U0], ebx
        mov     [V0], ebp

        pop     edi                     ; Restore EDI before using it

; LIGHTING CODE
        mov     ebx, [_fx_l]
        mov     ebp, [_fx_dl_dx]

        test    dword [_Transparency_on],-1
        je      near no_trans1

; repproc1: two lit pixels, each tested for transparency (texel 255).
; The light value is stepped for every pixel, drawn or not, so it stays
; in step with x exactly as in the other lit loops of this file.  The
; step is done before the cmp because add would overwrite the flags.
%macro repproc1 0
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax, [_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (even for transparent texels)
        cmp     al,255
        je      %%skipa1
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
%%skipa1:
        inc     edi

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (even for transparent texels)
        cmp     al,255
        je      %%skipa2
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi],al
%%skipa2:
        inc     edi
%endmacro


%rep (2 << (NBITS-2))                   ; 2 pixels per expansion -> 2^NBITS pixels
;       local   skip3,no_trans1
;       local   skipa1,skipa2
        repproc1
%endrep

jmp     cont1

; -------------------------------------------------------
no_trans1:

; repproc2: two lit pixels with no transparency test, collected in cl/ch
; and rotated into the other half of ecx.  Two expansions fill ecx with
; four pixels in store order.
%macro repproc2 0
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     cl, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        mov     ch, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer

; ----- This is about 1% faster than the above, and could probably be optimized more.
; ----- Problem is, it gets the u,v coordinates backwards. What you would need to do
; ----- is switch the packing of the u,v coordinates above (about 95 lines up).
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     cl, _gr_fade_table[edx]
;----------;
;----------;    mov     eax, esi
;----------;    shr     ax, 10
;----------;    rol     eax, 6
;----------;    mov     dx, ax
;----------;    add     esi, mem_edx
;----------;    mov     dl, es:[edx]
;----------;    mov     dh, bh
;----------;    add     ebx, ebp
;----------;    mov     ch, _gr_fade_table[edx]

        ror     ecx, 16                 ; move to next double dest pixel position
%endmacro

%rep (1 << (NBITS-2))                   ; 4 pixels per iteration -> 2^NBITS pixels

        repproc2
        repproc2

        mov     [edi],ecx               ; Draw 4 pixels to display
        add     edi,4
%endrep
;;      pop     edx
cont1:

; -------------------------------------------------------

; LIGHTING CODE
        mov     [_fx_l], ebx
        pop     ebp
        pop     ecx
        pop     ebx
        dec     dword [_loop_count]
        jnz     near TopOfLoop4

EndOfLoop4:
        test    dword [num_left_over], -1
        je      near _none_to_do

; ----------------------------------------- Start of LeftOver Pixels ------------------------------------------
; Choose how to finish the span.  With z' = z + DZ1 (z one whole chunk
; further on):
;   z' >= 0 and 3*z < 4*z'      -> okhere: one more divide pair, then
;                                  linear stepping for the leftover pixels
;   otherwise, _new_end != 0    -> NewDoEndPixels: divide for every pixel
;   otherwise, _new_end == 0    -> okhere as well
DoEndPixels:
        push    ecx

        mov     eax, ecx
        lea     eax, [eax*2+eax]        ; eax = 3*z

        add     ecx, [DZ1]
        js      notokhere
        shl     ecx,2                   ; ecx = 4*(z + DZ1)
        cmp     eax, ecx
        pop     ecx                     ; pop leaves the flags from cmp intact
        jl      okhere
        jmp     bah_bah
notokhere:
        pop     ecx
bah_bah:
        test    dword [_new_end],-1
        jne     near NewDoEndPixels
okhere:

        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort
        jns     dep_cont

; z went negative.
; this can happen because we added DZ1 to the current z, but dz1 represents dz for perhaps 16 pixels
; though we might only plot one more pixel.
;
; Back the chunk end-point off by DU1/DV1/DZ1 shifted right by 1, 2, ...
; NBITS-1 until z is no longer negative.  z is parked in edx for the
; duration: the shift count has to live in cl, and cl is the low byte of
; ecx, so keeping z in ecx would let the counter and z corrupt each other.
; edx is free here; it is reloaded by the divide sequence after dep_cont.
        mov     edx, ecx                ; edx = z
        mov     cl, 1                   ; cl = shift count

dep_loop:       mov     eax, [DU1]
        sar     eax, cl
        sub     ebx, eax

        mov     eax, [DV1]
        sar     eax, cl
        sub     ebp, eax

        mov     eax, [DZ1]
        sar     eax, cl
        sub     edx, eax
        je      near _div_0_abort
        jns     dep_fixed

        inc     cl
        cmp     cl, NBITS
        jne     dep_loop

dep_fixed:
        mov     ecx, edx                ; ecx = adjusted z again

; Reached directly (with ecx = z) when z did not go negative.
dep_cont:
        push    edi                     ; use edi as a temporary variable

        cmp     ecx,1 << (ZSHIFT+1)     ; keep the divisor at or above 1 << (ZSHIFT+1)
        jg      ecx_ok
        mov     ecx, 1 << (ZSHIFT+1)
ecx_ok:

; Find fixed U1
        mov     eax, ebx
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT

        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT

        mov     ebp, eax                ; ebp = V1 until pop's

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

        pop     edi                     ; Restore EDI before using it

        mov     ecx, [num_left_over]    ; ecx = pixels still to draw

; LIGHTING CODE
        mov     ebx, [_fx_l]
        mov     ebp, [_fx_dl_dx]

; ITERATION is the byte offset from edi of the pixel being emitted.  It is
; a preprocessor variable (as in the no-light tail further down), because
; it is re-assigned inside the %rep below.
%assign ITERATION 0

; repproc3: up to two leftover lit pixels.  Leaves through _none_to_do as
; soon as the leftover count in ecx reaches zero.
%macro repproc3 0
; Do even pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value
        cmp     al,255
        je      %%skip4
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION], al     ; write pixel
%%skip4:        dec     ecx
        jz      near _none_to_do

; Do odd pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; get pixel from source bitmap
        add     esi, edx                ; inc u,v
        mov     ah, bh                  ; form lighting table lookup value
        add     ebx, ebp                ; update lighting value (ebp = [_fx_dl_dx], loaded above)
        cmp     al,255
        je      %%skip5
        mov     al, [_gr_fade_table+eax] ; xlat thru lighting table into dest buffer
        mov     [edi+ITERATION+1], al   ; write pixel
%%skip5:        dec     ecx
        jz      near _none_to_do
%endmacro

%rep (1 << (NBITS-1))                   ; 2 pixels per expansion -> room for 2^NBITS pixels
        ;local  skip4, skip5
        repproc3
%assign ITERATION ITERATION + 2

%endrep

; Should never get here!!!!
        int     3
        jmp     _none_to_do

; ----------------------------------------- End of LeftOver Pixels ------------------------------------------

; Abandoned 4-pixels-per-divide version of the end-of-scanline code, kept
; for reference.  It is MASM syntax and is not assembled.
; --BUGGY NEW--NewDoEndPixels:
; --BUGGY NEW-- mov     eax, num_left_over
; --BUGGY NEW-- and     num_left_over, 3
; --BUGGY NEW-- shr     eax, 2
; --BUGGY NEW-- je      NDEP_1
; --BUGGY NEW-- mov     _loop_count, eax
; --BUGGY NEW--
; --BUGGY NEW--; do 4 pixels per hunk, not 16, so div deltas by 4 (16/4=4)
; --BUGGY NEW-- shr     DU1,2
; --BUGGY NEW-- shr     DV1,2
; --BUGGY NEW-- shr     DZ1,2
; --BUGGY NEW--
; --BUGGY NEW--NDEP_TopOfLoop4:
; --BUGGY NEW-- add     ebx, DU1
; --BUGGY NEW-- add     ebp, DV1
; --BUGGY NEW-- add     ecx, DZ1
; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
; --BUGGY NEW--
; --BUGGY NEW--; Done with ebx, ebp, ecx until next iteration
; --BUGGY NEW-- push    ebx
; --BUGGY NEW-- push    ecx
; --BUGGY NEW-- push    ebp
; --BUGGY NEW-- push    edi
; --BUGGY NEW--
; --BUGGY NEW--; Find fixed U1
; --BUGGY NEW-- mov     eax, ebx
; --BUGGY NEW-- mov     edx,ebx
; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
; --BUGGY NEW-- mov     ebx, eax        ; ebx = U1 until pop's
; --BUGGY NEW--
; --BUGGY NEW--; Find fixed V1
; --BUGGY NEW-- mov     eax, ebp
; --BUGGY NEW-- mov     edx, ebp
; --BUGGY NEW-- shl     eax,(ZSHIFT-2)
; --BUGGY NEW-- sar     edx,32-(ZSHIFT-2)
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW--
; --BUGGY NEW-- mov     ecx, U0 ; ecx = U0 until pop's
; --BUGGY NEW-- mov     edi, V0 ; edi = V0 until pop's
; --BUGGY NEW--
; --BUGGY NEW-- shl     eax, 16-(ZSHIFT-2)
; --BUGGY NEW-- mov     ebp, eax        ; ebp = V1 until pop's
; --BUGGY NEW--
; --BUGGY NEW--; Make ESI = V0:U0 in 6:10,6:10 format
; --BUGGY NEW-- mov     eax, ecx
; --BUGGY NEW-- shr     eax, 6
; --BUGGY NEW-- mov     esi, edi
; --BUGGY NEW-- shl     esi, 10
; --BUGGY NEW-- mov     si, ax
; --BUGGY NEW--
; --BUGGY NEW--; Make EDX = DV:DU in 6:10,6:10 format
; --BUGGY NEW-- mov     eax, ebx
; --BUGGY NEW-- sub     eax, ecx
; --BUGGY NEW-- sar     eax, (NBITS-2)+6
; --BUGGY NEW-- mov     edx, ebp
; --BUGGY NEW-- sub     edx, edi
; --BUGGY NEW-- shl     edx, 10-(NBITS-2)       ; EDX = V1-V0/ 4 in 6:10 int:frac
; --BUGGY NEW-- mov     dx, ax  ; put delta u in low word
; --BUGGY NEW--
; --BUGGY NEW--; Save the U1 and V1 so we don't have to divide on the next iteration
; --BUGGY NEW-- mov     U0, ebx
; --BUGGY NEW-- mov     V0, ebp
; --BUGGY NEW--
; --BUGGY NEW-- pop     edi     ; Restore EDI before using it
; --BUGGY NEW--
; --BUGGY NEW--; LIGHTING CODE
; --BUGGY NEW-- mov     ebx, _fx_l
; --BUGGY NEW-- mov     ebp, _fx_dl_dx
; --BUGGY NEW--
; --BUGGY NEW--;**      test    _Transparency_on,-1
; --BUGGY NEW--;**      je      NDEP_no_trans1
; --BUGGY NEW--
; --BUGGY NEW-- REPT 2
; --BUGGY NEW-- local   NDEP_skipa1, NDEP_skipa2
; --BUGGY NEW--
; --BUGGY NEW-- mov     eax, esi        ; get u,v
; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
; --BUGGY NEW-- add     esi, edx        ; inc u,v
; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skipa1
; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skipa1:
; --BUGGY NEW-- inc     edi
; --BUGGY NEW--
; --BUGGY NEW--; Do odd pixel
; --BUGGY NEW-- mov     eax, esi        ; get u,v
; --BUGGY NEW-- shr     eax, 26 ; shift out all but int(v)
; --BUGGY NEW-- shld    ax,si,6 ; shift in u, shifting up v
; --BUGGY NEW-- add     esi, edx        ; inc u,v
; --BUGGY NEW-- mov     al, es:[eax]    ; get pixel from source bitmap
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skipa2
; --BUGGY NEW-- mov     ah, bh  ; form lighting table lookup value
; --BUGGY NEW-- add     ebx, ebp        ; update lighting value
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat thru lighting table into dest buffer
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skipa2:
; --BUGGY NEW-- inc     edi
; --BUGGY NEW--
; --BUGGY NEW-- ENDM
; --BUGGY NEW--
; --BUGGY NEW-- mov     _fx_l, ebx
; --BUGGY NEW-- pop     ebp
; --BUGGY NEW-- pop     ecx
; --BUGGY NEW-- pop     ebx
; --BUGGY NEW-- dec     _loop_count
; --BUGGY NEW-- jnz     NDEP_TopOfLoop4
; --BUGGY NEW--
; --BUGGY NEW-- test    num_left_over, -1
; --BUGGY NEW-- je      _none_to_do
; --BUGGY NEW--
; --BUGGY NEW--NDEP_1:
; --BUGGY NEW-- mov     esi,ebx
; --BUGGY NEW--
; --BUGGY NEW-- align   4
; --BUGGY NEW--NDEP_loop:
; --BUGGY NEW--
; --BUGGY NEW--; compute v coordinate
; --BUGGY NEW-- mov     eax, ebp        ; get v
; --BUGGY NEW-- mov     edx, eax
; --BUGGY NEW-- sar     edx, 31
; --BUGGY NEW-- idiv    ecx     ; eax = (v/z)
; --BUGGY NEW--
; --BUGGY NEW-- and     eax,3fh ; mask with height-1
; --BUGGY NEW-- mov     ebx,eax
; --BUGGY NEW--
; --BUGGY NEW--; compute u coordinate
; --BUGGY NEW-- mov     eax, esi        ; get u
; --BUGGY NEW-- mov     edx, eax
; --BUGGY NEW-- sar     edx, 31
; --BUGGY NEW-- idiv    ecx     ; eax = (u/z)
; --BUGGY NEW--
; --BUGGY NEW-- shl     eax,26
; --BUGGY NEW-- shld    ebx,eax,6       ; esi = v*64+u
; --BUGGY NEW--
; --BUGGY NEW--; read 1 pixel
; --BUGGY NEW-- xor     eax, eax
; --BUGGY NEW-- mov     al, es:[ebx]    ; get pixel from source bitmap
; --BUGGY NEW--
; --BUGGY NEW--; lighting code
; --BUGGY NEW-- mov     ebx, _fx_l      ; get temp copy of lighting value
; --BUGGY NEW-- mov     ah, bh  ; get lighting level
; --BUGGY NEW-- add     ebx, _fx_dl_dx  ; update lighting value
; --BUGGY NEW-- mov     _fx_l, ebx      ; save temp copy of lighting value
; --BUGGY NEW--
; --BUGGY NEW--; transparency check
; --BUGGY NEW-- cmp     al,255
; --BUGGY NEW-- je      NDEP_skip2      ; this pixel is transparent, so don't write it (or light it)
; --BUGGY NEW--
; --BUGGY NEW-- mov     al, _gr_fade_table[eax] ; xlat pixel thru lighting tables
; --BUGGY NEW--
; --BUGGY NEW--; write 1 pixel
; --BUGGY NEW-- mov     [edi],al
; --BUGGY NEW--NDEP_skip2:      inc     edi
; --BUGGY NEW--
; --BUGGY NEW--; update deltas
; --BUGGY NEW-- add     ebp,_fx_dv_dx
; --BUGGY NEW-- add     esi,_fx_du_dx
; --BUGGY NEW-- add     ecx,_fx_dz_dx
; --BUGGY NEW-- je      _div_0_abort    ; would be dividing by 0, so abort
; --BUGGY NEW--
; --BUGGY NEW-- dec     num_left_over
; --BUGGY NEW-- jne     NDEP_loop
; --BUGGY NEW--
; --BUGGY NEW-- jmp     _none_to_do

; Finish the span with a full divide pair for each of the num_left_over
; pixels (the "new, but slower" end code selected by _new_end).
; On entry ebx = u, ebp = v, ecx = z at the first leftover pixel.
NewDoEndPixels:
        mov     esi,ebx

        align   4
NDEP_loop:

; compute v coordinate
        mov     eax, ebp                ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        add     ebx,[_pixptr]
        xor     eax, eax
        mov     al, [ebx]               ; get pixel from source bitmap

; lighting code
        mov     ebx, [_fx_l]            ; get temp copy of lighting value
        mov     ah, bh                  ; get lighting level
        add     ebx, [_fx_dl_dx]        ; update lighting value
        mov     [_fx_l], ebx            ; save temp copy of lighting value

; transparency check
        cmp     al,255
        je      NDEP_skip2              ; this pixel is transparent, so don't write it (or light it)

        mov     al, [_gr_fade_table+eax] ; xlat pixel thru lighting tables

; write 1 pixel
        mov     [edi],al
NDEP_skip2:     inc     edi

; update deltas
        add     ebp,[_fx_dv_dx]
        add     esi,[_fx_du_dx]
        add     ecx,[_fx_dz_dx]
        je      near _div_0_abort       ; would be dividing by 0, so abort

        dec     dword [num_left_over]
        jne     NDEP_loop

        jmp     _none_to_do

; ==================================================== No Lighting Code ======================================================
; Unlit fast path.  Same structure as the lit one: single pixels with a
; divide pair each until edi is a multiple of 4, then 2^NBITS-pixel chunks.
global _tmap_loop_fast_nolight
_tmap_loop_fast_nolight:
        mov     esi,ebx

        align   4
NotDwordAligned1_nolight:
        test    edi, 11b
        jz      DwordAligned1_nolight

; compute v coordinate
        mov     eax,ebp                 ; get v
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (v/z)

        and     eax,3fh                 ; mask with height-1
        mov     ebx,eax

; compute u coordinate
        mov     eax, esi                ; get u
        mov     edx, eax
        sar     edx, 31
        idiv    ecx                     ; eax = (u/z)

        shl     eax,26
        shld    ebx,eax,6               ; ebx = v*64+u

; read 1 pixel
        add     ebx,[_pixptr]
        mov     al,[ebx]                ; get pixel from source bitmap

; write 1 pixel
        cmp     al,255
        je      skip6
        mov     [edi],al
skip6:  inc     edi

; update deltas
        add     ebp,[_fx_dv_dx]
        add     esi,[_fx_du_dx]
        add     ecx,[_fx_dz_dx]
        je      near _div_0_abort       ; would be dividing by 0, so abort

        dec     dword [_loop_count]
        jns     NotDwordAligned1_nolight
        jmp     _none_to_do

DwordAligned1_nolight:
        mov     ebx,esi

        mov     eax, [_loop_count]
        inc     eax                     ; eax = pixels still to draw
        mov     [num_left_over], eax
; Continuation of DwordAligned1_nolight: eax = pixels still to draw, and
; the same value has just been stored in num_left_over.
        shr     eax, NBITS              ; eax = number of full 2^NBITS-pixel chunks

        test    eax, -1
        je      near tmap_loop          ; no 2^NBITS chunks, do divide/pixel for whole scanline

        mov     [_loop_count], eax      ; _loop_count = pixels / NPIXS
        shl     eax, NBITS
        sub     [num_left_over], eax    ; num_left_over = pixels beyond the last full chunk

; compute initial v coordinate
        mov     eax,ebp                 ; get v
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT

        mov     [V0], eax

; compute initial u coordinate
        mov     eax,ebx                 ; get u
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT

        mov     [U0], eax

; Set deltas to NPIXS pixel increments
        mov     eax, [_fx_du_dx]
        shl     eax, NBITS
        mov     [DU1], eax
        mov     eax, [_fx_dv_dx]
        shl     eax, NBITS
        mov     [DV1], eax
        mov     eax, [_fx_dz_dx]
        shl     eax, NBITS
        mov     [DZ1], eax

; One iteration = one 2^NBITS-pixel chunk: divide once at the far end of
; the chunk and step u/z, v/z linearly across it.
        align   4
TopOfLoop4_nolight:
        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort       ; would be dividing by 0, so abort

; Done with ebx, ebp, ecx until next iteration
        push    ebx
        push    ecx
        push    ebp
        push    edi

; Find fixed U1
        mov     eax, ebx
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT

        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        ;PDIV
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT

        mov     ebp, eax                ; ebp = V1 until pop's

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

; Save the U1 and V1 so we don't have to divide on the next iteration
        mov     [U0], ebx
        mov     [V0], ebp

        pop     edi                     ; Restore EDI before using it

; repproc4: fetch four texels into ecx (cl, ch, rotate, cl, ch, rotate, so
; the first texel ends up in the lowest byte), then either store all four
; at once or, when _Transparency_on is set, store only the bytes != 255.
%macro repproc4 0
; Do 1 pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     esi, edx                ; inc u,v
        add     eax,[_pixptr]
        mov     cl, [eax]               ; load into buffer register

        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        mov     ch, [eax]               ; load into buffer register
        add     esi, edx                ; inc u,v
        ror     ecx, 16                 ; move to next dest pixel

        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        mov     cl, [eax]               ; load into buffer register
        add     esi, edx                ; inc u,v

        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        mov     ch, [eax]               ; load into buffer register
        add     esi, edx                ; inc u,v
        ror     ecx, 16                 ;-- can get rid of this, just write in different order below -- ; move to next dest pixel

        test    dword [_Transparency_on],-1
        je      %%no_trans2
        cmp     ecx,-1                  ; all four texels are 255: nothing to store
        je      %%skip7

        cmp     cl,255
        je      %%skip1q
        mov     [edi],cl
%%skip1q:

        cmp     ch,255
        je      %%skip2q
        mov     [edi+1],ch
%%skip2q:
        ror     ecx,16

        cmp     cl,255
        je      %%skip3q
        mov     [edi+2],cl
%%skip3q:


        cmp     ch,255
        je      %%skip4q
        mov     [edi+3],ch
%%skip4q:

        jmp     %%skip7
%%no_trans2:
        mov     [edi],ecx               ; Draw 4 pixels to display
%%skip7:        add     edi,4
%endmacro

%rep (1 << (NBITS-2))                   ; 4 pixels per expansion -> 2^NBITS pixels
        ;local  skip7, no_trans2, skip1q, skip2q, skip3q, skip4q
        repproc4

%endrep

        pop     ebp
        pop     ecx
        pop     ebx
        dec     dword [_loop_count]
        jnz     near TopOfLoop4_nolight

EndOfLoop4_nolight:

        test    dword [num_left_over], -1
        je      near _none_to_do

; Leftover pixels: one more divide pair a whole chunk further on, then
; linear stepping for the num_left_over remaining pixels.
; NOTE(review): unlike the lit DoEndPixels code, this path has no
; negative-z back-off and no minimum-divisor clamp before the idiv --
; confirm that is acceptable for unlit spans.
DoEndPixels_nolight:
        add     ebx, [DU1]
        add     ebp, [DV1]
        add     ecx, [DZ1]
        je      near _div_0_abort
        push    edi                     ; use edi as a temporary variable

; Find fixed U1
        mov     eax, ebx
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (u/z)
        shl     eax, 16-ZSHIFT
        mov     ebx, eax                ; ebx = U1 until pop's

; Find fixed V1
        mov     eax, ebp
        mov     edx,eax
        shl     eax,ZSHIFT
        sar     edx,32-ZSHIFT
        idiv    ecx                     ; eax = (v/z)
        shl     eax, 16-ZSHIFT
        mov     ebp, eax                ; ebp = V1 until pop's

        mov     ecx, [U0]               ; ecx = U0 until pop's
        mov     edi, [V0]               ; edi = V0 until pop's

; Make ESI = V0:U0 in 6:10,6:10 format
        mov     eax, ecx
        shr     eax, 6
        mov     esi, edi
        shl     esi, 10
        mov     si, ax

; Make EDX = DV:DU in 6:10,6:10 format
        mov     eax, ebx
        sub     eax, ecx
        sar     eax, NBITS+6
        mov     edx, ebp
        sub     edx, edi
        shl     edx, 10-NBITS           ; EDX = (V1-V0) / 2^NBITS in 6:10 int:frac
        mov     dx, ax                  ; put delta u in low word

        pop     edi                     ; Restore EDI before using it

        mov     ecx, [num_left_over]    ; ecx = pixels still to draw

; ITERATION is the byte offset from edi of the pixel being emitted.
%assign ITERATION 0
; repproc5: one leftover unlit pixel (texel 255 is skipped).  Leaves
; through _none_to_do as soon as the leftover count in ecx reaches zero.
%macro repproc5 0
; Do 1 pixel
        mov     eax, esi                ; get u,v
        shr     eax, 26                 ; shift out all but int(v)
        shld    ax,si,6                 ; shift in u, shifting up v
        add     eax,[_pixptr]
        movzx   eax, byte [eax]         ; load into buffer register
        add     esi, edx                ; inc u,v
        cmp     al,255
        je      %%skip8
        mov     [edi+ITERATION], al     ; write pixel
%%skip8:        dec     ecx
        jz      near _none_to_do
%endmacro

%rep (1 << NBITS)
        ;local  skip8
        repproc5
%assign ITERATION ITERATION + 1
%endrep

; Should never get here!!!!!
        int     3
        jmp     _none_to_do
