//
// d_spr8.s
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
//
// Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
// run through the C preprocessor (for #include, #if and the C() macro).
//

#include "qasm.h"

#if id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with transparency.
//----------------------------------------------------------------------

	.text

// out-of-line, rarely-needed clamping code
//
// Each stub loads the clamped value into the register being tested and
// jumps back to the matching LClampReentryN label in the main routine.
// They are kept out of line so the common (unclamped) case falls straight
// through.

//
// Clamp 0/1: initial s (%esi) and t (%edx) of a span.
// LClampHighOrLowN is reached through an unsigned "ja" after a cmpl against
// the extent, so the value is either really above the extent or negative.
// The flags from that cmpl are still live here (jumps do not modify flags),
// so the signed "jg" tells the two cases apart: above -> extent,
// negative -> 0.
//
LClampHigh0:
	movl	C(bbextents),%esi
	jmp		LClampReentry0
LClampHighOrLow0:
	jg		LClampHigh0
	xorl	%esi,%esi
	jmp		LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp		LClampReentry1
LClampHighOrLow1:
	jg		LClampHigh1
	xorl	%edx,%edx
	jmp		LClampReentry1

//
// Clamp 2/3: snext (%ebp) and tnext (%ecx) at the end of a full 8-pixel
// segment. Low limit is 2048, high limit is the extent.
//
LClampLow2:
	movl	$2048,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

LClampLow3:
	movl	$2048,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

//
// Clamp 4/5: snext (%eax) and tnext (%ebx) at the end of the last (partial)
// segment. Same limits as clamp 2/3.
//
LClampLow4:
	movl	$2048,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

LClampLow5:
	movl	$2048,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5


//----------------------------------------------------------------------
// D_SpriteDrawSpans
//
// Draws a list of horizontal spans, perspective-correct, reading 8-bit
// texels and skipping any texel equal to TRANSPARENT_COLOR. Each pixel is
// also tested against a 16-bit z-buffer entry and, when drawn, updates it.
//
// In:    one stack argument, read at pspans(%esp) after the four pushes
//        below: pointer to the first span descriptor (fields sspan_t_u,
//        sspan_t_v, sspan_t_count; stride sspan_t_size).
//        NOTE(review): presumably called from C as
//        D_SpriteDrawSpans(sspan_t *) - confirm against the C prototype.
// Out:   nothing in registers.
// Saves: %ebp, %edi, %esi, %ebx (pushed on entry, popped on exit).
// Clobb: %eax, %ecx, %edx, flags, x87 stack (left balanced on exit).
//
// Span list walking: the first descriptor is skipped if its count is <= 0;
// after that, a count of 0 is skipped and the first negative count ends the
// list (see LNextSpan).
//
// Per-pixel register roles (inner loops):
//   %esi = pointer to the current source texel
//   %edi = pointer to the destination segment
//   %ecx = pointer to the z-buffer segment (pz)
//   %ebp = 1/z accumulator, rotated so its upper 16 bits sit in %bp
//   %edx = t fraction (left-justified), %ebx = s fraction (left-justified)
//   %eax = scratch (texel in %al, then 0/-1 carry mask)
//
// x87 stack invariant while a span is being drawn (top first):
//   [z*64k for the next segment end, when an FDIV is in flight] |
//   1/z | t/z | s/z
//----------------------------------------------------------------------

// offset of the argument: 4 bytes of return address + 16 bytes of pushes
#define pspans	4+16

	.align 4
.globl C(D_SpriteDrawSpans)
C(D_SpriteDrawSpans):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers, and 1/z step in 0.32 fixed-point
//
// FIXME: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_8				// sdivzstepu*8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8				// tdivzstepu*8 | sdivzstepu*8
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8				// zistepu*8 | tdivzstepu*8 | sdivzstepu*8
	movl	%edx,pbase			// pbase = cacheblock
	flds	C(d_zistepu)
	fmuls	fp_64kx64k			// zistepu*64k*64k | zistepu*8 |
								//  tdivzstepu*8 | sdivzstepu*8
	fxch	%st(3)				// sdivzstepu*8 | zistepu*8 |
								//  tdivzstepu*8 | zistepu*64k*64k
	fstps	sdivz8stepu
	fstps	zi8stepu
	fstps	tdivz8stepu
	fistpl	izistep				// FP stack is empty again here
	movl	izistep,%eax
	rorl	$16,%eax		// put upper 16 bits in low word
	movl	sspan_t_count(%ebx),%ecx
	movl	%eax,izistep

	cmpl	$0,%ecx
	jle		LNextSpan		// first span has nothing to draw

LSpanLoop:

//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	sspan_t_v(%ebx)
	fildl	sspan_t_u(%ebx)

	fld		%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
							//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
							//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
							//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv | dv*d_zistepv |
							//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
							//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
							//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
							//  du*d_zistepu; stays in %st(0) at end
							// 1/z | fp_64k | t/z | s/z

	fld		%st(0)			// FIXME: get rid of stall on FMUL?
	fmuls	fp_64kx64k		// 1/z*64k*64k | 1/z | fp_64k | t/z | s/z
	fxch	%st(1)			// 1/z | 1/z*64k*64k | fp_64k | t/z | s/z

//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(2)	// 1/z | 1/z*64k*64k | z*64k | t/z | s/z
	fxch	%st(1)			// 1/z*64k*64k | 1/z | z*64k | t/z | s/z

	fistpl	izi				// 0.32 fixed-point 1/z
							// 1/z | z*64k | t/z | s/z
	movl	izi,%ebp

//
// set pz to point to the first z-buffer pixel in the span
//
	rorl	$16,%ebp		// put upper 16 bits in low word
	movl	sspan_t_v(%ebx),%eax
	movl	%ebp,izi
	movl	sspan_t_u(%ebx),%ebp
	imull	C(d_zrowbytes)	// %edx:%eax = v * d_zrowbytes (clobbers %edx)
	shll	$1,%ebp					// a word per pixel
	addl	C(d_pzbuffer),%eax
	addl	%ebp,%eax
	movl	%eax,pz

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ebp
	movl	sspan_t_v(%ebx),%eax
	pushl	%ebx		// preserve spans pointer
						//  (popped at LEndSpan, after the span is drawn)
	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ebp,%edi
	movl	sspan_t_u(%ebx),%ebp
	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];

//
// now start the FDIV for the end of the span
//
	cmpl	$8,%ecx
	ja		LSetupNotLast1

	decl	%ecx			// from here on %ecx = pixels in span - 1
	jz		LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

// step 1/z, t/z and s/z by (pixels - 1) so the divide lands exactly on the
// last pixel of the span
	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
							//  _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
							//  _d_tdivzstepu*scm1
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)	// 1/z | t/z | s/z, all advanced to span end

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight1

LCleanup1:
// single-pixel span: no divide is started, so the FP stack holds only
// 1/z | t/z | s/z when LFDIVInFlight1 is reached from here
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp		LFDIVInFlight1

	.align	4
LSetupNotLast1:
// more than 8 pixels: the first segment is a full 8-pixel one
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fadds	zi8stepu		// 1/z advanced by 8 pixels
	fxch	%st(2)
	fadds	sdivz8stepu		// s/z advanced by 8 pixels
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)	// t/z advanced by 8 pixels
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight1:

// add the adjustments to the initial s and t and clamp them to
// [0, bbextents] / [0, bbextentt]; the integer work below overlaps the
// divide started above
	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0	// unsigned: above extent or negative
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf		// movl leaves the cmpl flags intact for the ja
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi					// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax					// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	addl	%ebx,%esi
	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
									//           ((tfrac >> 16) * cachewidth);

//
// determine whether last span or not
//
	cmpl	$8,%ecx
	jna		LLastSegment

//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext			// 1/z | t/z | s/z
	movl	snext,%eax
	movl	tnext,%edx

	subl	$8,%ecx		// count off this segment's pixels
	movl	C(sadjust),%ebp
	pushl	%ecx		// remember count of remaining pixels
	movl	C(tadjust),%ecx

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$2048,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp			// s delta across the 8-pixel segment
	subl	t,%ecx			// t delta across the 8-pixel segment

//
// set up advancetable
//
// The shifts are 19 and 13 rather than 16 and 16 because the deltas cover
// eight pixels: 19 = 16 + 3 and 13 = 16 - 3 fold the divide by 8 into the
// integer/fraction split.
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%edx			// sstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$19,%eax			// tstep >>= 16;
	jz		LIsZero				// skip the multiply when the result is 0
	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
LIsZero:
	addl	%edx,%eax			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$13,%ebp			// left-justify sstep fractional part
	movl	%ebp,sstep
	movl	sfracf,%ebx
	shll	$13,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t
	movl	%ecx,tstep

	movl	pz,%ecx
	movl	izi,%ebp

//
// Unrolled 8-pixel segment. Every pixel follows the same pattern:
//
//   cmpw  N*2(%ecx),%bp      skip the pixel if the new 1/z (upper 16 bits,
//   jl    LpX                 held in %bp) is below the z-buffer value
//   movb  (%esi),%al         fetch the texel; skip it if it is
//   cmpb  $(TRANSPARENT_COLOR),%al   the transparent colour
//   jz    LpX
//   movw  %bp,N*2(%ecx)      otherwise write z and colour
//   movb  %al,N(%edi)
// LpX:
//   addl  izistep,%ebp       step 1/z; both words are rotated by 16, so the
//   adcl  $0,%ebp             carry out of bit 31 is wrapped into bit 0
//   addl  tstep,%edx         step the t fraction
//   sbbl  %eax,%eax          %eax = -1 if t carried, else 0
//   addl  sstep,%ebx         step the s fraction; its carry feeds the adcl
//   adcl  advancetable+4(,%eax,4),%esi
//                            %eax = 0 picks advancetable+4 (base advance),
//                            %eax = -1 picks advancetable (one row extra)
//
	cmpw	(%ecx),%bp
	jl		Lp1
	movb	(%esi),%al			// get first source texel
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp1
	movw	%bp,(%ecx)
	movb	%al,(%edi)			// store first dest pixel
Lp1:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx			// advance tfrac fractional part by tstep frac

	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel

	cmpw	2(%ecx),%bp
	jl		Lp2
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp2
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp2:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	4(%ecx),%bp
	jl		Lp3
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp3
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp3:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	6(%ecx),%bp
	jl		Lp4
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp4
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp4:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	8(%ecx),%bp
	jl		Lp5
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp5
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp5:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

//
// start FDIV for end of next segment in flight, so it can overlap
//
	popl	%eax			// count of pixels remaining after this segment
	cmpl	$8,%eax			// more than one segment after this?
	ja		LSetupNotLast2	// yes

	decl	%eax			// last segment: keep (remaining pixels - 1)
	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
	movl	%eax,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight2:
	pushl	%eax			// remaining count (already minus 1 if this
							//  was the second-to-last segment); %eax is
							//  needed as scratch below

	cmpw	10(%ecx),%bp
	jl		Lp6
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp6
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp6:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	12(%ecx),%bp
	jl		Lp7
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp7
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp7:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	14(%ecx),%bp
	jl		Lp8
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp8
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp8:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// segment done: advance the destination and z pointers by 8 pixels and
// make the segment-end s/t the start values of the next segment
	addl	$8,%edi
	addl	$16,%ecx
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	%ecx,pz
	movl	%ebp,izi

	popl	%ecx				// retrieve count

//
// determine whether last span or not
//
	cmpl	$8,%ecx				// are there multiple segments remaining?
	ja		LNotLastSegment		// yes

//
// last segment of scan
//
// On entry %ecx = (pixels left in the span) - 1, i.e. the number of steps
// still to take.
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
	testl	%ecx,%ecx
	jz		LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext			// 1/z | t/z | s/z

	movl	C(tadjust),%ebx
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$2048,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
							//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48
// one-operand imull: %edx:%eax = %eax * operand; the step is taken from the
// high half in %edx. The table is indexed from 2 steps upwards, hence the -8.
	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)

LSetEntryvec:
//
// set up advancetable
//
// Here %ebp = sstep and %edx = tstep (per pixel), %ecx = number of steps.
// The address of the handler for this pixel count is pushed and later
// reached with "ret", which works as an indirect jump that leaves all
// registers free.
//
	movl	spr8entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax
	pushl	%ebx				// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%ecx			// sstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%edx			// tstep >>= 16;
	jz		LIsZeroLast
	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
LIsZeroLast:
	addl	%ecx,%edx			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ebp,sstep
	movl	%ecx,%edx			// %edx = t fraction for the pixel code

	movl	pz,%ecx
	movl	izi,%ebp

	ret						// jump to the number-of-pixels handler

//----------------------------------------

// one pixel left: no steps needed. LEndSpan addresses the pixel as
// 7(%edi) / 14(%ecx), so bias both pointers back by that much.
LNoSteps:
	movl	pz,%ecx
	subl	$7,%edi			// adjust for hardwired offset
	subl	$14,%ecx
	jmp		LEndSpan


// exactly one step: the step is the whole delta, no division required
LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp		LSetEntryvec

//----------------------------------------
//
// Entry points for a last segment of 2 to 8 pixels, reached through
// spr8entryvec_table and the "ret" above. The unrolled code below always
// finishes at offset 7 (14 in the z-buffer), so each entry point backs
// %edi and %ecx up by the number of pixels it skips and joins the
// unrolled code at the matching LLEntryN_8 label.
//

.globl	Spr8Entry2_8
Spr8Entry2_8:
	subl	$6,%edi		// adjust for hardwired offsets
	subl	$12,%ecx
	movb	(%esi),%al	// NOTE(review): looks redundant - LLEntry2_8 reloads
						//  %al after its z test and nothing reads it before
	jmp		LLEntry2_8

//----------------------------------------

.globl	Spr8Entry3_8
Spr8Entry3_8:
	subl	$5,%edi		// adjust for hardwired offsets
	subl	$10,%ecx
	jmp		LLEntry3_8

//----------------------------------------

.globl	Spr8Entry4_8
Spr8Entry4_8:
	subl	$4,%edi		// adjust for hardwired offsets
	subl	$8,%ecx
	jmp		LLEntry4_8

//----------------------------------------

.globl	Spr8Entry5_8
Spr8Entry5_8:
	subl	$3,%edi		// adjust for hardwired offsets
	subl	$6,%ecx
	jmp		LLEntry5_8

//----------------------------------------

.globl	Spr8Entry6_8
Spr8Entry6_8:
	subl	$2,%edi		// adjust for hardwired offsets
	subl	$4,%ecx
	jmp		LLEntry6_8

//----------------------------------------

.globl	Spr8Entry7_8
Spr8Entry7_8:
	decl	%edi		// adjust for hardwired offsets
	subl	$2,%ecx
	jmp		LLEntry7_8

//----------------------------------------
//
// Unrolled last segment. Each pixel uses the same z test, transparency
// test and stepping sequence as the full-segment code above.
//

.globl	Spr8Entry8_8
Spr8Entry8_8:
	cmpw	(%ecx),%bp
	jl		Lp9
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp9
	movw	%bp,(%ecx)
	movb	%al,(%edi)
Lp9:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry7_8:
	cmpw	2(%ecx),%bp
	jl		Lp10
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp10
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp10:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry6_8:
	cmpw	4(%ecx),%bp
	jl		Lp11
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp11
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp11:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry5_8:
	cmpw	6(%ecx),%bp
	jl		Lp12
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp12
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp12:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry4_8:
	cmpw	8(%ecx),%bp
	jl		Lp13
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp13
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp13:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry3_8:
	cmpw	10(%ecx),%bp
	jl		Lp14
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp14
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp14:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry2_8:
	cmpw	12(%ecx),%bp
	jl		Lp15
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp15
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp15:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// final pixel of the span; no stepping is needed after it
LEndSpan:
	cmpw	14(%ecx),%bp
	jl		Lp16
	movb	(%esi),%al		// load first texel in segment
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp16
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp16:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	popl	%ebx				// restore spans pointer
LNextSpan:
	addl	$(sspan_t_size),%ebx // point to next span
	movl	sspan_t_count(%ebx),%ecx
	cmpl	$0,%ecx				// any more spans?
	jg		LSpanLoop			// yes
	jz		LNextSpan			// yes, but this one's empty
								// negative count: end of the span list

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386