/* $Id: asm-386.S,v 1.8 1997/12/17 00:50:51 brianp Exp $ */

/*
 * asm-386.S - special (hopefully faster) transformation functions for x86
 *
 * by Josh Vanderhoof
 *
 * This file is in the public domain.
 */

/*
 * $Log: asm-386.S,v $
 * Revision 1.8  1997/12/17 00:50:51  brianp
 * applied Josh's patch to fix texture coordinate transformation bugs
 *
 * Revision 1.7  1997/12/17 00:27:11  brianp
 * applied Josh's patch to fix bfris
 *
 * Revision 1.6  1997/12/01 01:02:41  brianp
 * added FreeBSD patches (Daniel J. O'Connor)
 *
 * Revision 1.5  1997/11/19 23:52:17  brianp
 * added missing "cld" instruction in asm_transform_points4_identity()
 *
 * Revision 1.4  1997/11/11 02:22:41  brianp
 * small change per Josh to ensure U/V pairing
 *
 * Revision 1.3  1997/11/07 03:37:24  brianp
 * added missing line from Stephane Rehel
 *
 * Revision 1.2  1997/11/07 03:30:37  brianp
 * added Josh's 11-5-97 patches
 *
 * Revision 1.1  1997/10/30 06:00:33  brianp
 * Initial revision
 */

/*
 * Conventions used throughout this file
 *
 *  - 32-bit x86, Intel operand order (op dst, src).  PUBLIC, HEX(),
 *    DEC(), .code, .const and END come from <asm.inc>.
 *  - Exported routines read their arguments from the stack and return
 *    with a plain "ret", so the caller removes the arguments.
 *  - esi, edi, ebx and ebp are saved and restored whenever a routine
 *    uses them; eax, ecx and edx are scratch.
 *  - Every routine pops all x87 values it pushes.  Up to eight x87
 *    registers are in use at once, so the x87 stack must be empty on
 *    entry.
 *  - S(i)    = i-th float of the current source element      (esi)
 *    D(i)    = i-th float of the current destination element (edi)
 *    M(r, c) = m[4*r + c]                                     (edx)
 *  - A comment such as "3 1 2 0 4 5 6 7" after an x87 instruction lists,
 *    starting at st(0), which slot of the most recently documented
 *    stack picture now sits in each position.
 *  - Integer instructions placed between x87 instructions are there for
 *    scheduling.  lea/mov/fxch/fadd/fstp do not modify EFLAGS, so the
 *    flags produced by "dec ecx" / "sub ecx, 2" are still valid at the
 *    closing "jnz".
 *  - ".align 4" is placed in front of each entry label so that the
 *    entry point itself is aligned and no padding is executed on entry.
 */

#include <asm.inc>

#define S(x)    dword ptr [esi + 4*x]
#define D(x)    dword ptr [edi + 4*x]
#define M(x, y) dword ptr [edx + 16*x + 4*y]

.code

/*
 * void asm_transform_points3_general( GLuint n, GLfloat d[][4],
 *                                     GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements (stride: 4 floats in both arrays):
 *   D(j) = S(0)*M(0,j) + S(1)*M(1,j) + S(2)*M(2,j) + M(3,j)    j = 0..3
 * S(3) is never read.  Returns immediately when n == 0.
 */
.align 4
PUBLIC _asm_transform_points3_general
_asm_transform_points3_general:
    push esi
    push edi

    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points3_general_end

.align 4
_asm_transform_points3_general_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(0)
    fmul M(0, 2)
    fld S(0)
    fmul M(0, 3)

    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(1, 2)
    fld S(1)
    fmul M(1, 3)

    /*
     * The FPU stack should now look like this:
     *
     * st(7) = S(0) * M(0, 0)
     * st(6) = S(0) * M(0, 1)
     * st(5) = S(0) * M(0, 2)
     * st(4) = S(0) * M(0, 3)
     * st(3) = S(1) * M(1, 0)
     * st(2) = S(1) * M(1, 1)
     * st(1) = S(1) * M(1, 2)
     * st(0) = S(1) * M(1, 3)
     */

    fxch st(3)                      /* 3 1 2 0 4 5 6 7 */
    faddp st(7), st                 /* 1 2 0 4 5 6 7 */
    fxch st(1)                      /* 2 1 0 4 5 6 7 */
    faddp st(5), st                 /* 1 0 4 5 6 7 */
    faddp st(3), st                 /* 0 4 5 6 7 */
    faddp st(1), st                 /* 4 5 6 7 */

    /*
     * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
     */

    fld S(2)
    fmul M(2, 0)
    fld S(2)
    fmul M(2, 1)
    fld S(2)
    fmul M(2, 2)
    fld S(2)
    fmul M(2, 3)

    /*
     * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
     * st(3) = S(2) * M(2, 0)
     * st(2) = S(2) * M(2, 1)
     * st(1) = S(2) * M(2, 2)
     * st(0) = S(2) * M(2, 3)
     */

    fxch st(3)                      /* 3 1 2 0 4 5 6 7 */
    faddp st(7), st                 /* 1 2 0 4 5 6 7 */
    fxch st(1)                      /* 2 1 0 4 5 6 7 */
    faddp st(5), st                 /* 1 0 4 5 6 7 */
    faddp st(3), st                 /* 0 4 5 6 7 */
    faddp st(1), st                 /* 4 5 6 7 */

    /*
     * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
     */

    fxch st(3)                      /* 3 1 2 0 */
    fadd M(3, 0)
    fxch st(2)                      /* 2 1 3 0 */
    fadd M(3, 1)
    fxch st(1)                      /* 1 2 3 0 */
    fadd M(3, 2)
    fxch st(3)                      /* 0 2 3 1 */
    fadd M(3, 3)

    /*
     * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + M(3, 2)
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + M(3, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + M(3, 1)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + M(3, 3)
     */

    fxch st(3)                      /* 3 1 2 0 */
    fstp D(2)                       /* 1 2 0 */
    fxch st(1)                      /* 2 1 0 */
    fstp D(0)                       /* 1 0 */
    lea esi, S(4)                   /* next source element */
    fstp D(1)                       /* 0 */
    dec ecx                         /* sets ZF for the jnz below */
    fstp D(3)                       /* */

    lea edi, D(4)                   /* next destination element */

    jnz _asm_transform_points3_general_loop

_asm_transform_points3_general_end:
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points3_identity( GLuint n, GLfloat d[][4],
 *                                      GLfloat s[][4] );
 *
 * For each of the n elements: D(0..2) = S(0..2), D(3) = 1.0f.
 * The copy is done with integer moves; no x87 registers are used.
 */
.align 4
PUBLIC _asm_transform_points3_identity
_asm_transform_points3_identity:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov esi, [esp + 20]             /* esi = s */
    push ebx
    push ebp

    test ecx, ecx
    jz _asm_transform_points3_identity_end

    mov ebp, HEX(3f800000)          /* IEEE-754 single precision 1.0 */

.align 4
_asm_transform_points3_identity_loop:
    mov eax, S(0)
    mov edx, S(1)
    mov ebx, S(2)
    lea esi, S(4)
    mov D(0), eax
    mov D(1), edx
    mov D(2), ebx
    mov D(3), ebp
    dec ecx
    lea edi, D(4)
    jnz _asm_transform_points3_identity_loop

_asm_transform_points3_identity_end:
    pop ebp
    pop ebx
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points3_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + S(1)*M(1,0) + M(3,0)
 *   D(1) = S(0)*M(0,1) + S(1)*M(1,1) + M(3,1)
 *   D(2) = S(2)
 *   D(3) = 1.0f
 *
 * If n is odd, one element is handled up front; the main loop then
 * processes two elements per iteration.
 */
.align 4
PUBLIC _asm_transform_points3_2d
_asm_transform_points3_2d:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */
    push ebp

    mov ebp, HEX(3f800000)          /* IEEE-754 single precision 1.0 */

    test cl, DEC(1)                 /* odd element count? */
    jz _asm_transform_points3_2d_step

    dec ecx

    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)

    /*
     * st(3) = S(0) * M(0, 0)
     * st(2) = S(0) * M(0, 1)
     * st(1) = S(1) * M(1, 0)
     * st(0) = S(1) * M(1, 1)
     */

    fxch st(1)                      /* 1 0 2 3 */
    fadd M(3, 0)
    fxch st(1)                      /* 0 1 2 3 */
    fadd M(3, 1)
    fxch st(1)                      /* 1 0 2 3 */
    faddp st(3), st                 /* 0 2 3 */
    faddp st(1), st                 /* 2 3 */
    fstp D(1)                       /* 3 */
    fstp D(0)                       /* */
    mov eax, S(2)
    lea esi, S(4)
    mov D(3), ebp
    mov D(2), eax
    lea edi, D(4)

_asm_transform_points3_2d_step:
    test ecx, ecx
    jz _asm_transform_points3_2d_end

.align 4
_asm_transform_points3_2d_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(4)
    fmul M(0, 0)
    fld S(4)
    fmul M(0, 1)
    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(5)
    fmul M(1, 0)
    fld S(5)
    fmul M(1, 1)

    /*
     * st(7) = S(0) * M(0, 0)
     * st(6) = S(0) * M(0, 1)
     * st(5) = S(4) * M(0, 0)
     * st(4) = S(4) * M(0, 1)
     * st(3) = S(1) * M(1, 0)
     * st(2) = S(1) * M(1, 1)
     * st(1) = S(5) * M(1, 0)
     * st(0) = S(5) * M(1, 1)
     */

    fxch st(7)                      /* 7 1 2 3 4 5 6 0 */
    fadd M(3, 0)
    fxch st(6)                      /* 6 1 2 3 4 5 7 0 */
    fadd M(3, 1)
    fxch st(5)                      /* 5 1 2 3 4 6 7 0 */
    fadd M(3, 0)
    fxch st(4)                      /* 4 1 2 3 5 6 7 0 */
    fadd M(3, 1)

    mov eax, S(2)
    mov D(3), ebp
    mov D(2), eax
    mov eax, S(6)
    mov D(7), ebp
    mov D(6), eax
    lea esi, S(8)                   /* two source elements consumed */
    sub ecx, DEC(2)                 /* sets ZF for the jnz below */

    /*
     * st(7) = S(5) * M(1, 1)
     * st(6) = S(0) * M(0, 0) + M(3, 0)
     * st(5) = S(0) * M(0, 1) + M(3, 1)
     * st(4) = S(4) * M(0, 0) + M(3, 0)
     * st(3) = S(1) * M(1, 0)
     * st(2) = S(1) * M(1, 1)
     * st(1) = S(5) * M(1, 0)
     * st(0) = S(4) * M(0, 1) + M(3, 1)
     */

    faddp st(7), st                 /* 1 2 3 4 5 6 7 */
    faddp st(3), st                 /* 2 3 4 5 6 7 */
    faddp st(3), st                 /* 3 4 5 6 7 */
    faddp st(3), st                 /* 4 5 6 7 */
    fxch st(3)                      /* 7 5 6 4 */
    fstp D(5)                       /* 5 6 4 */
    fstp D(1)                       /* 6 4 */
    fstp D(0)                       /* 4 */
    fstp D(4)                       /* */

    lea edi, D(8)                   /* two destination elements produced */
    jnz _asm_transform_points3_2d_loop

_asm_transform_points3_2d_end:
    pop ebp
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points3_2d_no_rot( GLuint n, GLfloat d[][4],
 *                                       GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + M(3,0)
 *   D(1) = S(1)*M(1,1) + M(3,1)
 *   D(2) = S(2)
 *   D(3) = 1.0f
 */
.align 4
PUBLIC _asm_transform_points3_2d_no_rot
_asm_transform_points3_2d_no_rot:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */
    push ebp

    test ecx, ecx
    jz _asm_transform_points3_2d_no_rot_end

    mov ebp, HEX(3f800000)          /* IEEE-754 single precision 1.0 */

.align 4
_asm_transform_points3_2d_no_rot_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(1)
    fmul M(1, 1)
    fxch st(1)
    fadd M(3, 0)
    fxch st(1)
    fadd M(3, 1)
    fxch st(1)
    fstp D(0)
    fstp D(1)

    mov eax, S(2)                   /* cycle 1: U pipe */
    mov D(3), ebp                   /*          V pipe */
    mov D(2), eax                   /* cycle 2: U pipe */

    dec ecx
    lea esi, S(4)
    lea edi, D(4)
    jnz _asm_transform_points3_2d_no_rot_loop

_asm_transform_points3_2d_no_rot_end:
    pop ebp
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points3_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(j) = S(0)*M(0,j) + S(1)*M(1,j) + S(2)*M(2,j) + M(3,j)    j = 0..2
 *   D(3) = 1.0f
 */
.align 4
PUBLIC _asm_transform_points3_3d
_asm_transform_points3_3d:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points3_3d_end

    mov eax, HEX(3f800000)          /* IEEE-754 single precision 1.0 */

.align 4
_asm_transform_points3_3d_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(0)
    fmul M(0, 2)

    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(1, 2)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(0) * M(0, 1)
     * st(3) = S(0) * M(0, 2)
     * st(2) = S(1) * M(1, 0)
     * st(1) = S(1) * M(1, 1)
     * st(0) = S(1) * M(1, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
     */

    fld S(2)
    fmul M(2, 0)
    fld S(2)
    fmul M(2, 1)
    fld S(2)
    fmul M(2, 2)

    /*
     * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(2) = S(2) * M(2, 0)
     * st(1) = S(2) * M(2, 1)
     * st(0) = S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 */
    fadd M(3, 0)
    fxch st(1)                      /* 1 2 0 */
    fadd M(3, 1)
    fxch st(2)                      /* 0 2 1 */
    fadd M(3, 2)

    fxch st(1)                      /* 2 0 1 */
    fstp D(0)                       /* 0 1 */
    fstp D(2)                       /* 1 */
    fstp D(1)                       /* */
    mov D(3), eax

    lea esi, S(4)
    dec ecx

    lea edi, D(4)

    jnz _asm_transform_points3_3d_loop

_asm_transform_points3_3d_end:
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points4_general( GLuint n, GLfloat d[][4],
 *                                     GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(j) = S(0)*M(0,j) + S(1)*M(1,j) + S(2)*M(2,j) + S(3)*M(3,j)  j = 0..3
 */
.align 4
PUBLIC _asm_transform_points4_general
_asm_transform_points4_general:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_general_end

.align 4
_asm_transform_points4_general_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(0)
    fmul M(0, 2)
    fld S(0)
    fmul M(0, 3)

    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(1, 2)
    fld S(1)
    fmul M(1, 3)

    /*
     * st(7) = S(0) * M(0, 0)
     * st(6) = S(0) * M(0, 1)
     * st(5) = S(0) * M(0, 2)
     * st(4) = S(0) * M(0, 3)
     * st(3) = S(1) * M(1, 0)
     * st(2) = S(1) * M(1, 1)
     * st(1) = S(1) * M(1, 2)
     * st(0) = S(1) * M(1, 3)
     */

    fxch st(3)                      /* 3 1 2 0 4 5 6 7 */
    faddp st(7), st                 /* 1 2 0 4 5 6 7 */
    fxch st(1)                      /* 2 1 0 4 5 6 7 */
    faddp st(5), st                 /* 1 0 4 5 6 7 */
    faddp st(3), st                 /* 0 4 5 6 7 */
    faddp st(1), st                 /* 4 5 6 7 */

    /*
     * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3)
     */

    fld S(2)
    fmul M(2, 0)
    fld S(2)
    fmul M(2, 1)
    fld S(2)
    fmul M(2, 2)
    fld S(2)
    fmul M(2, 3)

    /*
     * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3)
     * st(3) = S(2) * M(2, 0)
     * st(2) = S(2) * M(2, 1)
     * st(1) = S(2) * M(2, 2)
     * st(0) = S(2) * M(2, 3)
     */

    fxch st(3)                      /* 3 1 2 0 4 5 6 7 */
    faddp st(7), st                 /* 1 2 0 4 5 6 7 */
    fxch st(1)                      /* 2 1 0 4 5 6 7 */
    faddp st(5), st                 /* 1 0 4 5 6 7 */
    faddp st(3), st                 /* 0 4 5 6 7 */
    faddp st(1), st                 /* 4 5 6 7 */

    /*
     * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
     */

    fld S(3)
    fmul M(3, 0)
    fld S(3)
    fmul M(3, 1)
    fld S(3)
    fmul M(3, 2)
    fld S(3)
    fmul M(3, 3)

    /*
     * st(7) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(6) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(5) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     * st(4) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3)
     * st(3) = S(3) * M(3, 0)
     * st(2) = S(3) * M(3, 1)
     * st(1) = S(3) * M(3, 2)
     * st(0) = S(3) * M(3, 3)
     */

    fxch st(3)                      /* 3 1 2 0 4 5 6 7 */
    faddp st(7), st                 /* 1 2 0 4 5 6 7 */
    fxch st(1)                      /* 2 1 0 4 5 6 7 */
    faddp st(5), st                 /* 1 0 4 5 6 7 */
    faddp st(3), st                 /* 0 4 5 6 7 */

    lea esi, S(4)
    dec ecx                         /* sets ZF for the jnz below */

    faddp st(1), st                 /* 4 5 6 7 */

    /*
     * st(3) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
     * st(2) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
     * st(1) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
     * st(0) = S(0) * M(0, 3) + S(1) * M(1, 3) + S(2) * M(2, 3) + S(3) * M(3, 3)
     */

    fxch st(3)                      /* 3 1 2 0 */
    fstp D(0)                       /* 1 2 0 */
    fxch st(1)                      /* 2 1 0 */
    fstp D(1)                       /* 1 0 */
    fstp D(2)                       /* 0 */
    fstp D(3)                       /* */

    lea edi, D(4)

    jnz _asm_transform_points4_general_loop

_asm_transform_points4_general_end:
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points4_identity( GLuint n, GLfloat d[][4],
 *                                      GLfloat s[][4] );
 *
 * Copies n * 4 dwords from s to d with "rep movsd" (ascending addresses).
 */
.align 4
PUBLIC _asm_transform_points4_identity
_asm_transform_points4_identity:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov esi, [esp + 20]             /* esi = s */

    lea ecx, [ecx * 4]              /* ecx = number of dwords to copy */

    cld                             /* make movsd count upwards */
    rep movsd

    pop edi
    pop esi
    ret


/*
 * void asm_transform_points4_2d( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + S(1)*M(1,0) + S(3)*M(3,0)
 *   D(1) = S(0)*M(0,1) + S(1)*M(1,1) + S(3)*M(3,1)
 *   D(2) = S(2)
 *   D(3) = S(3)
 */
.align 4
PUBLIC _asm_transform_points4_2d
_asm_transform_points4_2d:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_2d_end

    push ebx                        /* only saved on the non-empty path */

.align 4
_asm_transform_points4_2d_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(3)
    fmul M(3, 0)
    fld S(3)
    fmul M(3, 1)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(0) * M(0, 1)
     * st(3) = S(1) * M(1, 0)
     * st(2) = S(1) * M(1, 1)
     * st(1) = S(3) * M(3, 0)
     * st(0) = S(3) * M(3, 1)
     */

    mov eax, S(2)
    mov ebx, S(3)
    lea esi, S(4)
    dec ecx                         /* sets ZF for the jnz below */
    mov D(2), eax
    mov D(3), ebx
    faddp st(4), st                 /* st(3) = S(0)*M(0,1) + S(3)*M(3,1) */
    faddp st(4), st                 /* st(3) = S(0)*M(0,0) + S(3)*M(3,0) */
    faddp st(2), st                 /* st(1) = complete D(1) */
    faddp st(2), st                 /* st(1) = complete D(0) */
    fstp D(1)
    fstp D(0)
    lea edi, D(4)
    jnz _asm_transform_points4_2d_loop

    pop ebx

_asm_transform_points4_2d_end:
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points4_2d_no_rot( GLuint n, GLfloat d[][4],
 *                                       GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + S(3)*M(3,0)
 *   D(1) = S(1)*M(1,1) + S(3)*M(3,1)
 *   D(2) = S(2)
 *   D(3) = S(3)
 */
.align 4
PUBLIC _asm_transform_points4_2d_no_rot
_asm_transform_points4_2d_no_rot:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_2d_no_rot_end
    push ebx                        /* only saved on the non-empty path */

.align 4
_asm_transform_points4_2d_no_rot_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(3)
    fmul M(3, 0)
    fld S(3)
    fmul M(3, 1)
    mov eax, S(2)
    mov ebx, S(3)
    lea esi, S(4)
    dec ecx                         /* sets ZF for the jnz below */
    mov D(2), eax
    mov D(3), ebx
    faddp st(2), st                 /* st(1) = complete D(1) */
    faddp st(2), st                 /* st(1) = complete D(0) */
    fstp D(1)
    fstp D(0)
    lea edi, D(4)
    jnz _asm_transform_points4_2d_no_rot_loop

    pop ebx

_asm_transform_points4_2d_no_rot_end:
    pop edi
    pop esi
    ret


/*
 * void asm_transform_points4_3d( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(j) = S(0)*M(0,j) + S(1)*M(1,j) + S(2)*M(2,j) + S(3)*M(3,j)  j = 0..2
 *   D(3) = S(3)
 *
 * S(3) is loaded first and stays at the bottom of the x87 stack until
 * the final fstp; the stack pictures below do not list it.
 */
.align 4
PUBLIC _asm_transform_points4_3d
_asm_transform_points4_3d:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_3d_end

.align 4
_asm_transform_points4_3d_loop:
    fld S(3)                        /* passed through to D(3) unchanged */

    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(0, 1)
    fld S(0)
    fmul M(0, 2)

    fld S(1)
    fmul M(1, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(1, 2)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(0) * M(0, 1)
     * st(3) = S(0) * M(0, 2)
     * st(2) = S(1) * M(1, 0)
     * st(1) = S(1) * M(1, 1)
     * st(0) = S(1) * M(1, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2)
     */

    fld S(2)
    fmul M(2, 0)
    fld S(2)
    fmul M(2, 1)
    fld S(2)
    fmul M(2, 2)

    /*
     * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0)
     * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1)
     * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2)
     * st(2) = S(2) * M(2, 0)
     * st(1) = S(2) * M(2, 1)
     * st(0) = S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     */

    fld S(3)
    fmul M(3, 0)
    fld S(3)
    fmul M(3, 1)
    fld S(3)
    fmul M(3, 2)

    /*
     * st(5) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0)
     * st(4) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1)
     * st(3) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2)
     * st(2) = S(3) * M(3, 0)
     * st(1) = S(3) * M(3, 1)
     * st(0) = S(3) * M(3, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */

    lea esi, S(4)
    dec ecx                         /* sets ZF for the jnz below */

    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(1, 0) + S(2) * M(2, 0) + S(3) * M(3, 0)
     * st(1) = S(0) * M(0, 1) + S(1) * M(1, 1) + S(2) * M(2, 1) + S(3) * M(3, 1)
     * st(0) = S(0) * M(0, 2) + S(1) * M(1, 2) + S(2) * M(2, 2) + S(3) * M(3, 2)
     */

    fxch st(2)                      /* 2 1 0 */
    fstp D(0)                       /* 1 0 */
    fstp D(1)                       /* 0 */
    fstp D(2)                       /* */
    fstp D(3)                       /* the S(3) loaded at the loop top */

    lea edi, D(4)

    jnz _asm_transform_points4_3d_loop

_asm_transform_points4_3d_end:
    pop edi
    pop esi
    ret

/*
 * void asm_transform_points4_ortho( GLuint n, GLfloat d[][4],
 *                                   GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + S(3)*M(3,0)
 *   D(1) = S(1)*M(1,1) + S(3)*M(3,1)
 *   D(2) = S(2)*M(2,2) + S(3)*M(3,2)
 *   D(3) = S(3)
 */
.align 4
PUBLIC _asm_transform_points4_ortho
_asm_transform_points4_ortho:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_ortho_end

.align 4
_asm_transform_points4_ortho_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(2)
    fmul M(2, 2)

    fld S(3)
    fmul M(3, 0)
    fld S(3)
    fmul M(3, 1)
    fld S(3)
    fmul M(3, 2)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(1) * M(1, 1)
     * st(3) = S(2) * M(2, 2)
     * st(2) = S(3) * M(3, 0)
     * st(1) = S(3) * M(3, 1)
     * st(0) = S(3) * M(3, 2)
     */

    mov eax, S(3)
    lea esi, S(4)
    dec ecx                         /* sets ZF for the jnz below */
    mov D(3), eax

    faddp st(3), st                 /* st(2) = complete D(2) */
    faddp st(3), st                 /* st(2) = complete D(1) */
    faddp st(3), st                 /* st(2) = complete D(0) */

    fstp D(2)
    fstp D(1)
    fstp D(0)

    lea edi, D(4)
    jnz _asm_transform_points4_ortho_loop

_asm_transform_points4_ortho_end:
    pop edi
    pop esi
    ret

/*
 * void asm_transform_points4_perspective( GLuint n, GLfloat d[][4],
 *                                         GLfloat m[16], GLfloat s[][4] );
 *
 * For each of the n elements:
 *   D(0) = S(0)*M(0,0) + S(2)*M(2,0)
 *   D(1) = S(1)*M(1,1) + S(2)*M(2,1)
 *   D(2) = S(2)*M(2,2) + S(3)*M(3,2)
 *   D(3) = -S(2)       (computed by flipping the IEEE sign bit)
 */
.align 4
PUBLIC _asm_transform_points4_perspective
_asm_transform_points4_perspective:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _asm_transform_points4_perspective_end

.align 4
_asm_transform_points4_perspective_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(1)
    fmul M(1, 1)
    fld S(2)
    fmul M(2, 2)

    fld S(2)
    fmul M(2, 0)
    fld S(2)
    fmul M(2, 1)
    fld S(3)
    fmul M(3, 2)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(1) * M(1, 1)
     * st(3) = S(2) * M(2, 2)
     * st(2) = S(2) * M(2, 0)
     * st(1) = S(2) * M(2, 1)
     * st(0) = S(3) * M(3, 2)
     */

    mov eax, S(2)
    lea esi, S(4)
    xor eax, HEX(80000000)          /* eax = -S(2): toggle the sign bit */
    dec ecx                         /* sets ZF for the jnz below */

    faddp st(3), st                 /* st(2) = complete D(2) */
    faddp st(3), st                 /* st(2) = complete D(1) */
    faddp st(3), st                 /* st(2) = complete D(0) */

    fstp D(2)
    fstp D(1)
    fstp D(0)

    mov D(3), eax
    lea edi, D(4)
    jnz _asm_transform_points4_perspective_loop

_asm_transform_points4_perspective_end:
    pop edi
    pop esi
    ret



/*
 * Table for clip test.
 *
 * The table is indexed by a 7-bit value built by cliptest below:
 *
 * bit6 = S(3) < 0
 * bit5 = S(2) < 0
 * bit4 = abs(S(2)) > abs(S(3))
 * bit3 = S(1) < 0
 * bit2 = abs(S(1)) > abs(S(3))
 * bit1 = S(0) < 0
 * bit0 = abs(S(0)) > abs(S(3))
 *
 * Each entry is the OR of the CLIP_*_BIT flags for that combination.
 * The "#if 0" branch shows how the entries are derived; the branch that
 * is actually assembled contains the same table as 128 literal bytes.
 */

/* Vertex buffer clipping flags (from vb.h) */
#if 0

#define CLIP_RIGHT_BIT   0x01
#define CLIP_LEFT_BIT    0x02
#define CLIP_TOP_BIT     0x04
#define CLIP_BOTTOM_BIT  0x08
#define CLIP_NEAR_BIT    0x10
#define CLIP_FAR_BIT     0x20
#define CLIP_USER_BIT    0x40
#define CLIP_ALL_BITS    0x3f

#define MAGN_X(i)   (~(((i) & 1) - 1))
#define SIGN_X(i)   (~((((i) >> 1) & 1) - 1))
#define MAGN_Y(i)   (~((((i) >> 2) & 1) - 1))
#define SIGN_Y(i)   (~((((i) >> 3) & 1) - 1))
#define MAGN_Z(i)   (~((((i) >> 4) & 1) - 1))
#define SIGN_Z(i)   (~((((i) >> 5) & 1) - 1))
#define SIGN_W(i)   (~((((i) >> 6) & 1) - 1))

#define CLIP_VALUE(i) \
    (CLIP_RIGHT_BIT \
     & ((~SIGN_X(i) & SIGN_W(i)) \
        | (~SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)) \
        | (SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)))) \
    | (CLIP_LEFT_BIT \
       & ((SIGN_X(i) & SIGN_W(i)) \
          | (~SIGN_X(i) & SIGN_W(i) & ~MAGN_X(i)) \
          | (SIGN_X(i) & ~SIGN_W(i) & MAGN_X(i)))) \
    | (CLIP_TOP_BIT \
       & ((~SIGN_Y(i) & SIGN_W(i)) \
          | (~SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)) \
          | (SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)))) \
    | (CLIP_BOTTOM_BIT \
       & ((SIGN_Y(i) & SIGN_W(i)) \
          | (~SIGN_Y(i) & SIGN_W(i) & ~MAGN_Y(i)) \
          | (SIGN_Y(i) & ~SIGN_W(i) & MAGN_Y(i)))) \
    | (CLIP_FAR_BIT \
       & ((~SIGN_Z(i) & SIGN_W(i)) \
          | (~SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i)) \
          | (SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)))) \
    | (CLIP_NEAR_BIT \
       & ((SIGN_Z(i) & SIGN_W(i)) \
          | (~SIGN_Z(i) & SIGN_W(i) & ~MAGN_Z(i)) \
          | (SIGN_Z(i) & ~SIGN_W(i) & MAGN_Z(i))))

#define CLIP_VALUE8(i) \
    CLIP_VALUE(i + 0), CLIP_VALUE(i + 1), CLIP_VALUE(i + 2), CLIP_VALUE(i + 3), \
    CLIP_VALUE(i + 4), CLIP_VALUE(i + 5), CLIP_VALUE(i + 6), CLIP_VALUE(i + 7)

.rodata

clip_table:
    .byte CLIP_VALUE8(0x00)
    .byte CLIP_VALUE8(0x08)
    .byte CLIP_VALUE8(0x10)
    .byte CLIP_VALUE8(0x18)
    .byte CLIP_VALUE8(0x20)
    .byte CLIP_VALUE8(0x28)
    .byte CLIP_VALUE8(0x30)
    .byte CLIP_VALUE8(0x38)
    .byte CLIP_VALUE8(0x40)
    .byte CLIP_VALUE8(0x48)
    .byte CLIP_VALUE8(0x50)
    .byte CLIP_VALUE8(0x58)
    .byte CLIP_VALUE8(0x60)
    .byte CLIP_VALUE8(0x68)
    .byte CLIP_VALUE8(0x70)
    .byte CLIP_VALUE8(0x78)
#else

.const
ASSUME NOTHING

clip_table:
    .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
    .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
    .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(24), HEX(25), HEX(24), HEX(26)
    .byte HEX(20), HEX(21), HEX(20), HEX(22), HEX(28), HEX(29), HEX(28), HEX(2a)
    .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(4), HEX(5), HEX(4), HEX(6)
    .byte HEX(0), HEX(1), HEX(0), HEX(2), HEX(8), HEX(9), HEX(8), HEX(a)
    .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(14), HEX(15), HEX(14), HEX(16)
    .byte HEX(10), HEX(11), HEX(10), HEX(12), HEX(18), HEX(19), HEX(18), HEX(1a)
    .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
    .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
    .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(27), HEX(25), HEX(27), HEX(26)
    .byte HEX(2f), HEX(2d), HEX(2f), HEX(2e), HEX(2b), HEX(29), HEX(2b), HEX(2a)
    .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(37), HEX(35), HEX(37), HEX(36)
    .byte HEX(3f), HEX(3d), HEX(3f), HEX(3e), HEX(3b), HEX(39), HEX(3b), HEX(3a)
    .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(17), HEX(15), HEX(17), HEX(16)
    .byte HEX(1f), HEX(1d), HEX(1f), HEX(1e), HEX(1b), HEX(19), HEX(1b), HEX(1a)

#endif

.code

/*
 * cliptest - file-local helper, register calling convention
 *
 * inputs:
 *      ecx = # points
 *      esi = points     (4 floats per point)
 *      edi = clipmask[] (1 byte per point)
 *
 * inputs/outputs:
 *      al = ormask
 *      ah = andmask
 *
 * For every point the 7-bit clip_table index is assembled in edx by
 * shifting carry flags in with "adc edx, edx"; the table byte is then
 * ORed into clipmask[i] and into al, and ANDed into ah.
 *
 * Clobbers ecx, edx, esi, edi and the flags; ebx and ebp are preserved.
 * Returns immediately (al/ah untouched) when ecx == 0.
 *
 * The coordinates are compared as raw integer bit patterns: "add r, r"
 * shifts the sign bit out into carry and leaves the magnitude bits
 * doubled, so an unsigned compare of two such values orders the
 * absolute values.
 */
.align 4
cliptest:
    test ecx, ecx
    jz cliptest_end

    push ebp
    push ebx

.align 4
cliptest_loop:
    mov ebp, S(3)
    mov ebx, S(2)

    xor edx, edx
    add ebp, ebp                    /* ebp = abs(S(3))*2 ; carry = sign of S(3) */

    adc edx, edx
    add ebx, ebx                    /* ebx = abs(S(2))*2 ; carry = sign of S(2) */

    adc edx, edx
    cmp ebp, ebx                    /* carry = abs(S(2))*2 > abs(S(3))*2 */

    adc edx, edx
    mov ebx, S(1)

    add ebx, ebx                    /* ebx = abs(S(1))*2 ; carry = sign of S(1) */

    adc edx, edx
    cmp ebp, ebx                    /* carry = abs(S(1))*2 > abs(S(3))*2 */

    adc edx, edx
    mov ebx, S(0)

    add ebx, ebx                    /* ebx = abs(S(0))*2 ; carry = sign of S(0) */

    adc edx, edx
    cmp ebp, ebx                    /* carry = abs(S(0))*2 > abs(S(3))*2 */

    adc edx, edx                    /* edx = table index, 0..127 */

    lea esi, S(4)

    mov bl, byte ptr [edi]
    mov dl, byte ptr [clip_table + edx]

    or bl, dl                       /* clipmask[i] |= flags */
    or al, dl                       /* ormask      |= flags */

    and ah, dl                      /* andmask     &= flags */
    mov [edi], bl

    inc edi
    dec ecx

    jnz cliptest_loop

    pop ebx
    pop ebp
cliptest_end:
    ret

/*
 * void asm_project_and_cliptest_general( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                        GLfloat s[][4], GLubyte clipmask[],
 *                                        GLubyte *ormask, GLubyte *andmask );
 *
 * Runs asm_transform_points4_general( n, d, m, s ), then cliptest over
 * d[], accumulating into clipmask[], *ormask and *andmask.
 */
.align 4
PUBLIC _asm_project_and_cliptest_general
_asm_project_and_cliptest_general:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    push esi
    push edx
    push edi
    push ecx
    call _asm_transform_points4_general
    add esp, DEC(16)                /* drop the four pushed arguments */

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov al, [edi]
    mov ah, [esi]

    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 28]             /* edi = clipmask */
    mov esi, [esp + 16]             /* esi = d */

    call cliptest

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov [edi], al
    mov [esi], ah

    pop edi
    pop esi
    ret


/*
 * void asm_project_and_cliptest_identity( GLuint n, GLfloat d[][4],
 *                                         GLfloat s[][4], GLubyte clipmask[],
 *                                         GLubyte *ormask, GLubyte *andmask );
 *
 * Runs asm_transform_points4_identity( n, d, s ), then cliptest over
 * d[].  There is no matrix argument, so clipmask/ormask/andmask sit one
 * stack slot lower than in the other project_and_cliptest routines.
 */
.align 4
PUBLIC _asm_project_and_cliptest_identity
_asm_project_and_cliptest_identity:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov esi, [esp + 20]             /* esi = s */

    push esi
    push edi
    push ecx

    call _asm_transform_points4_identity

    add esp, DEC(12)                /* drop the three pushed arguments */

    mov edi, [esp + 28]             /* ormask */
    mov esi, [esp + 32]             /* andmask */
    mov al, [edi]
    mov ah, [esi]

    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 24]             /* edi = clipmask */
    mov esi, [esp + 16]             /* esi = d */

    call cliptest

    mov edi, [esp + 28]             /* ormask */
    mov esi, [esp + 32]             /* andmask */
    mov [edi], al
    mov [esi], ah

    pop edi
    pop esi
    ret

/*
 * void asm_project_and_cliptest_ortho( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                      GLfloat s[][4], GLubyte clipmask[],
 *                                      GLubyte *ormask, GLubyte *andmask );
 *
 * Runs asm_transform_points4_ortho( n, d, m, s ), then cliptest over d[].
 */
.align 4
PUBLIC _asm_project_and_cliptest_ortho
_asm_project_and_cliptest_ortho:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    push esi
    push edx
    push edi
    push ecx

    call _asm_transform_points4_ortho

    add esp, DEC(16)                /* drop the four pushed arguments */

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov al, [edi]
    mov ah, [esi]

    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 28]             /* edi = clipmask */
    mov esi, [esp + 16]             /* esi = d */

    call cliptest

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov [edi], al
    mov [esi], ah

    pop edi
    pop esi
    ret

/*
 * void asm_project_and_cliptest_perspective( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                                            GLfloat s[][4], GLubyte clipmask[],
 *                                            GLubyte *ormask, GLubyte *andmask );
 *
 * Runs asm_transform_points4_perspective( n, d, m, s ), then cliptest
 * over d[].
 */
.align 4
PUBLIC _asm_project_and_cliptest_perspective
_asm_project_and_cliptest_perspective:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    push esi
    push edx
    push edi
    push ecx

    call _asm_transform_points4_perspective

    add esp, DEC(16)                /* drop the four pushed arguments */

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov al, [edi]
    mov ah, [esi]

    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 28]             /* edi = clipmask */
    mov esi, [esp + 16]             /* esi = d */

    call cliptest

    mov edi, [esp + 32]             /* ormask */
    mov esi, [esp + 36]             /* andmask */
    mov byte ptr [edi], al
    mov byte ptr [esi], ah

    pop edi
    pop esi
    ret


/*
 * unsigned int inverse_nofp( float f );
 *
 * Calculate the inverse of a float without using the FPU.
 * This function returns a float in eax, so its return
 * type should be 'int' when called from C (and converted
 * to float with pointer/union abuse).
 *
 * The argument is read from [esp + 4].  The callers in this file do not
 * push it: they keep the value in the dword at the top of their own
 * stack, which becomes [esp + 4] once "call" has pushed the return
 * address.
 *
 * Clobbers eax, ecx, edx and the flags.  The exponent is simply
 * mirrored, so only ordinary (normalized, finite) inputs give a
 * meaningful result.  The divisor always has bit 23 set, so the "div"
 * cannot fault.
 */
.align 4
inverse_nofp:

    /* get mantissa in ecx */
    mov ecx, [esp + 4]
    and ecx, HEX(7fffff)

    /* set implicit integer */
    or ecx, HEX(800000)

    /* div 0x10000:0x00000000 by mantissa */
    xor eax, eax
    mov edx, HEX(10000)

    div ecx

    /* round result */
    shr eax, DEC(1)
    adc eax, DEC(0)

    /* get exponent in ecx */
    mov ecx, HEX(7f800000)
    mov edx, [esp + 4]
    and ecx, edx

    /* negate exponent and decrement it */
    mov edx, HEX(7E800000)
    sub edx, ecx

    /* if bit 24 is set, shift and adjust exponent */
    test eax, HEX(1000000)
    jz inverse_nofp_combine

    shr eax, DEC(1)
    add edx, HEX(800000)

    /* combine mantissa and exponent, then set sign */
inverse_nofp_combine:
    and eax, HEX(7fffff)
    mov ecx, [esp + 4]
    or eax, edx
    and ecx, HEX(80000000)
    or eax, ecx

    ret


/*
 * void gl_xform_normals_3fv( GLuint n, GLfloat d[][4], GLfloat m[16],
 *                            GLfloat s[][4], GLboolean normalize );
 *
 * NOTE(review): the prototype above is reproduced as found, but the code
 * advances both s and d by 3 floats per normal, i.e. it treats both
 * arrays as tightly packed [][3].  Confirm against the C declaration.
 *
 * Pass 1, for each of the n normals:
 *   D(j) = S(0)*M(j,0) + S(1)*M(j,1) + S(2)*M(j,2)             j = 0..2
 *
 * Pass 2 (only when normalize is non-zero): every d[i] is multiplied by
 * the inverse_nofp() approximation of 1 / sqrt(x*x + y*y + z*z).  The
 * length of normal i+1 is computed on the FPU while the integer unit
 * inverts the length of normal i.
 */
.align 4
PUBLIC _gl_xform_normals_3fv
_gl_xform_normals_3fv:
    push esi
    push edi
    mov ecx, [esp + 12]             /* ecx = n */
    mov edi, [esp + 16]             /* edi = d */
    mov edx, [esp + 20]             /* edx = m */
    mov esi, [esp + 24]             /* esi = s */

    test ecx, ecx
    jz _gl_xform_normals_3fv_end

.align 4
_gl_xform_normals_3fv_loop:
    fld S(0)
    fmul M(0, 0)
    fld S(0)
    fmul M(1, 0)
    fld S(0)
    fmul M(2, 0)

    fld S(1)
    fmul M(0, 1)
    fld S(1)
    fmul M(1, 1)
    fld S(1)
    fmul M(2, 1)

    /*
     * st(5) = S(0) * M(0, 0)
     * st(4) = S(0) * M(1, 0)
     * st(3) = S(0) * M(2, 0)
     * st(2) = S(1) * M(0, 1)
     * st(1) = S(1) * M(1, 1)
     * st(0) = S(1) * M(2, 1)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1)
     * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1)
     * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1)
     */

    fld S(2)
    fmul M(0, 2)
    fld S(2)
    fmul M(1, 2)
    fld S(2)
    fmul M(2, 2)

    /*
     * st(5) = S(0) * M(0, 0) + S(1) * M(0, 1)
     * st(4) = S(0) * M(1, 0) + S(1) * M(1, 1)
     * st(3) = S(0) * M(2, 0) + S(1) * M(2, 1)
     * st(2) = S(2) * M(0, 2)
     * st(1) = S(2) * M(1, 2)
     * st(0) = S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 3 4 5 */
    faddp st(5), st                 /* 1 0 3 4 5 */
    faddp st(3), st                 /* 0 3 4 5 */
    faddp st(1), st                 /* 3 4 5 */

    /*
     * st(2) = S(0) * M(0, 0) + S(1) * M(0, 1) + S(2) * M(0, 2)
     * st(1) = S(0) * M(1, 0) + S(1) * M(1, 1) + S(2) * M(1, 2)
     * st(0) = S(0) * M(2, 0) + S(1) * M(2, 1) + S(2) * M(2, 2)
     */

    fxch st(2)                      /* 2 1 0 */
    fstp D(0)                       /* 1 0 */
    fstp D(1)                       /* 0 */
    fstp D(2)                       /* */

    lea esi, S(3)                   /* 3 floats per normal */

    dec ecx
    lea edi, D(3)                   /* 3 floats per normal */

    jnz _gl_xform_normals_3fv_loop

    /*
     * Skip normalize if it isn't needed.
     *
     * normalize is a one-byte GLboolean, so only the low byte of its
     * stack slot is tested; the upper three bytes of the slot are not
     * relied upon.
     */
    cmp byte ptr [esp + 28], DEC(0)
    jz _gl_xform_normals_3fv_end

    /* Normalize required */

    mov esi, [esp + 12]             /* esi = n */
    mov edi, [esp + 16]             /* edi = d */

    sub esp, DEC(4)                 /* temp var for 1.0 / len */

    /*
     * [esp] = length of first normal
     */
    fld D(0)
    fmul D(0)
    fld D(1)
    fmul D(1)
    fld D(2)
    fmul D(2)
    fxch st(2)
    faddp st(1), st
    faddp st(1), st
    fsqrt
    fstp dword ptr [esp]

    jmp _gl_xform_normals_3fv_loop2_end

.align 4
_gl_xform_normals_3fv_loop2:
    /* st(0) = length of next normal */
    fld D(3)
    fmul D(3)
    fld D(4)
    fmul D(4)
    fld D(5)
    fmul D(5)
    fxch st(2)
    faddp st(1), st
    faddp st(1), st
    fsqrt

    /*
     * inverse the length of the current normal, which is
     * already at [esp].  This should overlap the prev
     * fsqrt nicely.
     */
    call inverse_nofp               /* reads our [esp] as its [esp + 4] */
    mov [esp], eax

    /* multiply normal by 1/len */
    fld D(0)
    fmul dword ptr [esp]
    fld D(1)
    fmul dword ptr [esp]
    fld D(2)
    fmul dword ptr [esp]
    fxch st(3)                      /* bring the next length to st(0) */
    fstp dword ptr [esp]            /* store length of next normal */
    fstp D(1)
    fstp D(0)
    fstp D(2)
    lea edi, D(3)

_gl_xform_normals_3fv_loop2_end:
    dec esi
    jnz _gl_xform_normals_3fv_loop2

    /* finish up the last normal */
    call inverse_nofp               /* reads our [esp] as its [esp + 4] */
    mov [esp], eax
    fld D(0)
    fmul dword ptr [esp]
    fld D(1)
    fmul dword ptr [esp]
    fld D(2)
    fmul dword ptr [esp]
    fxch st(2)
    fstp D(0)
    fstp D(1)
    fstp D(2)

    add esp, DEC(4)                 /* release the temp var */

_gl_xform_normals_3fv_end:
    pop edi
    pop esi
    ret

END