1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelRoutine.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Constants.hpp" 19 #include "Renderer/Renderer.hpp" 20 #include "Renderer/QuadRasterizer.hpp" 21 #include "Renderer/Surface.hpp" 22 #include "Renderer/Primitive.hpp" 23 #include "Common/Debug.hpp" 24 25 namespace sw 26 { 27 extern bool complementaryDepthBuffer; 28 extern bool postBlendSRGB; 29 extern bool exactColorRounding; 30 extern bool forceClearRegisters; 31 PixelRoutine(const PixelProcessor::State & state,const PixelShader * shader)32 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) 33 : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput) 34 { 35 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters) 36 { 37 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) 38 { 39 v[i].x = Float4(0.0f); 40 v[i].y = Float4(0.0f); 41 v[i].z = Float4(0.0f); 42 v[i].w = Float4(0.0f); 43 } 44 } 45 } 46 ~PixelRoutine()47 PixelRoutine::~PixelRoutine() 48 { 49 } 50 quad(Pointer<Byte> cBuffer[RENDERTARGETS],Pointer<Byte> & zBuffer,Pointer<Byte> & sBuffer,Int cMask[4],Int & x)51 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x) 52 { 53 #if PERF_PROFILE 54 Long pipeTime = Ticks(); 55 #endif 56 57 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); 58 59 Int zMask[4]; // Depth mask 60 Int sMask[4]; // Stencil mask 61 62 for(unsigned int q = 0; q < state.multiSample; q++) 63 { 64 zMask[q] = cMask[q]; 65 sMask[q] = cMask[q]; 66 } 67 68 for(unsigned int q = 0; q < state.multiSample; q++) 69 { 70 stencilTest(sBuffer, q, x, sMask[q], cMask[q]); 71 } 72 73 Float4 f; 74 Float4 rhwCentroid; 75 76 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); 77 78 if(interpolateZ()) 79 { 80 for(unsigned int q = 0; q < state.multiSample; q++) 81 { 82 Float4 x = xxxx; 83 84 if(state.multiSample > 1) 85 { 86 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4)); 87 } 88 89 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp); 90 } 91 } 92 93 Bool depthPass = false; 94 95 if(earlyDepthTest) 96 { 97 for(unsigned int q = 0; q < state.multiSample; q++) 98 { 99 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 100 } 101 } 102 103 If(depthPass || Bool(!earlyDepthTest)) 104 { 105 #if PERF_PROFILE 106 Long interpTime = Ticks(); 107 #endif 108 109 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); 110 111 // Centroid locations 112 Float4 XXXX = Float4(0.0f); 113 Float4 YYYY = Float4(0.0f); 114 115 if(state.centroid) 116 { 117 Float4 WWWW(1.0e-9f); 118 119 for(unsigned int q = 0; q < state.multiSample; q++) 120 { 121 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); 122 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); 123 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]); 124 } 125 126 WWWW = Rcp_pp(WWWW); 127 XXXX *= WWWW; 128 YYYY *= WWWW; 129 130 XXXX += xxxx; 131 YYYY += yyyy; 132 } 133 134 if(interpolateW()) 135 { 136 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false); 137 rhw = reciprocal(w, false, false, true); 138 139 if(state.centroid) 140 { 141 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); 142 } 143 } 144 145 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) 146 { 147 for(int component = 0; component < 4; component++) 148 { 149 if(state.interpolant[interpolant].component & (1 << component)) 150 { 151 if(!state.interpolant[interpolant].centroid) 152 { 153 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false); 154 } 155 else 156 { 157 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); 158 } 159 } 160 } 161 162 Float4 rcp; 163 164 switch(state.interpolant[interpolant].project) 165 { 166 case 0: 167 break; 168 case 1: 169 rcp = reciprocal(v[interpolant].y); 170 v[interpolant].x = v[interpolant].x * rcp; 171 break; 172 case 2: 173 rcp = reciprocal(v[interpolant].z); 174 v[interpolant].x = v[interpolant].x * rcp; 175 v[interpolant].y = v[interpolant].y * rcp; 176 break; 177 case 3: 178 rcp = reciprocal(v[interpolant].w); 179 v[interpolant].x = v[interpolant].x * rcp; 180 v[interpolant].y = v[interpolant].y * rcp; 181 v[interpolant].z = v[interpolant].z * rcp; 182 break; 183 } 184 } 185 186 if(state.fog.component) 187 { 188 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false); 189 } 190 191 setBuiltins(x, y, z, w); 192 193 #if PERF_PROFILE 194 cycles[PERF_INTERP] += Ticks() - interpTime; 195 #endif 196 197 Bool alphaPass = true; 198 199 if(colorUsed()) 200 { 201 #if PERF_PROFILE 202 Long shaderTime = Ticks(); 203 #endif 204 205 applyShader(cMask); 206 207 #if PERF_PROFILE 208 cycles[PERF_SHADER] += Ticks() - shaderTime; 209 #endif 210 211 alphaPass = alphaTest(cMask); 212 213 if((shader && shader->containsKill()) || state.alphaTestActive()) 214 { 215 for(unsigned int q = 0; q < state.multiSample; q++) 216 { 217 zMask[q] &= cMask[q]; 218 sMask[q] &= cMask[q]; 219 } 220 } 221 } 222 223 If(alphaPass) 224 { 225 if(!earlyDepthTest) 226 { 227 for(unsigned int q = 0; q < state.multiSample; q++) 228 { 229 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); 230 } 231 } 232 233 #if PERF_PROFILE 234 Long ropTime = Ticks(); 235 #endif 236 237 If(depthPass || Bool(earlyDepthTest)) 238 { 239 for(unsigned int q = 0; q < state.multiSample; q++) 240 { 241 if(state.multiSampleMask & (1 << q)) 242 { 243 writeDepth(zBuffer, q, x, z[q], zMask[q]); 244 245 if(state.occlusionEnabled) 246 { 247 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); 248 } 249 } 250 } 251 252 if(colorUsed()) 253 { 254 #if PERF_PROFILE 255 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4); 256 #endif 257 258 rasterOperation(f, cBuffer, x, sMask, zMask, cMask); 259 } 260 } 261 262 #if PERF_PROFILE 263 cycles[PERF_ROP] += Ticks() - ropTime; 264 #endif 265 } 266 } 267 268 for(unsigned int q = 0; q < state.multiSample; q++) 269 { 270 if(state.multiSampleMask & (1 << q)) 271 { 272 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); 273 } 274 } 275 276 #if PERF_PROFILE 277 cycles[PERF_PIPE] += Ticks() - pipeTime; 278 #endif 279 } 280 interpolateCentroid(Float4 & x,Float4 & y,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective)281 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective) 282 { 283 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16); 284 285 if(!flat) 286 { 287 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) + 288 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16); 289 290 if(perspective) 291 { 292 interpolant *= rhw; 293 } 294 } 295 296 return interpolant; 297 } 298 stencilTest(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & cMask)299 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask) 300 { 301 if(!state.stencilActive) 302 { 303 return; 304 } 305 306 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask) 307 308 Pointer<Byte> buffer = sBuffer + 2 * x; 309 310 if(q > 0) 311 { 312 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 313 } 314 315 Byte8 value = *Pointer<Byte8>(buffer); 316 Byte8 valueCCW = value; 317 318 if(!state.noStencilMask) 319 { 320 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ)); 321 } 322 323 stencilTest(value, state.stencilCompareMode, false); 324 325 if(state.twoSidedStencil) 326 { 327 if(!state.noStencilMaskCCW) 328 { 329 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ)); 330 } 331 332 stencilTest(valueCCW, state.stencilCompareModeCCW, true); 333 334 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 335 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 336 value |= valueCCW; 337 } 338 339 sMask = SignMask(value) & cMask; 340 } 341 stencilTest(Byte8 & value,StencilCompareMode stencilCompareMode,bool CCW)342 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) 343 { 344 Byte8 equal; 345 346 switch(stencilCompareMode) 347 { 348 case STENCIL_ALWAYS: 349 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 350 break; 351 case STENCIL_NEVER: 352 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 353 break; 354 case STENCIL_LESS: // a < b ~ b > a 355 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 356 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 357 break; 358 case STENCIL_EQUAL: 359 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 360 break; 361 case STENCIL_NOTEQUAL: // a != b ~ !(a == b) 362 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 363 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 364 break; 365 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) 366 equal = value; 367 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); 368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 370 value |= equal; 371 break; 372 case STENCIL_GREATER: // a > b 373 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); 374 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 375 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value)); 376 value = equal; 377 break; 378 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) 379 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); 380 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); 381 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 382 break; 383 default: 384 ASSERT(false); 385 } 386 } 387 depthTest(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & sMask,Int & zMask,Int & cMask)388 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) 389 { 390 if(!state.depthTestActive) 391 { 392 return true; 393 } 394 395 Float4 Z = z; 396 397 if(shader && shader->depthOverride()) 398 { 399 if(complementaryDepthBuffer) 400 { 401 Z = Float4(1.0f) - oDepth; 402 } 403 else 404 { 405 Z = oDepth; 406 } 407 } 408 409 Pointer<Byte> buffer; 410 Int pitch; 411 412 if(!state.quadLayoutDepthBuffer) 413 { 414 buffer = zBuffer + 4 * x; 415 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 416 } 417 else 418 { 419 buffer = zBuffer + 8 * x; 420 } 421 422 if(q > 0) 423 { 424 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 425 } 426 427 Float4 zValue; 428 429 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 430 { 431 if(!state.quadLayoutDepthBuffer) 432 { 433 // FIXME: Properly optimizes? 434 zValue.xy = *Pointer<Float4>(buffer); 435 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 436 } 437 else 438 { 439 zValue = *Pointer<Float4>(buffer, 16); 440 } 441 } 442 443 Int4 zTest; 444 445 switch(state.depthCompareMode) 446 { 447 case DEPTH_ALWAYS: 448 // Optimized 449 break; 450 case DEPTH_NEVER: 451 // Optimized 452 break; 453 case DEPTH_EQUAL: 454 zTest = CmpEQ(zValue, Z); 455 break; 456 case DEPTH_NOTEQUAL: 457 zTest = CmpNEQ(zValue, Z); 458 break; 459 case DEPTH_LESS: 460 if(complementaryDepthBuffer) 461 { 462 zTest = CmpLT(zValue, Z); 463 } 464 else 465 { 466 zTest = CmpNLE(zValue, Z); 467 } 468 break; 469 case DEPTH_GREATEREQUAL: 470 if(complementaryDepthBuffer) 471 { 472 zTest = CmpNLT(zValue, Z); 473 } 474 else 475 { 476 zTest = CmpLE(zValue, Z); 477 } 478 break; 479 case DEPTH_LESSEQUAL: 480 if(complementaryDepthBuffer) 481 { 482 zTest = CmpLE(zValue, Z); 483 } 484 else 485 { 486 zTest = CmpNLT(zValue, Z); 487 } 488 break; 489 case DEPTH_GREATER: 490 if(complementaryDepthBuffer) 491 { 492 zTest = CmpNLE(zValue, Z); 493 } 494 else 495 { 496 zTest = CmpLT(zValue, Z); 497 } 498 break; 499 default: 500 ASSERT(false); 501 } 502 503 switch(state.depthCompareMode) 504 { 505 case DEPTH_ALWAYS: 506 zMask = cMask; 507 break; 508 case DEPTH_NEVER: 509 zMask = 0x0; 510 break; 511 default: 512 zMask = SignMask(zTest) & cMask; 513 break; 514 } 515 516 if(state.stencilActive) 517 { 518 zMask &= sMask; 519 } 520 521 return zMask != 0; 522 } 523 alphaTest(Int & aMask,Short4 & alpha)524 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) 525 { 526 Short4 cmp; 527 Short4 equal; 528 529 switch(state.alphaCompareMode) 530 { 531 case ALPHA_ALWAYS: 532 aMask = 0xF; 533 break; 534 case ALPHA_NEVER: 535 aMask = 0x0; 536 break; 537 case ALPHA_EQUAL: 538 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 539 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 540 break; 541 case ALPHA_NOTEQUAL: // a != b ~ !(a == b) 542 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 543 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 544 break; 545 case ALPHA_LESS: // a < b ~ b > a 546 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha); 547 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 548 break; 549 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate 550 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 551 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 552 cmp |= equal; 553 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 554 break; 555 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) 556 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME 557 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 558 break; 559 case ALPHA_GREATER: // a > b 560 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))); 561 aMask = SignMask(PackSigned(cmp, Short4(0x0000))); 562 break; 563 default: 564 ASSERT(false); 565 } 566 } 567 alphaToCoverage(Int cMask[4],Float4 & alpha)568 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) 569 { 570 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0))); 571 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1))); 572 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2))); 573 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3))); 574 575 Int aMask0 = SignMask(coverage0); 576 Int aMask1 = SignMask(coverage1); 577 Int aMask2 = SignMask(coverage2); 578 Int aMask3 = SignMask(coverage3); 579 580 cMask[0] &= aMask0; 581 cMask[1] &= aMask1; 582 cMask[2] &= aMask2; 583 cMask[3] &= aMask3; 584 } 585 fogBlend(Vector4f & c0,Float4 & fog)586 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) 587 { 588 if(!state.fogActive) 589 { 590 return; 591 } 592 593 if(state.pixelFogMode != FOG_NONE) 594 { 595 pixelFog(fog); 596 597 fog = Min(fog, Float4(1.0f)); 598 fog = Max(fog, Float4(0.0f)); 599 } 600 601 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 602 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 603 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 604 605 c0.x *= fog; 606 c0.y *= fog; 607 c0.z *= fog; 608 609 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0])); 610 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1])); 611 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2])); 612 } 613 pixelFog(Float4 & visibility)614 void PixelRoutine::pixelFog(Float4 &visibility) 615 { 616 Float4 &zw = visibility; 617 618 if(state.pixelFogMode != FOG_NONE) 619 { 620 if(state.wBasedFog) 621 { 622 zw = rhw; 623 } 624 else 625 { 626 if(complementaryDepthBuffer) 627 { 628 zw = Float4(1.0f) - z[0]; 629 } 630 else 631 { 632 zw = z[0]; 633 } 634 } 635 } 636 637 switch(state.pixelFogMode) 638 { 639 case FOG_NONE: 640 break; 641 case FOG_LINEAR: 642 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)); 643 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); 644 break; 645 case FOG_EXP: 646 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)); 647 zw = exponential2(zw, true); 648 break; 649 case FOG_EXP2: 650 zw *= zw; 651 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)); 652 zw = exponential2(zw, true); 653 break; 654 default: 655 ASSERT(false); 656 } 657 } 658 writeDepth(Pointer<Byte> & zBuffer,int q,Int & x,Float4 & z,Int & zMask)659 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask) 660 { 661 if(!state.depthWriteEnable) 662 { 663 return; 664 } 665 666 Float4 Z = z; 667 668 if(shader && shader->depthOverride()) 669 { 670 if(complementaryDepthBuffer) 671 { 672 Z = Float4(1.0f) - oDepth; 673 } 674 else 675 { 676 Z = oDepth; 677 } 678 } 679 680 Pointer<Byte> buffer; 681 Int pitch; 682 683 if(!state.quadLayoutDepthBuffer) 684 { 685 buffer = zBuffer + 4 * x; 686 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 687 } 688 else 689 { 690 buffer = zBuffer + 8 * x; 691 } 692 693 if(q > 0) 694 { 695 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB)); 696 } 697 698 Float4 zValue; 699 700 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) 701 { 702 if(!state.quadLayoutDepthBuffer) 703 { 704 // FIXME: Properly optimizes? 705 zValue.xy = *Pointer<Float4>(buffer); 706 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 707 } 708 else 709 { 710 zValue = *Pointer<Float4>(buffer, 16); 711 } 712 } 713 714 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); 715 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); 716 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue)); 717 718 if(!state.quadLayoutDepthBuffer) 719 { 720 // FIXME: Properly optimizes? 721 *Pointer<Float2>(buffer) = Float2(Z.xy); 722 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw); 723 } 724 else 725 { 726 *Pointer<Float4>(buffer, 16) = Z; 727 } 728 } 729 writeStencil(Pointer<Byte> & sBuffer,int q,Int & x,Int & sMask,Int & zMask,Int & cMask)730 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) 731 { 732 if(!state.stencilActive) 733 { 734 return; 735 } 736 737 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP) 738 { 739 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP)) 740 { 741 return; 742 } 743 } 744 745 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW)) 746 { 747 return; 748 } 749 750 Pointer<Byte> buffer = sBuffer + 2 * x; 751 752 if(q > 0) 753 { 754 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB)); 755 } 756 757 Byte8 bufferValue = *Pointer<Byte8>(buffer); 758 759 Byte8 newValue; 760 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); 761 762 if(!state.noStencilWriteMask) 763 { 764 Byte8 maskedValue = bufferValue; 765 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ)); 766 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); 767 newValue |= maskedValue; 768 } 769 770 if(state.twoSidedStencil) 771 { 772 Byte8 newValueCCW; 773 774 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); 775 776 if(!state.noStencilWriteMaskCCW) 777 { 778 Byte8 maskedValue = bufferValue; 779 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ)); 780 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); 781 newValueCCW |= maskedValue; 782 } 783 784 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)); 785 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)); 786 newValue |= newValueCCW; 787 } 788 789 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); 790 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); 791 newValue |= bufferValue; 792 793 *Pointer<Byte4>(buffer) = Byte4(newValue); 794 } 795 stencilOperation(Byte8 & newValue,Byte8 & bufferValue,StencilOperation stencilPassOperation,StencilOperation stencilZFailOperation,StencilOperation stencilFailOperation,bool CCW,Int & zMask,Int & sMask)796 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) 797 { 798 Byte8 &pass = newValue; 799 Byte8 fail; 800 Byte8 zFail; 801 802 stencilOperation(pass, bufferValue, stencilPassOperation, CCW); 803 804 if(stencilZFailOperation != stencilPassOperation) 805 { 806 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); 807 } 808 809 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 810 { 811 stencilOperation(fail, bufferValue, stencilFailOperation, CCW); 812 } 813 814 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) 815 { 816 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same 817 { 818 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); 819 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); 820 pass |= zFail; 821 } 822 823 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); 824 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); 825 pass |= fail; 826 } 827 } 828 stencilOperation(Byte8 & output,Byte8 & bufferValue,StencilOperation operation,bool CCW)829 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) 830 { 831 switch(operation) 832 { 833 case OPERATION_KEEP: 834 output = bufferValue; 835 break; 836 case OPERATION_ZERO: 837 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); 838 break; 839 case OPERATION_REPLACE: 840 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ)); 841 break; 842 case OPERATION_INCRSAT: 843 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 844 break; 845 case OPERATION_DECRSAT: 846 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); 847 break; 848 case OPERATION_INVERT: 849 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 850 break; 851 case OPERATION_INCR: 852 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); 853 break; 854 case OPERATION_DECR: 855 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); 856 break; 857 default: 858 ASSERT(false); 859 } 860 } 861 blendFactor(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorActive)862 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) 863 { 864 switch(blendFactorActive) 865 { 866 case BLEND_ZERO: 867 // Optimized 868 break; 869 case BLEND_ONE: 870 // Optimized 871 break; 872 case BLEND_SOURCE: 873 blendFactor.x = current.x; 874 blendFactor.y = current.y; 875 blendFactor.z = current.z; 876 break; 877 case BLEND_INVSOURCE: 878 blendFactor.x = Short4(0xFFFFu) - current.x; 879 blendFactor.y = Short4(0xFFFFu) - current.y; 880 blendFactor.z = Short4(0xFFFFu) - current.z; 881 break; 882 case BLEND_DEST: 883 blendFactor.x = pixel.x; 884 blendFactor.y = pixel.y; 885 blendFactor.z = pixel.z; 886 break; 887 case BLEND_INVDEST: 888 blendFactor.x = Short4(0xFFFFu) - pixel.x; 889 blendFactor.y = Short4(0xFFFFu) - pixel.y; 890 blendFactor.z = Short4(0xFFFFu) - pixel.z; 891 break; 892 case BLEND_SOURCEALPHA: 893 blendFactor.x = current.w; 894 blendFactor.y = current.w; 895 blendFactor.z = current.w; 896 break; 897 case BLEND_INVSOURCEALPHA: 898 blendFactor.x = Short4(0xFFFFu) - current.w; 899 blendFactor.y = Short4(0xFFFFu) - current.w; 900 blendFactor.z = Short4(0xFFFFu) - current.w; 901 break; 902 case BLEND_DESTALPHA: 903 blendFactor.x = pixel.w; 904 blendFactor.y = pixel.w; 905 blendFactor.z = pixel.w; 906 break; 907 case BLEND_INVDESTALPHA: 908 blendFactor.x = Short4(0xFFFFu) - pixel.w; 909 blendFactor.y = Short4(0xFFFFu) - pixel.w; 910 blendFactor.z = Short4(0xFFFFu) - pixel.w; 911 break; 912 case BLEND_SRCALPHASAT: 913 blendFactor.x = Short4(0xFFFFu) - pixel.w; 914 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w)); 915 blendFactor.y = blendFactor.x; 916 blendFactor.z = blendFactor.x; 917 break; 918 case BLEND_CONSTANT: 919 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0])); 920 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1])); 921 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2])); 922 break; 923 case BLEND_INVCONSTANT: 924 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); 925 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); 926 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); 927 break; 928 case BLEND_CONSTANTALPHA: 929 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 930 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 931 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 932 break; 933 case BLEND_INVCONSTANTALPHA: 934 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 935 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 936 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 937 break; 938 default: 939 ASSERT(false); 940 } 941 } 942 blendFactorAlpha(Vector4s & blendFactor,const Vector4s & current,const Vector4s & pixel,BlendFactor blendFactorAlphaActive)943 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) 944 { 945 switch(blendFactorAlphaActive) 946 { 947 case BLEND_ZERO: 948 // Optimized 949 break; 950 case BLEND_ONE: 951 // Optimized 952 break; 953 case BLEND_SOURCE: 954 blendFactor.w = current.w; 955 break; 956 case BLEND_INVSOURCE: 957 blendFactor.w = Short4(0xFFFFu) - current.w; 958 break; 959 case BLEND_DEST: 960 blendFactor.w = pixel.w; 961 break; 962 case BLEND_INVDEST: 963 blendFactor.w = Short4(0xFFFFu) - pixel.w; 964 break; 965 case BLEND_SOURCEALPHA: 966 blendFactor.w = current.w; 967 break; 968 case BLEND_INVSOURCEALPHA: 969 blendFactor.w = Short4(0xFFFFu) - current.w; 970 break; 971 case BLEND_DESTALPHA: 972 blendFactor.w = pixel.w; 973 break; 974 case BLEND_INVDESTALPHA: 975 blendFactor.w = Short4(0xFFFFu) - pixel.w; 976 break; 977 case BLEND_SRCALPHASAT: 978 blendFactor.w = Short4(0xFFFFu); 979 break; 980 case BLEND_CONSTANT: 981 case BLEND_CONSTANTALPHA: 982 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3])); 983 break; 984 case BLEND_INVCONSTANT: 985 case BLEND_INVCONSTANTALPHA: 986 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); 987 break; 988 default: 989 ASSERT(false); 990 } 991 } 992 isSRGB(int index) const993 bool PixelRoutine::isSRGB(int index) const 994 { 995 return Surface::isSRGBformat(state.targetFormat[index]); 996 } 997 readPixel(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & pixel)998 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel) 999 { 1000 Short4 c01; 1001 Short4 c23; 1002 Pointer<Byte> buffer; 1003 Pointer<Byte> buffer2; 1004 1005 switch(state.targetFormat[index]) 1006 { 1007 case FORMAT_R5G6B5: 1008 buffer = cBuffer + 2 * x; 1009 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1010 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2))); 1011 1012 pixel.x = c01 & Short4(0xF800u); 1013 pixel.y = (c01 & Short4(0x07E0u)) << 5; 1014 pixel.z = (c01 & Short4(0x001Fu)) << 11; 1015 pixel.w = Short4(0xFFFFu); 1016 break; 1017 case FORMAT_A8R8G8B8: 1018 buffer = cBuffer + 4 * x; 1019 c01 = *Pointer<Short4>(buffer); 1020 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1021 c23 = *Pointer<Short4>(buffer); 1022 pixel.z = c01; 1023 pixel.y = c01; 1024 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1025 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1026 pixel.x = pixel.z; 1027 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1028 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1029 pixel.y = pixel.z; 1030 pixel.w = pixel.x; 1031 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1032 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1033 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1034 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1035 break; 1036 case FORMAT_A8B8G8R8: 1037 case FORMAT_SRGB8_A8: 1038 buffer = cBuffer + 4 * x; 1039 c01 = *Pointer<Short4>(buffer); 1040 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1041 c23 = *Pointer<Short4>(buffer); 1042 pixel.z = c01; 1043 pixel.y = c01; 1044 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1045 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1046 pixel.x = pixel.z; 1047 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1048 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1049 pixel.y = pixel.z; 1050 pixel.w = pixel.x; 1051 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1052 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1053 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1054 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1055 break; 1056 case FORMAT_A8: 1057 buffer = cBuffer + 1 * x; 1058 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0); 1059 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1060 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1); 1061 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1062 pixel.x = Short4(0x0000); 1063 pixel.y = Short4(0x0000); 1064 pixel.z = Short4(0x0000); 1065 break; 1066 case FORMAT_R8: 1067 buffer = cBuffer + 1 * x; 1068 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0); 1069 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1070 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1); 1071 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1072 pixel.y = Short4(0x0000); 1073 pixel.z = Short4(0x0000); 1074 pixel.w = Short4(0xFFFFu); 1075 break; 1076 case FORMAT_X8R8G8B8: 1077 buffer = cBuffer + 4 * x; 1078 c01 = *Pointer<Short4>(buffer); 1079 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1080 c23 = *Pointer<Short4>(buffer); 1081 pixel.z = c01; 1082 pixel.y = c01; 1083 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1084 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1085 pixel.x = pixel.z; 1086 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1087 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1088 pixel.y = pixel.z; 1089 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x)); 1090 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1091 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1092 pixel.w = Short4(0xFFFFu); 1093 break; 1094 case FORMAT_G8R8: 1095 buffer = cBuffer + 2 * x; 1096 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0)); 1097 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1098 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1)); 1099 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8); 1100 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8); 1101 pixel.z = Short4(0x0000u); 1102 pixel.w = Short4(0xFFFFu); 1103 break; 1104 case FORMAT_X8B8G8R8: 1105 case FORMAT_SRGB8_X8: 1106 buffer = cBuffer + 4 * x; 1107 c01 = *Pointer<Short4>(buffer); 1108 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1109 c23 = *Pointer<Short4>(buffer); 1110 pixel.z = c01; 1111 pixel.y = c01; 1112 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23)); 1113 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23)); 1114 pixel.x = pixel.z; 1115 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y)); 1116 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y)); 1117 pixel.y = pixel.z; 1118 pixel.w = pixel.x; 1119 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z)); 1120 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y)); 1121 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w)); 1122 pixel.w = Short4(0xFFFFu); 1123 break; 1124 case FORMAT_A8G8R8B8Q: 1125 UNIMPLEMENTED(); 1126 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1127 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1128 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1129 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1130 break; 1131 case FORMAT_X8G8R8B8Q: 1132 UNIMPLEMENTED(); 1133 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1134 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0)); 1135 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8)); 1136 // pixel.w = Short4(0xFFFFu); 1137 break; 1138 case FORMAT_A16B16G16R16: 1139 buffer = cBuffer; 1140 pixel.x = *Pointer<Short4>(buffer + 8 * x); 1141 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8); 1142 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1143 pixel.z = *Pointer<Short4>(buffer + 8 * x); 1144 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8); 1145 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 1146 break; 1147 case FORMAT_G16R16: 1148 buffer = cBuffer; 1149 pixel.x = *Pointer<Short4>(buffer + 4 * x); 1150 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1151 pixel.y = *Pointer<Short4>(buffer + 4 * x); 1152 pixel.z = pixel.x; 1153 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y)); 1154 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y)); 1155 pixel.y = pixel.z; 1156 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z)); 1157 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z)); 1158 pixel.z = Short4(0xFFFFu); 1159 pixel.w = Short4(0xFFFFu); 1160 break; 1161 default: 1162 ASSERT(false); 1163 } 1164 1165 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1166 { 1167 sRGBtoLinear16_12_16(pixel); 1168 } 1169 } 1170 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1171 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1172 { 1173 if(!state.alphaBlendActive) 1174 { 1175 return; 1176 } 1177 1178 Vector4s pixel; 1179 readPixel(index, cBuffer, x, pixel); 1180 1181 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 1182 Vector4s sourceFactor; 1183 Vector4s destFactor; 1184 1185 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); 1186 blendFactor(destFactor, current, pixel, state.destBlendFactor); 1187 1188 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 1189 { 1190 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x)); 1191 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y)); 1192 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z)); 1193 } 1194 1195 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 1196 { 1197 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x)); 1198 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y)); 1199 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z)); 1200 } 1201 1202 switch(state.blendOperation) 1203 { 1204 case BLENDOP_ADD: 1205 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1206 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1207 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1208 break; 1209 case BLENDOP_SUB: 1210 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1211 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1212 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1213 break; 1214 case BLENDOP_INVSUB: 1215 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x)); 1216 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y)); 1217 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z)); 1218 break; 1219 case BLENDOP_MIN: 1220 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1221 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1222 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1223 break; 1224 case BLENDOP_MAX: 1225 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x)); 1226 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y)); 1227 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z)); 1228 break; 1229 case BLENDOP_SOURCE: 1230 // No operation 1231 break; 1232 case BLENDOP_DEST: 1233 current.x = pixel.x; 1234 current.y = pixel.y; 1235 current.z = pixel.z; 1236 break; 1237 case BLENDOP_NULL: 1238 current.x = Short4(0x0000); 1239 current.y = Short4(0x0000); 1240 current.z = Short4(0x0000); 1241 break; 1242 default: 1243 ASSERT(false); 1244 } 1245 1246 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha); 1247 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha); 1248 1249 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 1250 { 1251 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w)); 1252 } 1253 1254 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 1255 { 1256 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w)); 1257 } 1258 1259 switch(state.blendOperationAlpha) 1260 { 1261 case BLENDOP_ADD: 1262 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1263 break; 1264 case BLENDOP_SUB: 1265 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1266 break; 1267 case BLENDOP_INVSUB: 1268 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w)); 1269 break; 1270 case BLENDOP_MIN: 1271 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1272 break; 1273 case BLENDOP_MAX: 1274 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w)); 1275 break; 1276 case BLENDOP_SOURCE: 1277 // No operation 1278 break; 1279 case BLENDOP_DEST: 1280 current.w = pixel.w; 1281 break; 1282 case BLENDOP_NULL: 1283 current.w = Short4(0x0000); 1284 break; 1285 default: 1286 ASSERT(false); 1287 } 1288 } 1289 logicOperation(int index,Pointer<Byte> & cBuffer,Vector4s & current,Int & x)1290 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x) 1291 { 1292 if(state.logicalOperation == LOGICALOP_COPY) 1293 { 1294 return; 1295 } 1296 1297 Vector4s pixel; 1298 readPixel(index, cBuffer, x, pixel); 1299 1300 switch(state.logicalOperation) 1301 { 1302 case LOGICALOP_CLEAR: 1303 current.x = UShort4(0); 1304 current.y = UShort4(0); 1305 current.z = UShort4(0); 1306 break; 1307 case LOGICALOP_SET: 1308 current.x = UShort4(0xFFFFu); 1309 current.y = UShort4(0xFFFFu); 1310 current.z = UShort4(0xFFFFu); 1311 break; 1312 case LOGICALOP_COPY: 1313 ASSERT(false); // Optimized out 1314 break; 1315 case LOGICALOP_COPY_INVERTED: 1316 current.x = ~current.x; 1317 current.y = ~current.y; 1318 current.z = ~current.z; 1319 break; 1320 case LOGICALOP_NOOP: 1321 current.x = pixel.x; 1322 current.y = pixel.y; 1323 current.z = pixel.z; 1324 break; 1325 case LOGICALOP_INVERT: 1326 current.x = ~pixel.x; 1327 current.y = ~pixel.y; 1328 current.z = ~pixel.z; 1329 break; 1330 case LOGICALOP_AND: 1331 current.x = pixel.x & current.x; 1332 current.y = pixel.y & current.y; 1333 current.z = pixel.z & current.z; 1334 break; 1335 case LOGICALOP_NAND: 1336 current.x = ~(pixel.x & current.x); 1337 current.y = ~(pixel.y & current.y); 1338 current.z = ~(pixel.z & current.z); 1339 break; 1340 case LOGICALOP_OR: 1341 current.x = pixel.x | current.x; 1342 current.y = pixel.y | current.y; 1343 current.z = pixel.z | current.z; 1344 break; 1345 case LOGICALOP_NOR: 1346 current.x = ~(pixel.x | current.x); 1347 current.y = ~(pixel.y | current.y); 1348 current.z = ~(pixel.z | current.z); 1349 break; 1350 case LOGICALOP_XOR: 1351 current.x = pixel.x ^ current.x; 1352 current.y = pixel.y ^ current.y; 1353 current.z = pixel.z ^ current.z; 1354 break; 1355 case LOGICALOP_EQUIV: 1356 current.x = ~(pixel.x ^ current.x); 1357 current.y = ~(pixel.y ^ current.y); 1358 current.z = ~(pixel.z ^ current.z); 1359 break; 1360 case LOGICALOP_AND_REVERSE: 1361 current.x = ~pixel.x & current.x; 1362 current.y = ~pixel.y & current.y; 1363 current.z = ~pixel.z & current.z; 1364 break; 1365 case LOGICALOP_AND_INVERTED: 1366 current.x = pixel.x & ~current.x; 1367 current.y = pixel.y & ~current.y; 1368 current.z = pixel.z & ~current.z; 1369 break; 1370 case LOGICALOP_OR_REVERSE: 1371 current.x = ~pixel.x | current.x; 1372 current.y = ~pixel.y | current.y; 1373 current.z = ~pixel.z | current.z; 1374 break; 1375 case LOGICALOP_OR_INVERTED: 1376 current.x = pixel.x | ~current.x; 1377 current.y = pixel.y | ~current.y; 1378 current.z = pixel.z | ~current.z; 1379 break; 1380 default: 1381 ASSERT(false); 1382 } 1383 } 1384 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4s & current,Int & sMask,Int & zMask,Int & cMask)1385 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s ¤t, Int &sMask, Int &zMask, Int &cMask) 1386 { 1387 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 1388 { 1389 linearToSRGB16_12_16(current); 1390 } 1391 1392 if(exactColorRounding) 1393 { 1394 switch(state.targetFormat[index]) 1395 { 1396 case FORMAT_R5G6B5: 1397 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400)); 1398 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200)); 1399 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400)); 1400 break; 1401 case FORMAT_X8G8R8B8Q: 1402 case FORMAT_A8G8R8B8Q: 1403 case FORMAT_X8R8G8B8: 1404 case FORMAT_X8B8G8R8: 1405 case FORMAT_A8R8G8B8: 1406 case FORMAT_A8B8G8R8: 1407 case FORMAT_SRGB8_X8: 1408 case FORMAT_SRGB8_A8: 1409 case FORMAT_G8R8: 1410 case FORMAT_R8: 1411 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080); 1412 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080); 1413 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080); 1414 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080); 1415 break; 1416 default: 1417 break; 1418 } 1419 } 1420 1421 int rgbaWriteMask = state.colorWriteActive(index); 1422 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; 1423 1424 switch(state.targetFormat[index]) 1425 { 1426 case FORMAT_R5G6B5: 1427 { 1428 current.x = current.x & Short4(0xF800u); 1429 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5; 1430 current.z = As<UShort4>(current.z) >> 11; 1431 1432 current.x = current.x | current.y | current.z; 1433 } 1434 break; 1435 case FORMAT_X8G8R8B8Q: 1436 UNIMPLEMENTED(); 1437 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1438 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1439 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1440 1441 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1442 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y))); 1443 break; 1444 case FORMAT_A8G8R8B8Q: 1445 UNIMPLEMENTED(); 1446 // current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1447 // current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1448 // current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1449 // current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1450 1451 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x))); 1452 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w))); 1453 break; 1454 case FORMAT_X8R8G8B8: 1455 case FORMAT_A8R8G8B8: 1456 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7) 1457 { 1458 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1459 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1460 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1461 1462 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1463 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1464 1465 current.x = current.z; 1466 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1467 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1468 current.y = current.z; 1469 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1470 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1471 } 1472 else 1473 { 1474 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1475 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1476 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1477 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1478 1479 current.z = As<Short4>(PackUnsigned(current.z, current.x)); 1480 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1481 1482 current.x = current.z; 1483 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1484 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1485 current.y = current.z; 1486 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1487 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1488 } 1489 break; 1490 case FORMAT_X8B8G8R8: 1491 case FORMAT_A8B8G8R8: 1492 case FORMAT_SRGB8_X8: 1493 case FORMAT_SRGB8_A8: 1494 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7) 1495 { 1496 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1497 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1498 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1499 1500 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1501 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1502 1503 current.x = current.z; 1504 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1505 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1506 current.y = current.z; 1507 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1508 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1509 } 1510 else 1511 { 1512 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1513 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1514 current.z = As<Short4>(As<UShort4>(current.z) >> 8); 1515 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1516 1517 current.z = As<Short4>(PackUnsigned(current.x, current.z)); 1518 current.y = As<Short4>(PackUnsigned(current.y, current.w)); 1519 1520 current.x = current.z; 1521 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y)); 1522 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y)); 1523 current.y = current.z; 1524 current.z = As<Short4>(UnpackLow(current.z, current.x)); 1525 current.y = As<Short4>(UnpackHigh(current.y, current.x)); 1526 } 1527 break; 1528 case FORMAT_G8R8: 1529 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1530 current.y = As<Short4>(As<UShort4>(current.y) >> 8); 1531 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1532 current.y = As<Short4>(PackUnsigned(current.y, current.y)); 1533 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y)); 1534 break; 1535 case FORMAT_R8: 1536 current.x = As<Short4>(As<UShort4>(current.x) >> 8); 1537 current.x = As<Short4>(PackUnsigned(current.x, current.x)); 1538 break; 1539 case FORMAT_A8: 1540 current.w = As<Short4>(As<UShort4>(current.w) >> 8); 1541 current.w = As<Short4>(PackUnsigned(current.w, current.w)); 1542 break; 1543 case FORMAT_G16R16: 1544 current.z = current.x; 1545 current.x = As<Short4>(UnpackLow(current.x, current.y)); 1546 current.z = As<Short4>(UnpackHigh(current.z, current.y)); 1547 current.y = current.z; 1548 break; 1549 case FORMAT_A16B16G16R16: 1550 transpose4x4(current.x, current.y, current.z, current.w); 1551 break; 1552 default: 1553 ASSERT(false); 1554 } 1555 1556 Short4 c01 = current.z; 1557 Short4 c23 = current.y; 1558 1559 Int xMask; // Combination of all masks 1560 1561 if(state.depthTestActive) 1562 { 1563 xMask = zMask; 1564 } 1565 else 1566 { 1567 xMask = cMask; 1568 } 1569 1570 if(state.stencilActive) 1571 { 1572 xMask &= sMask; 1573 } 1574 1575 switch(state.targetFormat[index]) 1576 { 1577 case FORMAT_R5G6B5: 1578 { 1579 Pointer<Byte> buffer = cBuffer + 2 * x; 1580 Int value = *Pointer<Int>(buffer); 1581 1582 Int c01 = Extract(As<Int2>(current.x), 0); 1583 1584 if((bgraWriteMask & 0x00000007) != 0x00000007) 1585 { 1586 Int masked = value; 1587 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1588 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1589 c01 |= masked; 1590 } 1591 1592 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); 1593 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); 1594 c01 |= value; 1595 *Pointer<Int>(buffer) = c01; 1596 1597 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1598 value = *Pointer<Int>(buffer); 1599 1600 Int c23 = Extract(As<Int2>(current.x), 1); 1601 1602 if((bgraWriteMask & 0x00000007) != 0x00000007) 1603 { 1604 Int masked = value; 1605 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); 1606 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); 1607 c23 |= masked; 1608 } 1609 1610 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); 1611 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); 1612 c23 |= value; 1613 *Pointer<Int>(buffer) = c23; 1614 } 1615 break; 1616 case FORMAT_A8G8R8B8Q: 1617 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha? 1618 UNIMPLEMENTED(); 1619 // value = *Pointer<Short4>(cBuffer + 8 * x + 0); 1620 1621 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1622 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1623 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1624 // { 1625 // Short4 masked = value; 1626 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1627 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1628 // c01 |= masked; 1629 // } 1630 1631 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1632 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1633 // c01 |= value; 1634 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01; 1635 1636 // value = *Pointer<Short4>(cBuffer + 8 * x + 8); 1637 1638 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) || 1639 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) && 1640 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1641 // { 1642 // Short4 masked = value; 1643 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1644 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1645 // c23 |= masked; 1646 // } 1647 1648 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1649 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1650 // c23 |= value; 1651 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23; 1652 break; 1653 case FORMAT_A8R8G8B8: 1654 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha? 1655 { 1656 Pointer<Byte> buffer = cBuffer + x * 4; 1657 Short4 value = *Pointer<Short4>(buffer); 1658 1659 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1660 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1661 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1662 { 1663 Short4 masked = value; 1664 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1665 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1666 c01 |= masked; 1667 } 1668 1669 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1670 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1671 c01 |= value; 1672 *Pointer<Short4>(buffer) = c01; 1673 1674 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1675 value = *Pointer<Short4>(buffer); 1676 1677 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || 1678 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) && 1679 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 1680 { 1681 Short4 masked = value; 1682 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); 1683 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); 1684 c23 |= masked; 1685 } 1686 1687 #ifdef __APPLE__ 1688 // On Mac we render directly to an IOSurface that isn't vertically padded. So we 1689 // only render the bottom half of quads when it won't overflow the buffer. 1690 If ((y + 1) < yMax) 1691 #endif 1692 { 1693 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1694 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1695 c23 |= value; 1696 *Pointer<Short4>(buffer) = c23; 1697 } 1698 } 1699 break; 1700 case FORMAT_A8B8G8R8: 1701 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha? 1702 case FORMAT_SRGB8_X8: 1703 case FORMAT_SRGB8_A8: 1704 { 1705 Pointer<Byte> buffer = cBuffer + x * 4; 1706 Short4 value = *Pointer<Short4>(buffer); 1707 1708 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) || 1709 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) && 1710 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh? 1711 1712 if(masked) 1713 { 1714 Short4 masked = value; 1715 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1716 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1717 c01 |= masked; 1718 } 1719 1720 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1721 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1722 c01 |= value; 1723 *Pointer<Short4>(buffer) = c01; 1724 1725 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1726 value = *Pointer<Short4>(buffer); 1727 1728 if(masked) 1729 { 1730 Short4 masked = value; 1731 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); 1732 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); 1733 c23 |= masked; 1734 } 1735 1736 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1737 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1738 c23 |= value; 1739 *Pointer<Short4>(buffer) = c23; 1740 } 1741 break; 1742 case FORMAT_G8R8: 1743 if((rgbaWriteMask & 0x00000003) != 0x0) 1744 { 1745 Pointer<Byte> buffer = cBuffer + 2 * x; 1746 Int2 value; 1747 value = Insert(value, *Pointer<Int>(buffer), 0); 1748 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1749 value = Insert(value, *Pointer<Int>(buffer + pitch), 1); 1750 1751 Int2 packedCol = As<Int2>(current.x); 1752 1753 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 1754 if((rgbaWriteMask & 0x3) != 0x3) 1755 { 1756 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 1757 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 1758 mergedMask &= rgbaMask; 1759 } 1760 1761 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask)); 1762 1763 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 1764 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1)); 1765 } 1766 break; 1767 case FORMAT_R8: 1768 if(rgbaWriteMask & 0x00000001) 1769 { 1770 Pointer<Byte> buffer = cBuffer + 1 * x; 1771 Short4 value; 1772 value = Insert(value, *Pointer<Short>(buffer), 0); 1773 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 1774 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1775 1776 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); 1777 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); 1778 current.x |= value; 1779 1780 *Pointer<Short>(buffer) = Extract(current.x, 0); 1781 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1); 1782 } 1783 break; 1784 case FORMAT_A8: 1785 if(rgbaWriteMask & 0x00000008) 1786 { 1787 Pointer<Byte> buffer = cBuffer + 1 * x; 1788 Short4 value; 1789 value = Insert(value, *Pointer<Short>(buffer), 0); 1790 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1791 value = Insert(value, *Pointer<Short>(buffer + pitch), 1); 1792 1793 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask); 1794 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); 1795 current.w |= value; 1796 1797 *Pointer<Short>(buffer) = Extract(current.w, 0); 1798 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1); 1799 } 1800 break; 1801 case FORMAT_G16R16: 1802 { 1803 Pointer<Byte> buffer = cBuffer + 4 * x; 1804 1805 Short4 value = *Pointer<Short4>(buffer); 1806 1807 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1808 { 1809 Short4 masked = value; 1810 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1811 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1812 current.x |= masked; 1813 } 1814 1815 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8); 1816 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); 1817 current.x |= value; 1818 *Pointer<Short4>(buffer) = current.x; 1819 1820 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1821 1822 value = *Pointer<Short4>(buffer); 1823 1824 if((rgbaWriteMask & 0x00000003) != 0x00000003) 1825 { 1826 Short4 masked = value; 1827 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); 1828 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); 1829 current.y |= masked; 1830 } 1831 1832 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8); 1833 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); 1834 current.y |= value; 1835 *Pointer<Short4>(buffer) = current.y; 1836 } 1837 break; 1838 case FORMAT_A16B16G16R16: 1839 { 1840 Pointer<Byte> buffer = cBuffer + 8 * x; 1841 1842 { 1843 Short4 value = *Pointer<Short4>(buffer); 1844 1845 if(rgbaWriteMask != 0x0000000F) 1846 { 1847 Short4 masked = value; 1848 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1849 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1850 current.x |= masked; 1851 } 1852 1853 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); 1854 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); 1855 current.x |= value; 1856 *Pointer<Short4>(buffer) = current.x; 1857 } 1858 1859 { 1860 Short4 value = *Pointer<Short4>(buffer + 8); 1861 1862 if(rgbaWriteMask != 0x0000000F) 1863 { 1864 Short4 masked = value; 1865 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1866 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1867 current.y |= masked; 1868 } 1869 1870 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); 1871 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); 1872 current.y |= value; 1873 *Pointer<Short4>(buffer + 8) = current.y; 1874 } 1875 1876 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 1877 1878 { 1879 Short4 value = *Pointer<Short4>(buffer); 1880 1881 if(rgbaWriteMask != 0x0000000F) 1882 { 1883 Short4 masked = value; 1884 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1885 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1886 current.z |= masked; 1887 } 1888 1889 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); 1890 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); 1891 current.z |= value; 1892 *Pointer<Short4>(buffer) = current.z; 1893 } 1894 1895 { 1896 Short4 value = *Pointer<Short4>(buffer + 8); 1897 1898 if(rgbaWriteMask != 0x0000000F) 1899 { 1900 Short4 masked = value; 1901 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); 1902 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); 1903 current.w |= masked; 1904 } 1905 1906 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); 1907 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); 1908 current.w |= value; 1909 *Pointer<Short4>(buffer + 8) = current.w; 1910 } 1911 } 1912 break; 1913 default: 1914 ASSERT(false); 1915 } 1916 } 1917 blendFactor(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorActive)1918 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) 1919 { 1920 switch(blendFactorActive) 1921 { 1922 case BLEND_ZERO: 1923 // Optimized 1924 break; 1925 case BLEND_ONE: 1926 // Optimized 1927 break; 1928 case BLEND_SOURCE: 1929 blendFactor.x = oC.x; 1930 blendFactor.y = oC.y; 1931 blendFactor.z = oC.z; 1932 break; 1933 case BLEND_INVSOURCE: 1934 blendFactor.x = Float4(1.0f) - oC.x; 1935 blendFactor.y = Float4(1.0f) - oC.y; 1936 blendFactor.z = Float4(1.0f) - oC.z; 1937 break; 1938 case BLEND_DEST: 1939 blendFactor.x = pixel.x; 1940 blendFactor.y = pixel.y; 1941 blendFactor.z = pixel.z; 1942 break; 1943 case BLEND_INVDEST: 1944 blendFactor.x = Float4(1.0f) - pixel.x; 1945 blendFactor.y = Float4(1.0f) - pixel.y; 1946 blendFactor.z = Float4(1.0f) - pixel.z; 1947 break; 1948 case BLEND_SOURCEALPHA: 1949 blendFactor.x = oC.w; 1950 blendFactor.y = oC.w; 1951 blendFactor.z = oC.w; 1952 break; 1953 case BLEND_INVSOURCEALPHA: 1954 blendFactor.x = Float4(1.0f) - oC.w; 1955 blendFactor.y = Float4(1.0f) - oC.w; 1956 blendFactor.z = Float4(1.0f) - oC.w; 1957 break; 1958 case BLEND_DESTALPHA: 1959 blendFactor.x = pixel.w; 1960 blendFactor.y = pixel.w; 1961 blendFactor.z = pixel.w; 1962 break; 1963 case BLEND_INVDESTALPHA: 1964 blendFactor.x = Float4(1.0f) - pixel.w; 1965 blendFactor.y = Float4(1.0f) - pixel.w; 1966 blendFactor.z = Float4(1.0f) - pixel.w; 1967 break; 1968 case BLEND_SRCALPHASAT: 1969 blendFactor.x = Float4(1.0f) - pixel.w; 1970 blendFactor.x = Min(blendFactor.x, oC.w); 1971 blendFactor.y = blendFactor.x; 1972 blendFactor.z = blendFactor.x; 1973 break; 1974 case BLEND_CONSTANT: 1975 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0])); 1976 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1])); 1977 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2])); 1978 break; 1979 case BLEND_INVCONSTANT: 1980 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); 1981 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); 1982 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); 1983 break; 1984 default: 1985 ASSERT(false); 1986 } 1987 } 1988 blendFactorAlpha(Vector4f & blendFactor,const Vector4f & oC,const Vector4f & pixel,BlendFactor blendFactorAlphaActive)1989 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) 1990 { 1991 switch(blendFactorAlphaActive) 1992 { 1993 case BLEND_ZERO: 1994 // Optimized 1995 break; 1996 case BLEND_ONE: 1997 // Optimized 1998 break; 1999 case BLEND_SOURCE: 2000 blendFactor.w = oC.w; 2001 break; 2002 case BLEND_INVSOURCE: 2003 blendFactor.w = Float4(1.0f) - oC.w; 2004 break; 2005 case BLEND_DEST: 2006 blendFactor.w = pixel.w; 2007 break; 2008 case BLEND_INVDEST: 2009 blendFactor.w = Float4(1.0f) - pixel.w; 2010 break; 2011 case BLEND_SOURCEALPHA: 2012 blendFactor.w = oC.w; 2013 break; 2014 case BLEND_INVSOURCEALPHA: 2015 blendFactor.w = Float4(1.0f) - oC.w; 2016 break; 2017 case BLEND_DESTALPHA: 2018 blendFactor.w = pixel.w; 2019 break; 2020 case BLEND_INVDESTALPHA: 2021 blendFactor.w = Float4(1.0f) - pixel.w; 2022 break; 2023 case BLEND_SRCALPHASAT: 2024 blendFactor.w = Float4(1.0f); 2025 break; 2026 case BLEND_CONSTANT: 2027 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3])); 2028 break; 2029 case BLEND_INVCONSTANT: 2030 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); 2031 break; 2032 default: 2033 ASSERT(false); 2034 } 2035 } 2036 alphaBlend(int index,Pointer<Byte> & cBuffer,Vector4f & oC,Int & x)2037 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x) 2038 { 2039 if(!state.alphaBlendActive) 2040 { 2041 return; 2042 } 2043 2044 Pointer<Byte> buffer; 2045 Vector4f pixel; 2046 2047 Vector4s color; 2048 Short4 c01; 2049 Short4 c23; 2050 2051 Float4 one; 2052 if(Surface::isFloatFormat(state.targetFormat[index])) 2053 { 2054 one = Float4(1.0f); 2055 } 2056 else if(Surface::isNonNormalizedInteger(state.targetFormat[index])) 2057 { 2058 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF)); 2059 } 2060 2061 switch(state.targetFormat[index]) 2062 { 2063 case FORMAT_R32I: 2064 case FORMAT_R32UI: 2065 case FORMAT_R32F: 2066 buffer = cBuffer; 2067 // FIXME: movlps 2068 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0); 2069 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4); 2070 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2071 // FIXME: movhps 2072 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0); 2073 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4); 2074 pixel.y = pixel.z = pixel.w = one; 2075 break; 2076 case FORMAT_G32R32I: 2077 case FORMAT_G32R32UI: 2078 case FORMAT_G32R32F: 2079 buffer = cBuffer; 2080 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16); 2081 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2082 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16); 2083 pixel.z = pixel.x; 2084 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x0202); 2085 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0x1313); 2086 pixel.y = pixel.z; 2087 pixel.z = pixel.w = one; 2088 break; 2089 case FORMAT_X32B32G32R32F: 2090 case FORMAT_A32B32G32R32F: 2091 case FORMAT_X32B32G32R32F_UNSIGNED: 2092 case FORMAT_A32B32G32R32I: 2093 case FORMAT_A32B32G32R32UI: 2094 buffer = cBuffer; 2095 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16); 2096 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2097 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2098 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16); 2099 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16); 2100 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); 2101 if(state.targetFormat[index] == FORMAT_X32B32G32R32F || 2102 state.targetFormat[index] == FORMAT_X32B32G32R32F_UNSIGNED) 2103 { 2104 pixel.w = Float4(1.0f); 2105 } 2106 break; 2107 default: 2108 ASSERT(false); 2109 } 2110 2111 if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) 2112 { 2113 sRGBtoLinear(pixel.x); 2114 sRGBtoLinear(pixel.y); 2115 sRGBtoLinear(pixel.z); 2116 } 2117 2118 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor 2119 Vector4f sourceFactor; 2120 Vector4f destFactor; 2121 2122 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor); 2123 blendFactor(destFactor, oC, pixel, state.destBlendFactor); 2124 2125 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) 2126 { 2127 oC.x *= sourceFactor.x; 2128 oC.y *= sourceFactor.y; 2129 oC.z *= sourceFactor.z; 2130 } 2131 2132 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) 2133 { 2134 pixel.x *= destFactor.x; 2135 pixel.y *= destFactor.y; 2136 pixel.z *= destFactor.z; 2137 } 2138 2139 switch(state.blendOperation) 2140 { 2141 case BLENDOP_ADD: 2142 oC.x += pixel.x; 2143 oC.y += pixel.y; 2144 oC.z += pixel.z; 2145 break; 2146 case BLENDOP_SUB: 2147 oC.x -= pixel.x; 2148 oC.y -= pixel.y; 2149 oC.z -= pixel.z; 2150 break; 2151 case BLENDOP_INVSUB: 2152 oC.x = pixel.x - oC.x; 2153 oC.y = pixel.y - oC.y; 2154 oC.z = pixel.z - oC.z; 2155 break; 2156 case BLENDOP_MIN: 2157 oC.x = Min(oC.x, pixel.x); 2158 oC.y = Min(oC.y, pixel.y); 2159 oC.z = Min(oC.z, pixel.z); 2160 break; 2161 case BLENDOP_MAX: 2162 oC.x = Max(oC.x, pixel.x); 2163 oC.y = Max(oC.y, pixel.y); 2164 oC.z = Max(oC.z, pixel.z); 2165 break; 2166 case BLENDOP_SOURCE: 2167 // No operation 2168 break; 2169 case BLENDOP_DEST: 2170 oC.x = pixel.x; 2171 oC.y = pixel.y; 2172 oC.z = pixel.z; 2173 break; 2174 case BLENDOP_NULL: 2175 oC.x = Float4(0.0f); 2176 oC.y = Float4(0.0f); 2177 oC.z = Float4(0.0f); 2178 break; 2179 default: 2180 ASSERT(false); 2181 } 2182 2183 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); 2184 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha); 2185 2186 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) 2187 { 2188 oC.w *= sourceFactor.w; 2189 } 2190 2191 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) 2192 { 2193 pixel.w *= destFactor.w; 2194 } 2195 2196 switch(state.blendOperationAlpha) 2197 { 2198 case BLENDOP_ADD: 2199 oC.w += pixel.w; 2200 break; 2201 case BLENDOP_SUB: 2202 oC.w -= pixel.w; 2203 break; 2204 case BLENDOP_INVSUB: 2205 pixel.w -= oC.w; 2206 oC.w = pixel.w; 2207 break; 2208 case BLENDOP_MIN: 2209 oC.w = Min(oC.w, pixel.w); 2210 break; 2211 case BLENDOP_MAX: 2212 oC.w = Max(oC.w, pixel.w); 2213 break; 2214 case BLENDOP_SOURCE: 2215 // No operation 2216 break; 2217 case BLENDOP_DEST: 2218 oC.w = pixel.w; 2219 break; 2220 case BLENDOP_NULL: 2221 oC.w = Float4(0.0f); 2222 break; 2223 default: 2224 ASSERT(false); 2225 } 2226 } 2227 writeColor(int index,Pointer<Byte> & cBuffer,Int & x,Vector4f & oC,Int & sMask,Int & zMask,Int & cMask)2228 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) 2229 { 2230 switch(state.targetFormat[index]) 2231 { 2232 case FORMAT_R32F: 2233 case FORMAT_R32I: 2234 case FORMAT_R32UI: 2235 case FORMAT_R16I: 2236 case FORMAT_R16UI: 2237 case FORMAT_R8I: 2238 case FORMAT_R8UI: 2239 break; 2240 case FORMAT_G32R32F: 2241 case FORMAT_G32R32I: 2242 case FORMAT_G32R32UI: 2243 case FORMAT_G16R16I: 2244 case FORMAT_G16R16UI: 2245 case FORMAT_G8R8I: 2246 case FORMAT_G8R8UI: 2247 oC.z = oC.x; 2248 oC.x = UnpackLow(oC.x, oC.y); 2249 oC.z = UnpackHigh(oC.z, oC.y); 2250 oC.y = oC.z; 2251 break; 2252 case FORMAT_X32B32G32R32F: 2253 case FORMAT_A32B32G32R32F: 2254 case FORMAT_X32B32G32R32F_UNSIGNED: 2255 case FORMAT_A32B32G32R32I: 2256 case FORMAT_A32B32G32R32UI: 2257 case FORMAT_A16B16G16R16I: 2258 case FORMAT_A16B16G16R16UI: 2259 case FORMAT_A8B8G8R8I: 2260 case FORMAT_A8B8G8R8UI: 2261 transpose4x4(oC.x, oC.y, oC.z, oC.w); 2262 break; 2263 default: 2264 ASSERT(false); 2265 } 2266 2267 int rgbaWriteMask = state.colorWriteActive(index); 2268 2269 Int xMask; // Combination of all masks 2270 2271 if(state.depthTestActive) 2272 { 2273 xMask = zMask; 2274 } 2275 else 2276 { 2277 xMask = cMask; 2278 } 2279 2280 if(state.stencilActive) 2281 { 2282 xMask &= sMask; 2283 } 2284 2285 Pointer<Byte> buffer; 2286 Float4 value; 2287 2288 switch(state.targetFormat[index]) 2289 { 2290 case FORMAT_R32F: 2291 case FORMAT_R32I: 2292 case FORMAT_R32UI: 2293 if(rgbaWriteMask & 0x00000001) 2294 { 2295 buffer = cBuffer + 4 * x; 2296 2297 // FIXME: movlps 2298 value.x = *Pointer<Float>(buffer + 0); 2299 value.y = *Pointer<Float>(buffer + 4); 2300 2301 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2302 2303 // FIXME: movhps 2304 value.z = *Pointer<Float>(buffer + 0); 2305 value.w = *Pointer<Float>(buffer + 4); 2306 2307 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); 2308 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); 2309 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2310 2311 // FIXME: movhps 2312 *Pointer<Float>(buffer + 0) = oC.x.z; 2313 *Pointer<Float>(buffer + 4) = oC.x.w; 2314 2315 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2316 2317 // FIXME: movlps 2318 *Pointer<Float>(buffer + 0) = oC.x.x; 2319 *Pointer<Float>(buffer + 4) = oC.x.y; 2320 } 2321 break; 2322 case FORMAT_R16I: 2323 case FORMAT_R16UI: 2324 if(rgbaWriteMask & 0x00000001) 2325 { 2326 buffer = cBuffer + 2 * x; 2327 2328 UShort4 xyzw; 2329 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0)); 2330 2331 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2332 2333 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1)); 2334 value = As<Float4>(Int4(xyzw)); 2335 2336 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); 2337 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); 2338 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2339 2340 if(state.targetFormat[index] == FORMAT_R16I) 2341 { 2342 Float component = oC.x.z; 2343 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2344 component = oC.x.w; 2345 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2346 2347 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2348 2349 component = oC.x.x; 2350 *Pointer<Short>(buffer + 0) = Short(As<Int>(component)); 2351 component = oC.x.y; 2352 *Pointer<Short>(buffer + 2) = Short(As<Int>(component)); 2353 } 2354 else // FORMAT_R16UI 2355 { 2356 Float component = oC.x.z; 2357 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2358 component = oC.x.w; 2359 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2360 2361 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2362 2363 component = oC.x.x; 2364 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component)); 2365 component = oC.x.y; 2366 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component)); 2367 } 2368 } 2369 break; 2370 case FORMAT_R8I: 2371 case FORMAT_R8UI: 2372 if(rgbaWriteMask & 0x00000001) 2373 { 2374 buffer = cBuffer + x; 2375 2376 UInt xyzw, packedCol; 2377 2378 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF; 2379 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2380 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16; 2381 2382 Short4 tmpCol = Short4(As<Int4>(oC.x)); 2383 if(state.targetFormat[index] == FORMAT_R8I) 2384 { 2385 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol)); 2386 } 2387 else 2388 { 2389 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol)); 2390 } 2391 packedCol = Extract(As<Int2>(tmpCol), 0); 2392 2393 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | 2394 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); 2395 2396 *Pointer<UShort>(buffer) = UShort(packedCol >> 16); 2397 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2398 *Pointer<UShort>(buffer) = UShort(packedCol); 2399 } 2400 break; 2401 case FORMAT_G32R32F: 2402 case FORMAT_G32R32I: 2403 case FORMAT_G32R32UI: 2404 buffer = cBuffer + 8 * x; 2405 2406 value = *Pointer<Float4>(buffer); 2407 2408 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2409 { 2410 Float4 masked = value; 2411 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2412 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2413 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2414 } 2415 2416 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); 2417 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); 2418 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2419 *Pointer<Float4>(buffer) = oC.x; 2420 2421 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2422 2423 value = *Pointer<Float4>(buffer); 2424 2425 if((rgbaWriteMask & 0x00000003) != 0x00000003) 2426 { 2427 Float4 masked; 2428 2429 masked = value; 2430 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); 2431 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); 2432 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2433 } 2434 2435 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); 2436 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); 2437 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2438 *Pointer<Float4>(buffer) = oC.y; 2439 break; 2440 case FORMAT_G16R16I: 2441 case FORMAT_G16R16UI: 2442 if((rgbaWriteMask & 0x00000003) != 0x0) 2443 { 2444 buffer = cBuffer + 4 * x; 2445 2446 UInt2 rgbaMask; 2447 UShort4 packedCol = UShort4(As<Int4>(oC.x)); 2448 UShort4 value = *Pointer<UShort4>(buffer); 2449 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2450 if((rgbaWriteMask & 0x3) != 0x3) 2451 { 2452 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); 2453 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2454 mergedMask &= rgbaMask; 2455 } 2456 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2457 2458 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2459 2460 packedCol = UShort4(As<Int4>(oC.y)); 2461 value = *Pointer<UShort4>(buffer); 2462 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2463 if((rgbaWriteMask & 0x3) != 0x3) 2464 { 2465 mergedMask &= rgbaMask; 2466 } 2467 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask); 2468 } 2469 break; 2470 case FORMAT_G8R8I: 2471 case FORMAT_G8R8UI: 2472 if((rgbaWriteMask & 0x00000003) != 0x0) 2473 { 2474 buffer = cBuffer + 2 * x; 2475 2476 Int2 xyzw, packedCol; 2477 2478 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0); 2479 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2480 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1); 2481 2482 if(state.targetFormat[index] == FORMAT_G8R8I) 2483 { 2484 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2485 } 2486 else 2487 { 2488 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2489 } 2490 2491 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8); 2492 if((rgbaWriteMask & 0x3) != 0x3) 2493 { 2494 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); 2495 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask)); 2496 mergedMask &= rgbaMask; 2497 } 2498 2499 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask)); 2500 2501 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1)); 2502 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2503 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0)); 2504 } 2505 break; 2506 case FORMAT_X32B32G32R32F: 2507 case FORMAT_A32B32G32R32F: 2508 case FORMAT_X32B32G32R32F_UNSIGNED: 2509 case FORMAT_A32B32G32R32I: 2510 case FORMAT_A32B32G32R32UI: 2511 buffer = cBuffer + 16 * x; 2512 2513 { 2514 value = *Pointer<Float4>(buffer, 16); 2515 2516 if(rgbaWriteMask != 0x0000000F) 2517 { 2518 Float4 masked = value; 2519 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2520 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2521 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked)); 2522 } 2523 2524 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); 2525 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); 2526 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value)); 2527 *Pointer<Float4>(buffer, 16) = oC.x; 2528 } 2529 2530 { 2531 value = *Pointer<Float4>(buffer + 16, 16); 2532 2533 if(rgbaWriteMask != 0x0000000F) 2534 { 2535 Float4 masked = value; 2536 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2537 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2538 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked)); 2539 } 2540 2541 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); 2542 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); 2543 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value)); 2544 *Pointer<Float4>(buffer + 16, 16) = oC.y; 2545 } 2546 2547 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 2548 2549 { 2550 value = *Pointer<Float4>(buffer, 16); 2551 2552 if(rgbaWriteMask != 0x0000000F) 2553 { 2554 Float4 masked = value; 2555 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2556 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2557 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked)); 2558 } 2559 2560 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); 2561 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); 2562 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value)); 2563 *Pointer<Float4>(buffer, 16) = oC.z; 2564 } 2565 2566 { 2567 value = *Pointer<Float4>(buffer + 16, 16); 2568 2569 if(rgbaWriteMask != 0x0000000F) 2570 { 2571 Float4 masked = value; 2572 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); 2573 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); 2574 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked)); 2575 } 2576 2577 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); 2578 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); 2579 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value)); 2580 *Pointer<Float4>(buffer + 16, 16) = oC.w; 2581 } 2582 break; 2583 case FORMAT_A16B16G16R16I: 2584 case FORMAT_A16B16G16R16UI: 2585 if((rgbaWriteMask & 0x0000000F) != 0x0) 2586 { 2587 buffer = cBuffer + 8 * x; 2588 2589 UInt4 rgbaMask; 2590 UShort8 value = *Pointer<UShort8>(buffer); 2591 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y))); 2592 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16); 2593 if((rgbaWriteMask & 0xF) != 0xF) 2594 { 2595 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); 2596 rgbaMask = UInt4(tmpMask, tmpMask); 2597 mergedMask &= rgbaMask; 2598 } 2599 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2600 2601 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2602 2603 value = *Pointer<UShort8>(buffer); 2604 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w))); 2605 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16); 2606 if((rgbaWriteMask & 0xF) != 0xF) 2607 { 2608 mergedMask &= rgbaMask; 2609 } 2610 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask); 2611 } 2612 break; 2613 case FORMAT_A8B8G8R8I: 2614 case FORMAT_A8B8G8R8UI: 2615 if((rgbaWriteMask & 0x0000000F) != 0x0) 2616 { 2617 UInt2 value, packedCol, mergedMask; 2618 2619 buffer = cBuffer + 4 * x; 2620 2621 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2622 { 2623 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2624 } 2625 else 2626 { 2627 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y)))); 2628 } 2629 value = *Pointer<UInt2>(buffer, 16); 2630 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8); 2631 if(rgbaWriteMask != 0xF) 2632 { 2633 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2634 } 2635 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2636 2637 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])); 2638 2639 if(state.targetFormat[index] == FORMAT_A8B8G8R8I) 2640 { 2641 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2642 } 2643 else 2644 { 2645 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w)))); 2646 } 2647 value = *Pointer<UInt2>(buffer, 16); 2648 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8); 2649 if(rgbaWriteMask != 0xF) 2650 { 2651 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); 2652 } 2653 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); 2654 } 2655 break; 2656 default: 2657 ASSERT(false); 2658 } 2659 } 2660 convertFixed16(Float4 & cf,bool saturate)2661 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate) 2662 { 2663 return UShort4(cf * Float4(0xFFFF), saturate); 2664 } 2665 sRGBtoLinear16_12_16(Vector4s & c)2666 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) 2667 { 2668 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); 2669 2670 c.x = As<UShort4>(c.x) >> 4; 2671 c.y = As<UShort4>(c.y) >> 4; 2672 c.z = As<UShort4>(c.z) >> 4; 2673 2674 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2675 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2676 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2677 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2678 2679 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2680 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2681 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2682 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2683 2684 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2685 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2686 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2687 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2688 } 2689 linearToSRGB16_12_16(Vector4s & c)2690 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c) 2691 { 2692 c.x = As<UShort4>(c.x) >> 4; 2693 c.y = As<UShort4>(c.y) >> 4; 2694 c.z = As<UShort4>(c.z) >> 4; 2695 2696 linearToSRGB12_16(c); 2697 } 2698 linearToSRGB12_16(Vector4s & c)2699 void PixelRoutine::linearToSRGB12_16(Vector4s &c) 2700 { 2701 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16); 2702 2703 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0); 2704 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1); 2705 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2); 2706 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3); 2707 2708 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0); 2709 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1); 2710 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2); 2711 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3); 2712 2713 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0); 2714 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1); 2715 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2); 2716 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3); 2717 } 2718 sRGBtoLinear(const Float4 & x)2719 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2 2720 { 2721 Float4 linear = x * x; 2722 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f); 2723 2724 return Min(Max(linear, Float4(0.0f)), Float4(1.0f)); 2725 } 2726 colorUsed()2727 bool PixelRoutine::colorUsed() 2728 { 2729 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill; 2730 } 2731 } 2732