1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "SamplerCore.hpp" 16 17 #include "Constants.hpp" 18 #include "Common/Debug.hpp" 19 20 namespace 21 { applySwizzle(sw::SwizzleType swizzle,sw::Short4 & s,const sw::Vector4s & c)22 void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c) 23 { 24 switch(swizzle) 25 { 26 case sw::SWIZZLE_RED: s = c.x; break; 27 case sw::SWIZZLE_GREEN: s = c.y; break; 28 case sw::SWIZZLE_BLUE: s = c.z; break; 29 case sw::SWIZZLE_ALPHA: s = c.w; break; 30 case sw::SWIZZLE_ZERO: s = sw::Short4(0x0000); break; 31 case sw::SWIZZLE_ONE: s = sw::Short4(0x1000); break; 32 default: ASSERT(false); 33 } 34 } 35 applySwizzle(sw::SwizzleType swizzle,sw::Float4 & f,const sw::Vector4f & c)36 void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c) 37 { 38 switch(swizzle) 39 { 40 case sw::SWIZZLE_RED: f = c.x; break; 41 case sw::SWIZZLE_GREEN: f = c.y; break; 42 case sw::SWIZZLE_BLUE: f = c.z; break; 43 case sw::SWIZZLE_ALPHA: f = c.w; break; 44 case sw::SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break; 45 case sw::SWIZZLE_ONE: f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break; 46 default: ASSERT(false); 47 } 48 } 49 } 50 51 namespace sw 52 { 53 extern bool colorsDefaultToZero; 54 SamplerCore(Pointer<Byte> & constants,const Sampler::State & state)55 SamplerCore::SamplerCore(Pointer<Byte> 
&constants, const Sampler::State &state) : constants(constants), state(state) 56 { 57 } 58 sampleTexture(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Float4 & bias,Vector4f & dsx,Vector4f & dsy)59 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy) 60 { 61 return sampleTexture(texture, u, v, w, q, q, dsx, dsy, (dsx), Implicit, true); 62 } 63 sampleTexture(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function,bool fixed12)64 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12) 65 { 66 Vector4s c; 67 68 #if PERF_PROFILE 69 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 70 71 if(state.compressedFormat) 72 { 73 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 74 } 75 #endif 76 77 if(state.textureType == TEXTURE_NULL) 78 { 79 c.x = Short4(0x0000); 80 c.y = Short4(0x0000); 81 c.z = Short4(0x0000); 82 83 if(fixed12) // FIXME: Convert to fixed12 at higher level, when required 84 { 85 c.w = Short4(0x1000); 86 } 87 else 88 { 89 c.w = Short4(0xFFFFu); // FIXME 90 } 91 } 92 else 93 { 94 Float4 uuuu = u; 95 Float4 vvvv = v; 96 Float4 wwww = w; 97 Float4 qqqq = q; 98 99 Int face[4]; 100 Float lod; 101 Float anisotropy; 102 Float4 uDelta; 103 Float4 vDelta; 104 105 if(state.textureType != TEXTURE_3D) 106 { 107 if(state.textureType != TEXTURE_CUBE) 108 { 109 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); 110 } 111 else 112 { 113 Float4 M; 114 cubeFace(face, uuuu, vvvv, u, v, w, M); 115 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function); 116 } 117 } 118 else 119 { 120 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); 
121 } 122 123 if(!hasFloatTexture()) 124 { 125 c = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); 126 } 127 else 128 { 129 Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); 130 131 convertFixed12(c, cf); 132 } 133 134 if(fixed12) 135 { 136 if(!hasFloatTexture()) 137 { 138 if(state.textureFormat == FORMAT_R5G6B5) 139 { 140 c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800)); 141 c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00)); 142 c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800)); 143 } 144 else 145 { 146 for(int component = 0; component < textureComponentCount(); component++) 147 { 148 if(hasUnsignedTextureComponent(component)) 149 { 150 c[component] = As<UShort4>(c[component]) >> 4; 151 } 152 else 153 { 154 c[component] = c[component] >> 3; 155 } 156 } 157 } 158 } 159 160 if(state.textureFilter != FILTER_GATHER) 161 { 162 int componentCount = textureComponentCount(); 163 short defaultColorValue = colorsDefaultToZero ? 
0x0000 : 0x1000; 164 165 switch(state.textureFormat) 166 { 167 case FORMAT_R8_SNORM: 168 case FORMAT_G8R8_SNORM: 169 case FORMAT_X8B8G8R8_SNORM: 170 case FORMAT_A8B8G8R8_SNORM: 171 case FORMAT_R8: 172 case FORMAT_R5G6B5: 173 case FORMAT_G8R8: 174 case FORMAT_R8I: 175 case FORMAT_R8UI: 176 case FORMAT_G8R8I: 177 case FORMAT_G8R8UI: 178 case FORMAT_X8B8G8R8I: 179 case FORMAT_X8B8G8R8UI: 180 case FORMAT_A8B8G8R8I: 181 case FORMAT_A8B8G8R8UI: 182 case FORMAT_R16I: 183 case FORMAT_R16UI: 184 case FORMAT_G16R16: 185 case FORMAT_G16R16I: 186 case FORMAT_G16R16UI: 187 case FORMAT_X16B16G16R16I: 188 case FORMAT_X16B16G16R16UI: 189 case FORMAT_A16B16G16R16: 190 case FORMAT_A16B16G16R16I: 191 case FORMAT_A16B16G16R16UI: 192 case FORMAT_R32I: 193 case FORMAT_R32UI: 194 case FORMAT_G32R32I: 195 case FORMAT_G32R32UI: 196 case FORMAT_X32B32G32R32I: 197 case FORMAT_X32B32G32R32UI: 198 case FORMAT_A32B32G32R32I: 199 case FORMAT_A32B32G32R32UI: 200 case FORMAT_X8R8G8B8: 201 case FORMAT_X8B8G8R8: 202 case FORMAT_A8R8G8B8: 203 case FORMAT_A8B8G8R8: 204 case FORMAT_SRGB8_X8: 205 case FORMAT_SRGB8_A8: 206 case FORMAT_V8U8: 207 case FORMAT_Q8W8V8U8: 208 case FORMAT_X8L8V8U8: 209 case FORMAT_V16U16: 210 case FORMAT_A16W16V16U16: 211 case FORMAT_Q16W16V16U16: 212 case FORMAT_YV12_BT601: 213 case FORMAT_YV12_BT709: 214 case FORMAT_YV12_JFIF: 215 if(componentCount < 2) c.y = Short4(defaultColorValue); 216 if(componentCount < 3) c.z = Short4(defaultColorValue); 217 if(componentCount < 4) c.w = Short4(0x1000); 218 break; 219 case FORMAT_A8: 220 c.w = c.x; 221 c.x = Short4(0x0000); 222 c.y = Short4(0x0000); 223 c.z = Short4(0x0000); 224 break; 225 case FORMAT_L8: 226 case FORMAT_L16: 227 c.y = c.x; 228 c.z = c.x; 229 c.w = Short4(0x1000); 230 break; 231 case FORMAT_A8L8: 232 c.w = c.y; 233 c.y = c.x; 234 c.z = c.x; 235 break; 236 case FORMAT_R32F: 237 c.y = Short4(defaultColorValue); 238 case FORMAT_G32R32F: 239 c.z = Short4(defaultColorValue); 240 case FORMAT_X32B32G32R32F: 241 case 
FORMAT_X32B32G32R32F_UNSIGNED: 242 c.w = Short4(0x1000); 243 case FORMAT_A32B32G32R32F: 244 break; 245 case FORMAT_D32F_LOCKABLE: 246 case FORMAT_D32FS8_TEXTURE: 247 case FORMAT_D32F_SHADOW: 248 case FORMAT_D32FS8_SHADOW: 249 c.y = c.x; 250 c.z = c.x; 251 c.w = c.x; 252 break; 253 default: 254 ASSERT(false); 255 } 256 } 257 258 if((state.swizzleR != SWIZZLE_RED) || 259 (state.swizzleG != SWIZZLE_GREEN) || 260 (state.swizzleB != SWIZZLE_BLUE) || 261 (state.swizzleA != SWIZZLE_ALPHA)) 262 { 263 const Vector4s col(c); 264 applySwizzle(state.swizzleR, c.x, col); 265 applySwizzle(state.swizzleG, c.y, col); 266 applySwizzle(state.swizzleB, c.z, col); 267 applySwizzle(state.swizzleA, c.w, col); 268 } 269 } 270 } 271 272 return c; 273 } 274 sampleTexture(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)275 Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 276 { 277 Vector4f c; 278 279 #if PERF_PROFILE 280 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 281 282 if(state.compressedFormat) 283 { 284 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 285 } 286 #endif 287 288 if(state.textureType == TEXTURE_NULL) 289 { 290 c.x = Float4(0.0f); 291 c.y = Float4(0.0f); 292 c.z = Float4(0.0f); 293 c.w = Float4(1.0f); 294 } 295 else 296 { 297 // FIXME: YUV is not supported by the floating point path 298 bool forceFloatFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT); 299 bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS); 300 bool rectangleTexture = (state.textureType == TEXTURE_RECTANGLE); 301 if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering || seamlessCube || rectangleTexture) // FIXME: Mostly identical to integer sampling 302 { 303 
Float4 uuuu = u; 304 Float4 vvvv = v; 305 Float4 wwww = w; 306 Float4 qqqq = q; 307 308 Int face[4]; 309 Float lod; 310 Float anisotropy; 311 Float4 uDelta; 312 Float4 vDelta; 313 314 if(state.textureType != TEXTURE_3D) 315 { 316 if(state.textureType != TEXTURE_CUBE) 317 { 318 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); 319 } 320 else 321 { 322 Float4 M; 323 cubeFace(face, uuuu, vvvv, u, v, w, M); 324 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function); 325 } 326 } 327 else 328 { 329 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); 330 } 331 332 c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); 333 334 if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture()) 335 { 336 if(has16bitTextureFormat()) 337 { 338 switch(state.textureFormat) 339 { 340 case FORMAT_R5G6B5: 341 c.x *= Float4(1.0f / 0xF800); 342 c.y *= Float4(1.0f / 0xFC00); 343 c.z *= Float4(1.0f / 0xF800); 344 break; 345 default: 346 ASSERT(false); 347 } 348 } 349 else 350 { 351 for(int component = 0; component < textureComponentCount(); component++) 352 { 353 c[component] *= Float4(hasUnsignedTextureComponent(component) ? 
1.0f / 0xFFFF : 1.0f / 0x7FFF); 354 } 355 } 356 } 357 } 358 else 359 { 360 Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false); 361 362 if(state.textureFormat == FORMAT_R5G6B5) 363 { 364 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); 365 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); 366 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); 367 } 368 else 369 { 370 for(int component = 0; component < textureComponentCount(); component++) 371 { 372 if(hasUnsignedTextureComponent(component)) 373 { 374 convertUnsigned16(c[component], cs[component]); 375 } 376 else 377 { 378 convertSigned15(c[component], cs[component]); 379 } 380 } 381 } 382 } 383 384 int componentCount = textureComponentCount(); 385 float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f; 386 387 if(state.textureFilter != FILTER_GATHER) 388 { 389 switch(state.textureFormat) 390 { 391 case FORMAT_R8I: 392 case FORMAT_R8UI: 393 case FORMAT_R16I: 394 case FORMAT_R16UI: 395 case FORMAT_R32I: 396 case FORMAT_R32UI: 397 c.y = As<Float4>(UInt4(0)); 398 case FORMAT_G8R8I: 399 case FORMAT_G8R8UI: 400 case FORMAT_G16R16I: 401 case FORMAT_G16R16UI: 402 case FORMAT_G32R32I: 403 case FORMAT_G32R32UI: 404 c.z = As<Float4>(UInt4(0)); 405 case FORMAT_X8B8G8R8I: 406 case FORMAT_X8B8G8R8UI: 407 case FORMAT_X16B16G16R16I: 408 case FORMAT_X16B16G16R16UI: 409 case FORMAT_X32B32G32R32I: 410 case FORMAT_X32B32G32R32UI: 411 c.w = As<Float4>(UInt4(1)); 412 case FORMAT_A8B8G8R8I: 413 case FORMAT_A8B8G8R8UI: 414 case FORMAT_A16B16G16R16I: 415 case FORMAT_A16B16G16R16UI: 416 case FORMAT_A32B32G32R32I: 417 case FORMAT_A32B32G32R32UI: 418 break; 419 case FORMAT_R8_SNORM: 420 case FORMAT_G8R8_SNORM: 421 case FORMAT_X8B8G8R8_SNORM: 422 case FORMAT_A8B8G8R8_SNORM: 423 case FORMAT_R8: 424 case FORMAT_R5G6B5: 425 case FORMAT_G8R8: 426 case FORMAT_G16R16: 427 case FORMAT_A16B16G16R16: 428 case FORMAT_X8R8G8B8: 429 case FORMAT_X8B8G8R8: 430 case FORMAT_A8R8G8B8: 431 case 
FORMAT_A8B8G8R8: 432 case FORMAT_SRGB8_X8: 433 case FORMAT_SRGB8_A8: 434 case FORMAT_V8U8: 435 case FORMAT_Q8W8V8U8: 436 case FORMAT_X8L8V8U8: 437 case FORMAT_V16U16: 438 case FORMAT_A16W16V16U16: 439 case FORMAT_Q16W16V16U16: 440 case FORMAT_YV12_BT601: 441 case FORMAT_YV12_BT709: 442 case FORMAT_YV12_JFIF: 443 if(componentCount < 2) c.y = Float4(defaultColorValue); 444 if(componentCount < 3) c.z = Float4(defaultColorValue); 445 if(componentCount < 4) c.w = Float4(1.0f); 446 break; 447 case FORMAT_A8: 448 c.w = c.x; 449 c.x = Float4(0.0f); 450 c.y = Float4(0.0f); 451 c.z = Float4(0.0f); 452 break; 453 case FORMAT_L8: 454 case FORMAT_L16: 455 c.y = c.x; 456 c.z = c.x; 457 c.w = Float4(1.0f); 458 break; 459 case FORMAT_A8L8: 460 c.w = c.y; 461 c.y = c.x; 462 c.z = c.x; 463 break; 464 case FORMAT_R32F: 465 c.y = Float4(defaultColorValue); 466 case FORMAT_G32R32F: 467 c.z = Float4(defaultColorValue); 468 case FORMAT_X32B32G32R32F: 469 case FORMAT_X32B32G32R32F_UNSIGNED: 470 c.w = Float4(1.0f); 471 case FORMAT_A32B32G32R32F: 472 break; 473 case FORMAT_D32F_LOCKABLE: 474 case FORMAT_D32FS8_TEXTURE: 475 case FORMAT_D32F_SHADOW: 476 case FORMAT_D32FS8_SHADOW: 477 c.y = Float4(0.0f); 478 c.z = Float4(0.0f); 479 c.w = Float4(1.0f); 480 break; 481 default: 482 ASSERT(false); 483 } 484 } 485 486 if((state.swizzleR != SWIZZLE_RED) || 487 (state.swizzleG != SWIZZLE_GREEN) || 488 (state.swizzleB != SWIZZLE_BLUE) || 489 (state.swizzleA != SWIZZLE_ALPHA)) 490 { 491 const Vector4f col(c); 492 applySwizzle(state.swizzleR, c.x, col); 493 applySwizzle(state.swizzleG, c.y, col); 494 applySwizzle(state.swizzleB, c.z, col); 495 applySwizzle(state.swizzleA, c.w, col); 496 } 497 } 498 499 return c; 500 } 501 textureSize(Pointer<Byte> & texture,Float4 & lod)502 Vector4f SamplerCore::textureSize(Pointer<Byte> &texture, Float4 &lod) 503 { 504 Vector4f size; 505 506 for(int i = 0; i < 4; ++i) 507 { 508 Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel)); 509 Int index = 
Min(As<UInt>(As<Int>(Extract(lod, i)) + baseLevel), MIPMAP_LEVELS - 1); 510 Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + index * sizeof(Mipmap); 511 size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i); 512 size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i); 513 size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i); 514 } 515 516 return size; 517 } 518 border(Short4 & mask,Float4 & coordinates)519 void SamplerCore::border(Short4 &mask, Float4 &coordinates) 520 { 521 Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 522 mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border)))); 523 } 524 border(Int4 & mask,Float4 & coordinates)525 void SamplerCore::border(Int4 &mask, Float4 &coordinates) 526 { 527 mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 528 } 529 offsetSample(Short4 & uvw,Pointer<Byte> & mipmap,int halfOffset,bool wrap,int count,Float & lod)530 Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod) 531 { 532 Short4 offset = *Pointer<Short4>(mipmap + halfOffset); 533 534 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 535 { 536 offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f))); 537 } 538 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR) 539 { 540 offset &= Short4(CmpLE(Float4(lod), Float4(0.0f))); 541 } 542 543 if(wrap) 544 { 545 switch(count) 546 { 547 case -1: return uvw - offset; 548 case 0: return uvw; 549 case +1: return uvw + offset; 550 case 2: return uvw + offset + offset; 551 } 552 } 553 else // Clamp or mirror 554 { 555 switch(count) 556 { 557 case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset)); 558 case 0: return uvw; 559 case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset)); 560 case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), 
As<UShort4>(offset)); 561 } 562 } 563 564 return uvw; 565 } 566 sampleFilter(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)567 Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) 568 { 569 Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 570 571 if(function == Fetch) 572 { 573 return c; 574 } 575 576 if(state.mipmapFilter == MIPMAP_LINEAR) 577 { 578 Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function); 579 580 lod *= Float(1 << 16); 581 582 UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize 583 Short4 stri = utri >> 1; // FIXME: Optimize 584 585 if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri); 586 if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri); 587 if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri); 588 if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri); 589 590 utri = ~utri; 591 stri = Short4(0x7FFF) - stri; 592 593 if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri); 594 if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri); 595 if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri); 596 if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri); 597 598 c.x += cc.x; 599 c.y += cc.y; 600 c.z += cc.z; 601 c.w += cc.w; 602 603 
if(!hasUnsignedTextureComponent(0)) c.x += c.x; 604 if(!hasUnsignedTextureComponent(1)) c.y += c.y; 605 if(!hasUnsignedTextureComponent(2)) c.z += c.z; 606 if(!hasUnsignedTextureComponent(3)) c.w += c.w; 607 } 608 609 Short4 borderMask; 610 611 if(state.addressingModeU == ADDRESSING_BORDER) 612 { 613 Short4 u0; 614 615 border(u0, u); 616 617 borderMask = u0; 618 } 619 620 if(state.addressingModeV == ADDRESSING_BORDER) 621 { 622 Short4 v0; 623 624 border(v0, v); 625 626 if(state.addressingModeU == ADDRESSING_BORDER) 627 { 628 borderMask &= v0; 629 } 630 else 631 { 632 borderMask = v0; 633 } 634 } 635 636 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 637 { 638 Short4 s0; 639 640 border(s0, w); 641 642 if(state.addressingModeU == ADDRESSING_BORDER || 643 state.addressingModeV == ADDRESSING_BORDER) 644 { 645 borderMask &= s0; 646 } 647 else 648 { 649 borderMask = s0; 650 } 651 } 652 653 if(state.addressingModeU == ADDRESSING_BORDER || 654 state.addressingModeV == ADDRESSING_BORDER || 655 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 656 { 657 Short4 b; 658 659 c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1))); 660 c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1))); 661 c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1))); 662 c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 
0 : 1))); 663 } 664 665 return c; 666 } 667 sampleAniso(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction function)668 Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 669 { 670 Vector4s c; 671 672 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 673 { 674 c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function); 675 } 676 else 677 { 678 Int a = RoundInt(anisotropy); 679 680 Vector4s cSum; 681 682 cSum.x = Short4(0); 683 cSum.y = Short4(0); 684 cSum.z = Short4(0); 685 cSum.w = Short4(0); 686 687 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 688 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 689 UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a); 690 Short4 sw = Short4(cw >> 1); 691 692 Float4 du = uDelta; 693 Float4 dv = vDelta; 694 695 Float4 u0 = u + B * du; 696 Float4 v0 = v + B * dv; 697 698 du *= A; 699 dv *= A; 700 701 Int i = 0; 702 703 Do 704 { 705 c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function); 706 707 u0 += du; 708 v0 += dv; 709 710 if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw); 711 if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw); 712 if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw); 713 if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw); 714 715 i++; 716 } 717 Until(i >= a); 718 719 
if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x); 720 if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y); 721 if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z); 722 if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w); 723 } 724 725 return c; 726 } 727 sampleQuad(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)728 Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 729 { 730 if(state.textureType != TEXTURE_3D) 731 { 732 return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function); 733 } 734 else 735 { 736 return sample3D(texture, u, v, w, offset, lod, secondLOD, function); 737 } 738 } 739 sampleQuad2D(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)740 Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 741 { 742 Vector4s c; 743 744 int componentCount = textureComponentCount(); 745 bool gather = state.textureFilter == FILTER_GATHER; 746 747 Pointer<Byte> mipmap; 748 Pointer<Byte> buffer[4]; 749 750 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 751 752 bool texelFetch = (function == Fetch); 753 754 Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap); 755 Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap); 756 Short4 wwww = texelFetch ? 
Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap); 757 758 if(state.textureFilter == FILTER_POINT || texelFetch) 759 { 760 c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 761 } 762 else 763 { 764 Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod); 765 Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod); 766 Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod); 767 Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod); 768 769 Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function); 770 Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function); 771 Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function); 772 Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function); 773 774 if(!gather) // Blend 775 { 776 // Fractions 777 UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 778 UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 779 780 UShort4 f1u = ~f0u; 781 UShort4 f1v = ~f0v; 782 783 UShort4 f0u0v = MulHigh(f0u, f0v); 784 UShort4 f1u0v = MulHigh(f1u, f0v); 785 UShort4 f0u1v = MulHigh(f0u, f1v); 786 UShort4 f1u1v = MulHigh(f1u, f1v); 787 788 // Signed fractions 789 Short4 f1u1vs; 790 Short4 f0u1vs; 791 Short4 f1u0vs; 792 Short4 f0u0vs; 793 794 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 795 { 796 f1u1vs = f1u1v >> 1; 797 f0u1vs = f0u1v >> 1; 798 f1u0vs = f1u0v >> 1; 799 f0u0vs = f0u0v >> 1; 800 } 801 802 // Bilinear interpolation 803 if(componentCount >= 1) 804 { 805 
if(has16bitTextureComponents() && hasUnsignedTextureComponent(0)) 806 { 807 c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u); 808 c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u); 809 c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v); 810 } 811 else 812 { 813 if(hasUnsignedTextureComponent(0)) 814 { 815 c0.x = MulHigh(As<UShort4>(c0.x), f1u1v); 816 c1.x = MulHigh(As<UShort4>(c1.x), f0u1v); 817 c2.x = MulHigh(As<UShort4>(c2.x), f1u0v); 818 c3.x = MulHigh(As<UShort4>(c3.x), f0u0v); 819 } 820 else 821 { 822 c0.x = MulHigh(c0.x, f1u1vs); 823 c1.x = MulHigh(c1.x, f0u1vs); 824 c2.x = MulHigh(c2.x, f1u0vs); 825 c3.x = MulHigh(c3.x, f0u0vs); 826 } 827 828 c.x = (c0.x + c1.x) + (c2.x + c3.x); 829 if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions 830 } 831 } 832 833 if(componentCount >= 2) 834 { 835 if(has16bitTextureComponents() && hasUnsignedTextureComponent(1)) 836 { 837 c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u); 838 c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u); 839 c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v); 840 } 841 else 842 { 843 if(hasUnsignedTextureComponent(1)) 844 { 845 c0.y = MulHigh(As<UShort4>(c0.y), f1u1v); 846 c1.y = MulHigh(As<UShort4>(c1.y), f0u1v); 847 c2.y = MulHigh(As<UShort4>(c2.y), f1u0v); 848 c3.y = MulHigh(As<UShort4>(c3.y), f0u0v); 849 } 850 else 851 { 852 c0.y = MulHigh(c0.y, f1u1vs); 853 c1.y = MulHigh(c1.y, f0u1vs); 854 c2.y = MulHigh(c2.y, f1u0vs); 855 c3.y = MulHigh(c3.y, f0u0vs); 856 } 857 858 c.y = (c0.y + c1.y) + (c2.y + c3.y); 859 if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions 860 } 861 } 862 863 if(componentCount >= 3) 864 { 865 if(has16bitTextureComponents() && 
hasUnsignedTextureComponent(2)) 866 { 867 c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u); 868 c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u); 869 c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v); 870 } 871 else 872 { 873 if(hasUnsignedTextureComponent(2)) 874 { 875 c0.z = MulHigh(As<UShort4>(c0.z), f1u1v); 876 c1.z = MulHigh(As<UShort4>(c1.z), f0u1v); 877 c2.z = MulHigh(As<UShort4>(c2.z), f1u0v); 878 c3.z = MulHigh(As<UShort4>(c3.z), f0u0v); 879 } 880 else 881 { 882 c0.z = MulHigh(c0.z, f1u1vs); 883 c1.z = MulHigh(c1.z, f0u1vs); 884 c2.z = MulHigh(c2.z, f1u0vs); 885 c3.z = MulHigh(c3.z, f0u0vs); 886 } 887 888 c.z = (c0.z + c1.z) + (c2.z + c3.z); 889 if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions 890 } 891 } 892 893 if(componentCount >= 4) 894 { 895 if(has16bitTextureComponents() && hasUnsignedTextureComponent(3)) 896 { 897 c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u); 898 c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u); 899 c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v); 900 } 901 else 902 { 903 if(hasUnsignedTextureComponent(3)) 904 { 905 c0.w = MulHigh(As<UShort4>(c0.w), f1u1v); 906 c1.w = MulHigh(As<UShort4>(c1.w), f0u1v); 907 c2.w = MulHigh(As<UShort4>(c2.w), f1u0v); 908 c3.w = MulHigh(As<UShort4>(c3.w), f0u0v); 909 } 910 else 911 { 912 c0.w = MulHigh(c0.w, f1u1vs); 913 c1.w = MulHigh(c1.w, f0u1vs); 914 c2.w = MulHigh(c2.w, f1u0vs); 915 c3.w = MulHigh(c3.w, f0u0vs); 916 } 917 918 c.w = (c0.w + c1.w) + (c2.w + c3.w); 919 if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions 920 } 921 } 922 } 923 else 924 { 925 c.x = c1.x; 926 c.y = c2.x; 927 c.z = c3.x; 928 c.w = c0.x; 929 } 930 } 931 932 return c; 933 } 934 
sample3D(Pointer<Byte> & texture,Float4 & u_,Float4 & v_,Float4 & w_,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)935 Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function) 936 { 937 Vector4s c_; 938 939 int componentCount = textureComponentCount(); 940 941 Pointer<Byte> mipmap; 942 Pointer<Byte> buffer[4]; 943 Int face[4]; 944 945 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 946 947 bool texelFetch = (function == Fetch); 948 949 Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap); 950 Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap); 951 Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap); 952 953 if(state.textureFilter == FILTER_POINT || texelFetch) 954 { 955 c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 956 } 957 else 958 { 959 Vector4s c[2][2][2]; 960 961 Short4 u[2][2][2]; 962 Short4 v[2][2][2]; 963 Short4 s[2][2][2]; 964 965 for(int i = 0; i < 2; i++) 966 { 967 for(int j = 0; j < 2; j++) 968 { 969 for(int k = 0; k < 2; k++) 970 { 971 u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod); 972 v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod); 973 s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod); 974 } 975 } 976 } 977 978 // Fractions 979 UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 980 UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 981 UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth)); 982 983 UShort4 f1u = ~f0u; 984 UShort4 f1v = ~f0v; 
985 UShort4 f1s = ~f0s; 986 987 UShort4 f[2][2][2]; 988 Short4 fs[2][2][2]; 989 990 f[1][1][1] = MulHigh(f1u, f1v); 991 f[0][1][1] = MulHigh(f0u, f1v); 992 f[1][0][1] = MulHigh(f1u, f0v); 993 f[0][0][1] = MulHigh(f0u, f0v); 994 f[1][1][0] = MulHigh(f1u, f1v); 995 f[0][1][0] = MulHigh(f0u, f1v); 996 f[1][0][0] = MulHigh(f1u, f0v); 997 f[0][0][0] = MulHigh(f0u, f0v); 998 999 f[1][1][1] = MulHigh(f[1][1][1], f1s); 1000 f[0][1][1] = MulHigh(f[0][1][1], f1s); 1001 f[1][0][1] = MulHigh(f[1][0][1], f1s); 1002 f[0][0][1] = MulHigh(f[0][0][1], f1s); 1003 f[1][1][0] = MulHigh(f[1][1][0], f0s); 1004 f[0][1][0] = MulHigh(f[0][1][0], f0s); 1005 f[1][0][0] = MulHigh(f[1][0][0], f0s); 1006 f[0][0][0] = MulHigh(f[0][0][0], f0s); 1007 1008 // Signed fractions 1009 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 1010 { 1011 fs[0][0][0] = f[0][0][0] >> 1; 1012 fs[0][0][1] = f[0][0][1] >> 1; 1013 fs[0][1][0] = f[0][1][0] >> 1; 1014 fs[0][1][1] = f[0][1][1] >> 1; 1015 fs[1][0][0] = f[1][0][0] >> 1; 1016 fs[1][0][1] = f[1][0][1] >> 1; 1017 fs[1][1][0] = f[1][1][0] >> 1; 1018 fs[1][1][1] = f[1][1][1] >> 1; 1019 } 1020 1021 for(int i = 0; i < 2; i++) 1022 { 1023 for(int j = 0; j < 2; j++) 1024 { 1025 for(int k = 0; k < 2; k++) 1026 { 1027 c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function); 1028 1029 if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); } 1030 if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); } 1031 if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else 
c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); } 1032 if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); } 1033 1034 if(i != 0 || j != 0 || k != 0) 1035 { 1036 if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x; 1037 if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y; 1038 if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z; 1039 if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w; 1040 } 1041 } 1042 } 1043 } 1044 1045 if(componentCount >= 1) c_.x = c[0][0][0].x; 1046 if(componentCount >= 2) c_.y = c[0][0][0].y; 1047 if(componentCount >= 3) c_.z = c[0][0][0].z; 1048 if(componentCount >= 4) c_.w = c[0][0][0].w; 1049 1050 // Correct for signed fractions 1051 if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); 1052 if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); 1053 if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); 1054 if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); 1055 } 1056 1057 return c_; 1058 } 1059 sampleFloatFilter(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)1060 Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) 1061 { 1062 Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 1063 1064 if(function == Fetch) 1065 { 1066 return c; 1067 } 1068 1069 if(state.mipmapFilter == MIPMAP_LINEAR) 1070 { 1071 Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, 
anisotropy, uDelta, vDelta, face, true, function); 1072 1073 Float4 lod4 = Float4(Frac(lod)); 1074 1075 c.x = (cc.x - c.x) * lod4 + c.x; 1076 c.y = (cc.y - c.y) * lod4 + c.y; 1077 c.z = (cc.z - c.z) * lod4 + c.z; 1078 c.w = (cc.w - c.w) * lod4 + c.w; 1079 } 1080 1081 Int4 borderMask; 1082 1083 if(state.addressingModeU == ADDRESSING_BORDER) 1084 { 1085 Int4 u0; 1086 1087 border(u0, u); 1088 1089 borderMask = u0; 1090 } 1091 1092 if(state.addressingModeV == ADDRESSING_BORDER) 1093 { 1094 Int4 v0; 1095 1096 border(v0, v); 1097 1098 if(state.addressingModeU == ADDRESSING_BORDER) 1099 { 1100 borderMask &= v0; 1101 } 1102 else 1103 { 1104 borderMask = v0; 1105 } 1106 } 1107 1108 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 1109 { 1110 Int4 s0; 1111 1112 border(s0, w); 1113 1114 if(state.addressingModeU == ADDRESSING_BORDER || 1115 state.addressingModeV == ADDRESSING_BORDER) 1116 { 1117 borderMask &= s0; 1118 } 1119 else 1120 { 1121 borderMask = s0; 1122 } 1123 } 1124 1125 if(state.addressingModeU == ADDRESSING_BORDER || 1126 state.addressingModeV == ADDRESSING_BORDER || 1127 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 1128 { 1129 Int4 b; 1130 1131 c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0])))); 1132 c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1])))); 1133 c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2])))); 1134 c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3])))); 1135 } 1136 1137 return c; 1138 } 1139 sampleFloatAniso(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction 
function)1140 Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 1141 { 1142 Vector4f c; 1143 1144 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 1145 { 1146 c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function); 1147 } 1148 else 1149 { 1150 Int a = RoundInt(anisotropy); 1151 1152 Vector4f cSum; 1153 1154 cSum.x = Float4(0.0f); 1155 cSum.y = Float4(0.0f); 1156 cSum.z = Float4(0.0f); 1157 cSum.w = Float4(0.0f); 1158 1159 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 1160 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 1161 1162 Float4 du = uDelta; 1163 Float4 dv = vDelta; 1164 1165 Float4 u0 = u + B * du; 1166 Float4 v0 = v + B * dv; 1167 1168 du *= A; 1169 dv *= A; 1170 1171 Int i = 0; 1172 1173 Do 1174 { 1175 c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function); 1176 1177 u0 += du; 1178 v0 += dv; 1179 1180 cSum.x += c.x * A; 1181 cSum.y += c.y * A; 1182 cSum.z += c.z * A; 1183 cSum.w += c.w * A; 1184 1185 i++; 1186 } 1187 Until(i >= a); 1188 1189 c.x = cSum.x; 1190 c.y = cSum.y; 1191 c.z = cSum.z; 1192 c.w = cSum.w; 1193 } 1194 1195 return c; 1196 } 1197 sampleFloat(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1198 Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 1199 { 1200 if(state.textureType != TEXTURE_3D) 1201 { 1202 return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function); 1203 } 1204 else 1205 { 1206 return sampleFloat3D(texture, u, v, w, offset, lod, 
secondLOD, function); 1207 } 1208 } 1209 sampleFloat2D(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1210 Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 1211 { 1212 Vector4f c; 1213 1214 int componentCount = textureComponentCount(); 1215 bool gather = state.textureFilter == FILTER_GATHER; 1216 1217 Pointer<Byte> mipmap; 1218 Pointer<Byte> buffer[4]; 1219 1220 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 1221 1222 Int4 x0, x1, y0, y1, z0; 1223 Float4 fu, fv; 1224 Int4 filter = computeFilterOffset(lod); 1225 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function); 1226 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function); 1227 address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function); 1228 1229 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16); 1230 y0 *= pitchP; 1231 if(hasThirdCoordinate()) 1232 { 1233 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16); 1234 z0 *= sliceP; 1235 } 1236 1237 if(state.textureFilter == FILTER_POINT || (function == Fetch)) 1238 { 1239 c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function); 1240 } 1241 else 1242 { 1243 y1 *= pitchP; 1244 1245 Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function); 1246 Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function); 1247 Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function); 1248 Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function); 1249 1250 if(!gather) // Blend 1251 { 1252 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); 1253 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); 1254 
if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z); 1255 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); 1256 1257 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); 1258 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); 1259 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); 1260 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); 1261 1262 if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x); 1263 if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y); 1264 if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z); 1265 if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w); 1266 } 1267 else 1268 { 1269 c.x = c1.x; 1270 c.y = c2.x; 1271 c.z = c3.x; 1272 c.w = c0.x; 1273 } 1274 } 1275 1276 return c; 1277 } 1278 sampleFloat3D(Pointer<Byte> & texture,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)1279 Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function) 1280 { 1281 Vector4f c; 1282 1283 int componentCount = textureComponentCount(); 1284 1285 Pointer<Byte> mipmap; 1286 Pointer<Byte> buffer[4]; 1287 Int face[4]; 1288 1289 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 1290 1291 Int4 x0, x1, y0, y1, z0, z1; 1292 Float4 fu, fv, fw; 1293 Int4 filter = computeFilterOffset(lod); 1294 address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function); 1295 address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function); 1296 address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function); 1297 1298 Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16); 1299 Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16); 1300 y0 *= pitchP; 1301 z0 *= sliceP; 1302 1303 if(state.textureFilter == FILTER_POINT || 
(function == Fetch)) 1304 { 1305 c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1306 } 1307 else 1308 { 1309 y1 *= pitchP; 1310 z1 *= sliceP; 1311 1312 Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function); 1313 Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function); 1314 Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function); 1315 Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function); 1316 Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function); 1317 Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function); 1318 Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function); 1319 Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function); 1320 1321 // Blend first slice 1322 if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x); 1323 if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y); 1324 if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z); 1325 if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w); 1326 1327 if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x); 1328 if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y); 1329 if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z); 1330 if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w); 1331 1332 if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x); 1333 if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y); 1334 if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z); 1335 if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w); 1336 1337 // Blend second slice 1338 if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x); 1339 if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y); 1340 if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z); 1341 if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w); 1342 1343 if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x); 1344 if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y); 1345 if(componentCount >= 3) c6.z = c6.z + fu 
* (c7.z - c6.z); 1346 if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w); 1347 1348 if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x); 1349 if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y); 1350 if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z); 1351 if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w); 1352 1353 // Blend slices 1354 if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x); 1355 if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y); 1356 if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z); 1357 if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w); 1358 } 1359 1360 return c; 1361 } 1362 log2sqrt(Float lod)1363 Float SamplerCore::log2sqrt(Float lod) 1364 { 1365 // log2(sqrt(lod)) // Equals 0.25 * log2(lod^2). 1366 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. 1367 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. 1368 lod *= As<Float>(Int(0x33000000)); // Scale by 0.25 * 2^-23 (mantissa length). 1369 1370 return lod; 1371 } 1372 log2(Float lod)1373 Float SamplerCore::log2(Float lod) 1374 { 1375 lod *= lod; // Squaring doubles the exponent and produces an extra bit of precision. 1376 lod = Float(As<Int>(lod)) - Float(0x3F800000); // Interpret as integer and subtract the exponent bias. 1377 lod *= As<Float>(Int(0x33800000)); // Scale by 0.5 * 2^-23 (mantissa length). 
1378 1379 return lod; 1380 } 1381 computeLod(Pointer<Byte> & texture,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Float4 & uuuu,Float4 & vvvv,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1382 void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) 1383 { 1384 if(function != Lod && function != Fetch) 1385 { 1386 Float4 duvdxy; 1387 1388 if(function != Grad) // Implicit 1389 { 1390 duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx); 1391 } 1392 else 1393 { 1394 Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx); 1395 Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx); 1396 1397 duvdxy = Float4(dudxy.xz, dvdxy.xz); 1398 } 1399 1400 // Scale by texture dimensions and global LOD. 1401 Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD)); 1402 1403 Float4 dUV2dxy = dUVdxy * dUVdxy; 1404 Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw; 1405 1406 lod = Max(Float(dUV2.x), Float(dUV2.y)); // Square length of major axis 1407 1408 if(state.textureFilter == FILTER_ANISOTROPIC) 1409 { 1410 Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z)); 1411 1412 Float4 dudx = duvdxy.xxxx; 1413 Float4 dudy = duvdxy.yyyy; 1414 Float4 dvdx = duvdxy.zzzz; 1415 Float4 dvdy = duvdxy.wwww; 1416 1417 Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y)); 1418 uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask))); 1419 vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask))); 1420 1421 anisotropy = lod * Rcp_pp(det); 1422 anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy))); 1423 1424 lod *= Rcp_pp(anisotropy * anisotropy); 1425 } 1426 1427 lod = log2sqrt(lod); // log2(sqrt(lod)) 1428 1429 if(function == Bias) 1430 { 1431 lod += lodBias; 1432 } 1433 } 1434 else if(function == 
Lod) 1435 { 1436 lod = lodBias; 1437 } 1438 else if(function == Fetch) 1439 { 1440 // TODO: Eliminate int-float-int conversion. 1441 lod = Float(As<Int>(lodBias)); 1442 } 1443 else if(function == Base) 1444 { 1445 lod = Float(0); 1446 } 1447 else assert(false); 1448 1449 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1450 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1451 } 1452 computeLodCube(Pointer<Byte> & texture,Float & lod,Float4 & u,Float4 & v,Float4 & w,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,Float4 & M,SamplerFunction function)1453 void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function) 1454 { 1455 if(function != Lod && function != Fetch) 1456 { 1457 Float4 dudxy, dvdxy, dsdxy; 1458 1459 if(function != Grad) // Implicit 1460 { 1461 Float4 U = u * M; 1462 Float4 V = v * M; 1463 Float4 W = w * M; 1464 1465 dudxy = Abs(U - U.xxxx); 1466 dvdxy = Abs(V - V.xxxx); 1467 dsdxy = Abs(W - W.xxxx); 1468 } 1469 else 1470 { 1471 dudxy = Float4(dsx.x.xx, dsy.x.xx); 1472 dvdxy = Float4(dsx.y.xx, dsy.y.xx); 1473 dsdxy = Float4(dsx.z.xx, dsy.z.xx); 1474 1475 dudxy = Abs(dudxy * Float4(M.x)); 1476 dvdxy = Abs(dvdxy * Float4(M.x)); 1477 dsdxy = Abs(dsdxy * Float4(M.x)); 1478 } 1479 1480 // Compute the largest Manhattan distance in two dimensions. 1481 // This takes the footprint across adjacent faces into account. 1482 Float4 duvdxy = dudxy + dvdxy; 1483 Float4 dusdxy = dudxy + dsdxy; 1484 Float4 dvsdxy = dvdxy + dsdxy; 1485 1486 dudxy = Max(Max(duvdxy, dusdxy), dvsdxy); 1487 1488 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); 1489 1490 // Scale by texture dimension and global LOD. 
1491 lod *= *Pointer<Float>(texture + OFFSET(Texture,widthLOD)); 1492 1493 lod = log2(lod); 1494 1495 if(function == Bias) 1496 { 1497 lod += lodBias; 1498 } 1499 } 1500 else if(function == Lod) 1501 { 1502 lod = lodBias; 1503 } 1504 else if(function == Fetch) 1505 { 1506 // TODO: Eliminate int-float-int conversion. 1507 lod = Float(As<Int>(lodBias)); 1508 } 1509 else if(function == Base) 1510 { 1511 lod = Float(0); 1512 } 1513 else assert(false); 1514 1515 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1516 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1517 } 1518 computeLod3D(Pointer<Byte> & texture,Float & lod,Float4 & uuuu,Float4 & vvvv,Float4 & wwww,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1519 void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function) 1520 { 1521 if(function != Lod && function != Fetch) 1522 { 1523 Float4 dudxy, dvdxy, dsdxy; 1524 1525 if(function != Grad) // Implicit 1526 { 1527 dudxy = uuuu - uuuu.xxxx; 1528 dvdxy = vvvv - vvvv.xxxx; 1529 dsdxy = wwww - wwww.xxxx; 1530 } 1531 else 1532 { 1533 dudxy = Float4(dsx.x.xx, dsy.x.xx); 1534 dvdxy = Float4(dsx.y.xx, dsy.y.xx); 1535 dsdxy = Float4(dsx.z.xx, dsy.z.xx); 1536 } 1537 1538 // Scale by texture dimensions and global LOD. 
1539 dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD)); 1540 dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD)); 1541 dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD)); 1542 1543 dudxy *= dudxy; 1544 dvdxy *= dvdxy; 1545 dsdxy *= dsdxy; 1546 1547 dudxy += dvdxy; 1548 dudxy += dsdxy; 1549 1550 lod = Max(Float(dudxy.y), Float(dudxy.z)); // FIXME: Max(dudxy.y, dudxy.z); 1551 1552 lod = log2sqrt(lod); // log2(sqrt(lod)) 1553 1554 if(function == Bias) 1555 { 1556 lod += lodBias; 1557 } 1558 } 1559 else if(function == Lod) 1560 { 1561 lod = lodBias; 1562 } 1563 else if(function == Fetch) 1564 { 1565 // TODO: Eliminate int-float-int conversion. 1566 lod = Float(As<Int>(lodBias)); 1567 } 1568 else if(function == Base) 1569 { 1570 lod = Float(0); 1571 } 1572 else assert(false); 1573 1574 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1575 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1576 } 1577 cubeFace(Int face[4],Float4 & U,Float4 & V,Float4 & x,Float4 & y,Float4 & z,Float4 & M)1578 void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M) 1579 { 1580 Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0 1581 Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0 1582 Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0 1583 1584 Float4 absX = Abs(x); 1585 Float4 absY = Abs(y); 1586 Float4 absZ = Abs(z); 1587 1588 Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y) 1589 Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z) 1590 Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x) 1591 Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z) 1592 Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x) 1593 Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y) 1594 1595 // FACE_POSITIVE_X = 000b 1596 // FACE_NEGATIVE_X = 001b 1597 // FACE_POSITIVE_Y = 010b 1598 // FACE_NEGATIVE_Y = 011b 1599 // FACE_POSITIVE_Z = 100b 1600 // FACE_NEGATIVE_Z = 101b 1601 1602 Int yAxis = 
SignMask(yMajor); 1603 Int zAxis = SignMask(zMajor); 1604 1605 Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000); 1606 Int negative = SignMask(n); 1607 1608 face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4); 1609 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4); 1610 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4); 1611 face[1] = (face[0] >> 4) & 0x7; 1612 face[2] = (face[0] >> 8) & 0x7; 1613 face[3] = (face[0] >> 12) & 0x7; 1614 face[0] &= 0x7; 1615 1616 M = Max(Max(absX, absY), absZ); 1617 1618 // U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x) 1619 U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x)))); 1620 1621 // V = !yMajor ? -y : (n ^ z) 1622 V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z)))); 1623 1624 M = reciprocal(M) * Float4(0.5f); 1625 U = U * M + Float4(0.5f); 1626 V = V * M + Float4(0.5f); 1627 } 1628 applyOffset(Short4 & uvw,Float4 & offset,const Int4 & whd,AddressingMode mode)1629 Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode) 1630 { 1631 Int4 tmp = Int4(As<UShort4>(uvw)); 1632 tmp = tmp + As<Int4>(offset); 1633 1634 switch(mode) 1635 { 1636 case AddressingMode::ADDRESSING_WRAP: 1637 tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd; 1638 break; 1639 case AddressingMode::ADDRESSING_CLAMP: 1640 case AddressingMode::ADDRESSING_MIRROR: 1641 case AddressingMode::ADDRESSING_MIRRORONCE: 1642 case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER 1643 tmp = Min(Max(tmp, Int4(0)), whd - Int4(1)); 1644 break; 1645 case ADDRESSING_TEXELFETCH: 1646 break; 1647 case AddressingMode::ADDRESSING_SEAMLESS: 1648 ASSERT(false); // Cube sampling doesn't support offset. 
1649 default: 1650 ASSERT(false); 1651 } 1652 1653 return As<Short4>(UShort4(tmp)); 1654 } 1655 computeIndices(UInt index[4],Short4 uuuu,Short4 vvvv,Short4 wwww,Vector4f & offset,const Pointer<Byte> & mipmap,SamplerFunction function)1656 void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function) 1657 { 1658 bool texelFetch = (function == Fetch); 1659 bool hasOffset = (function.option == Offset); 1660 1661 if(!texelFetch) 1662 { 1663 uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))); 1664 vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))); 1665 } 1666 1667 if(hasOffset) 1668 { 1669 UShort4 w = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)); 1670 uuuu = applyOffset(uuuu, offset.x, Int4(w), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU); 1671 UShort4 h = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)); 1672 vvvv = applyOffset(vvvv, offset.y, Int4(h), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV); 1673 } 1674 1675 Short4 uuu2 = uuuu; 1676 uuuu = As<Short4>(UnpackLow(uuuu, vvvv)); 1677 uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv)); 1678 uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1679 uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1680 1681 if(hasThirdCoordinate()) 1682 { 1683 if(state.textureType != TEXTURE_2D_ARRAY) 1684 { 1685 if(!texelFetch) 1686 { 1687 wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))); 1688 } 1689 1690 if(hasOffset) 1691 { 1692 UShort4 d = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)); 1693 wwww = applyOffset(wwww, offset.z, Int4(d), texelFetch ? 
ADDRESSING_TEXELFETCH : state.addressingModeW); 1694 } 1695 } 1696 1697 UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2)); 1698 uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP)); 1699 1700 index[0] = Extract(As<Int4>(uv), 0); 1701 index[1] = Extract(As<Int4>(uv), 1); 1702 index[2] = Extract(As<Int4>(uv), 2); 1703 index[3] = Extract(As<Int4>(uv), 3); 1704 } 1705 else 1706 { 1707 index[0] = Extract(As<Int2>(uuuu), 0); 1708 index[1] = Extract(As<Int2>(uuuu), 1); 1709 index[2] = Extract(As<Int2>(uuu2), 0); 1710 index[3] = Extract(As<Int2>(uuu2), 1); 1711 } 1712 1713 if(texelFetch) 1714 { 1715 Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP))); 1716 if(hasThirdCoordinate()) 1717 { 1718 size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth))); 1719 } 1720 UInt min = 0; 1721 UInt max = size - 1; 1722 1723 for(int i = 0; i < 4; i++) 1724 { 1725 index[i] = Min(Max(index[i], min), max); 1726 } 1727 } 1728 } 1729 computeIndices(UInt index[4],Int4 & uuuu,Int4 & vvvv,Int4 & wwww,const Pointer<Byte> & mipmap,SamplerFunction function)1730 void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function) 1731 { 1732 UInt4 indices = uuuu + vvvv; 1733 1734 if(hasThirdCoordinate()) 1735 { 1736 indices += As<UInt4>(wwww); 1737 } 1738 1739 for(int i = 0; i < 4; i++) 1740 { 1741 index[i] = Extract(As<Int4>(indices), i); 1742 } 1743 } 1744 sampleTexel(UInt index[4],Pointer<Byte> buffer[4])1745 Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4]) 1746 { 1747 Vector4s c; 1748 1749 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0; 1750 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0; 1751 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; 1752 int f3 = state.textureType == TEXTURE_CUBE ? 
3 : 0; 1753 1754 if(has16bitTextureFormat()) 1755 { 1756 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1757 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1758 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1759 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1760 1761 switch(state.textureFormat) 1762 { 1763 case FORMAT_R5G6B5: 1764 c.z = (c.x & Short4(0x001Fu)) << 11; 1765 c.y = (c.x & Short4(0x07E0u)) << 5; 1766 c.x = (c.x & Short4(0xF800u)); 1767 break; 1768 default: 1769 ASSERT(false); 1770 } 1771 } 1772 else if(has8bitTextureComponents()) 1773 { 1774 switch(textureComponentCount()) 1775 { 1776 case 4: 1777 { 1778 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1779 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1780 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1781 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1782 c.x = Unpack(c0, c1); 1783 c.y = Unpack(c2, c3); 1784 1785 switch(state.textureFormat) 1786 { 1787 case FORMAT_A8R8G8B8: 1788 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1789 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1790 c.y = c.z; 1791 c.w = c.x; 1792 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1793 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1794 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1795 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1796 break; 1797 case FORMAT_A8B8G8R8: 1798 case FORMAT_A8B8G8R8I: 1799 case FORMAT_A8B8G8R8_SNORM: 1800 case FORMAT_Q8W8V8U8: 1801 case FORMAT_SRGB8_A8: 1802 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1803 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1804 c.y = c.x; 1805 c.w = c.z; 1806 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1807 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1808 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1809 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1810 // Propagate sign bit 1811 if(state.textureFormat == FORMAT_A8B8G8R8I) 1812 { 1813 c.x >>= 8; 1814 c.y >>= 8; 1815 c.z >>= 8; 1816 c.w >>= 8; 1817 
} 1818 break; 1819 case FORMAT_A8B8G8R8UI: 1820 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1821 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1822 c.y = c.x; 1823 c.w = c.z; 1824 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1825 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1826 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1827 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0))); 1828 break; 1829 default: 1830 ASSERT(false); 1831 } 1832 } 1833 break; 1834 case 3: 1835 { 1836 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1837 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1838 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1839 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1840 c.x = Unpack(c0, c1); 1841 c.y = Unpack(c2, c3); 1842 1843 switch(state.textureFormat) 1844 { 1845 case FORMAT_X8R8G8B8: 1846 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1847 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1848 c.y = c.z; 1849 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1850 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1851 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1852 break; 1853 case FORMAT_X8B8G8R8_SNORM: 1854 case FORMAT_X8B8G8R8I: 1855 case FORMAT_X8B8G8R8: 1856 case FORMAT_X8L8V8U8: 1857 case FORMAT_SRGB8_X8: 1858 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1859 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1860 c.y = c.x; 1861 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1862 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1863 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1864 // Propagate sign bit 1865 if(state.textureFormat == FORMAT_X8B8G8R8I) 1866 { 1867 c.x >>= 8; 1868 c.y >>= 8; 1869 c.z >>= 8; 1870 } 1871 break; 1872 case FORMAT_X8B8G8R8UI: 1873 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1874 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1875 c.y = c.x; 1876 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1877 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1878 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1879 
break; 1880 default: 1881 ASSERT(false); 1882 } 1883 } 1884 break; 1885 case 2: 1886 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1887 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1888 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1889 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1890 1891 switch(state.textureFormat) 1892 { 1893 case FORMAT_G8R8: 1894 case FORMAT_G8R8_SNORM: 1895 case FORMAT_V8U8: 1896 case FORMAT_A8L8: 1897 c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8); 1898 c.x = (c.x & Short4(0x00FFu)) | (c.x << 8); 1899 break; 1900 case FORMAT_G8R8I: 1901 c.y = c.x >> 8; 1902 c.x = (c.x << 8) >> 8; // Propagate sign bit 1903 break; 1904 case FORMAT_G8R8UI: 1905 c.y = As<Short4>(As<UShort4>(c.x) >> 8); 1906 c.x &= Short4(0x00FFu); 1907 break; 1908 default: 1909 ASSERT(false); 1910 } 1911 break; 1912 case 1: 1913 { 1914 Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0])); 1915 Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1])); 1916 Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2])); 1917 Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3])); 1918 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb 1919 1920 switch(state.textureFormat) 1921 { 1922 case FORMAT_R8I: 1923 case FORMAT_R8UI: 1924 { 1925 Int zero(0); 1926 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero)); 1927 // Propagate sign bit 1928 if(state.textureFormat == FORMAT_R8I) 1929 { 1930 c.x = (c.x << 8) >> 8; 1931 } 1932 } 1933 break; 1934 default: 1935 c.x = Unpack(As<Byte4>(c0)); 1936 break; 1937 } 1938 } 1939 break; 1940 default: 1941 ASSERT(false); 1942 } 1943 } 1944 else if(has16bitTextureComponents()) 1945 { 1946 switch(textureComponentCount()) 1947 { 1948 case 4: 1949 c.x = Pointer<Short4>(buffer[f0])[index[0]]; 1950 c.y = Pointer<Short4>(buffer[f1])[index[1]]; 1951 c.z = Pointer<Short4>(buffer[f2])[index[2]]; 1952 c.w = Pointer<Short4>(buffer[f3])[index[3]]; 1953 transpose4x4(c.x, 
				c.y, c.z, c.w);
				break;
			case 3:
				c.x = Pointer<Short4>(buffer[f0])[index[0]];
				c.y = Pointer<Short4>(buffer[f1])[index[1]];
				c.z = Pointer<Short4>(buffer[f2])[index[2]];
				c.w = Pointer<Short4>(buffer[f3])[index[3]];
				transpose4x3(c.x, c.y, c.z, c.w);
				break;
			case 2:
				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
				c.y = c.x;
				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
				break;
			case 1:
				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
				break;
			default:
				ASSERT(false);
			}
		}
		else ASSERT(false);

		// sRGB decoding: R5G6B5 uses the 5/6/5-bit lookup tables per channel,
		// every other format runs the 8-bit table on its RGB components only.
		if(state.sRGB)
		{
			if(state.textureFormat == FORMAT_R5G6B5)
			{
				sRGBtoLinear16_5_16(c.x);
				sRGBtoLinear16_6_16(c.y);
				sRGBtoLinear16_5_16(c.z);
			}
			else
			{
				for(int i = 0; i < textureComponentCount(); i++)
				{
					if(isRGBComponent(i))
					{
						sRGBtoLinear16_8_16(c[i]);
					}
				}
			}
		}

		return c;
	}

	// Fetches four texels (one per lane) addressed by the Short4 coordinates and
	// returns them as 16-bit channels.
	//
	// uuuu/vvvv/wwww, offset, mipmap and function are passed to computeIndices()
	// to obtain the four texel indices. buffer[] holds the plane/face pointers
	// filled in by selectMipmap().
	//
	// YV12 formats are converted to RGB here; on that path only c.x, c.y and c.z
	// are written (c.w is left unassigned). All other formats are forwarded to
	// sampleTexel(index, buffer).
	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
	{
		Vector4s c;

		UInt index[4];
		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);

		if(hasYuvFormat())
		{
			// Generic YPbPr to RGB transformation
			// R = Y                               + 2 * (1 - Kr) * Pr
			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
			// B = Y + 2 * (1 - Kb) * Pb

			// Defaults match the BT.601 case below; the switch overrides them per format.
			float Kb = 0.114f;
			float Kr = 0.299f;
			int studioSwing = 1;

			switch(state.textureFormat)
			{
			case FORMAT_YV12_BT601:
				Kb = 0.114f;
				Kr = 0.299f;
				studioSwing = 1;
				break;
			case FORMAT_YV12_BT709:
				Kb = 0.0722f;
				Kr = 0.2126f;
				studioSwing = 1;
				break;
			case FORMAT_YV12_JFIF:
				Kb = 0.114f;
				Kr = 0.299f;
				studioSwing = 0;   // Full-range: no scaling and no luma bias
				break;
			default:
				ASSERT(false);
			}

			const float Kg = 1.0f - Kr - Kb;

			// Matrix coefficients of the transformation above.
			const float Rr = 2 * (1 - Kr);
			const float Gb = -2 * Kb * (1 - Kb) / Kg;
			const float Gr = -2 * Kr * (1 - Kr) / Kg;
			const float Bb = 2 * (1 - Kb);

			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;

			// Range scaling folded into the matrix coefficients.
			const float Rv = Vv * Rr;
			const float Gu = Uu * Gb;
			const float Gv = Vv * Gr;
			const float Bu = Uu * Bb;

			// Constant terms: the luma bias of 16 (studio swing only) and the
			// chroma bias of 128, normalized by 255.
			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;

			// Luma plane: gather one byte per lane from buffer[0] and pack the four
			// bytes into a single Int before unpacking to 16-bit lanes.
			Int c0 = Int(buffer[0][index[0]]);
			Int c1 = Int(buffer[0][index[1]]);
			Int c2 = Int(buffer[0][index[2]]);
			Int c3 = Int(buffer[0][index[3]]);
			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));

			// Chroma planes: indices are recomputed against the Mipmap entry that
			// follows the current one, and are shared by buffer[1] (V) and buffer[2] (U).
			// NOTE(review): presumably that entry describes the subsampled chroma
			// plane dimensions - confirm against the texture setup code.
			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
			c0 = Int(buffer[1][index[0]]);
			c1 = Int(buffer[1][index[1]]);
			c2 = Int(buffer[1][index[2]]);
			c3 = Int(buffer[1][index[3]]);
			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));

			c0 = Int(buffer[2][index[0]]);
			c1 = Int(buffer[2][index[1]]);
			c2 = Int(buffer[2][index[2]]);
			c3 = Int(buffer[2][index[3]]);
			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));

			// Coefficients as unsigned integers scaled by 0x4000. Terms that are
			// negative in the float math (Gu, Gv, R0, B0) are negated here so the
			// constants stay non-negative; they are applied with SubSat below.
			const UShort4 yY = UShort4(iround(Yy * 0x4000));
			const UShort4 rV = UShort4(iround(Rv * 0x4000));
			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
			const UShort4 bU = UShort4(iround(Bu * 0x4000));

			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));

			UShort4 y = MulHigh(Y, yY);
			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
			UShort4 b = SubSat(y + MulHigh(U, bU), b0);

			// Clamp to 14 bits, then shift left by 2 so the result spans 16 bits.
			c.x = Min(r, UShort4(0x3FFF)) << 2;
			c.y = Min(g, UShort4(0x3FFF)) << 2;
			c.z = Min(b, UShort4(0x3FFF)) << 2;
		}
		else
		{
			return sampleTexel(index, buffer);
		}

		return c;
	}

	// Fetches four texels (one per lane) addressed by the Int4 coordinates and
	// returns them in Float4 channels.
	//
	// Float and 32-bit integer formats are loaded directly as 32-bit lanes, and
	// are the only formats for which the depth comparison against z is applied.
	// All remaining (non-YUV) formats are fetched through the 16-bit
	// sampleTexel(index, buffer) path and widened per component.
	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
	{
		Vector4f c;

		UInt index[4];
		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);

		if(hasFloatTexture() || has32bitIntegerTextureComponents())
		{
			// Cube textures read lane i from buffer[i]; every other texture type
			// reads all four lanes from buffer[0].
			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;

			// Read texels
			switch(textureComponentCount())
			{
			case 4:
				// 16 bytes per texel; one texel per register, then transpose so
				// c.x holds the first component of all four texels, and so on.
				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
				transpose4x4(c.x, c.y, c.z, c.w);
				break;
			case 3:
				// Same 16-byte stride as the 4-component case.
				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
				transpose4x3(c.x, c.y, c.z, c.w);
				break;
			case 2:
				// FIXME: Optimal shuffling?
				// 8 bytes per texel. The loads assigned to .zw start 8 bytes before
				// the texel so that its two values land in the upper two lanes.
				// NOTE(review): each load is a full Float4 and so touches memory
				// next to the texel - confirm the buffers are padded for this.
				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
				c.y = c.x;
				c.x = Float4(c.x.xz, c.z.xz);
				c.y = Float4(c.y.yw, c.z.yw);
				break;
			case 1:
				// FIXME: Optimal shuffling?
				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
				break;
			default:
				ASSERT(false);
			}

			if(state.compare != COMPARE_BYPASS)
			{
				Float4 ref = z;

				// The reference value is clamped to [0, 1] unless the texture is a float format.
				if(!hasFloatTexture())
				{
					ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f));
				}

				Int4 boolean;

				switch(state.compare)
				{
				case COMPARE_LESSEQUAL:    boolean = CmpLE(ref, c.x);   break;
				case COMPARE_GREATEREQUAL: boolean = CmpNLT(ref, c.x);  break;
				case COMPARE_LESS:         boolean = CmpLT(ref, c.x);   break;
				case COMPARE_GREATER:      boolean = CmpNLE(ref, c.x);  break;
				case COMPARE_EQUAL:        boolean = CmpEQ(ref, c.x);   break;
				case COMPARE_NOTEQUAL:     boolean = CmpNEQ(ref, c.x);  break;
				case COMPARE_ALWAYS:       boolean = Int4(-1);          break;
				case COMPARE_NEVER:        boolean = Int4(0);           break;
				default:                   ASSERT(false);
				}

				// Mask the bit pattern of 1.0f with the comparison result; the
				// remaining channels are set to (0, 0, 1).
				c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
				c.y = Float4(0.0f);
				c.z = Float4(0.0f);
				c.w = Float4(1.0f);
			}
		}
		else
		{
			ASSERT(!hasYuvFormat());

			Vector4s cs = sampleTexel(index, buffer);

			// Non-normalized integer formats keep their integer value bit-for-bit
			// (As<Float4>); other formats are converted to floating point.
			// Unsigned components are widened as UShort4, signed ones as Short4.
			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
			int componentCount = textureComponentCount();
			for(int n = 0; n < componentCount; n++)
			{
				if(hasUnsignedTextureComponent(n))
				{
					if(isInteger)
					{
						c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
					}
					else
					{
						c[n] = Float4(As<UShort4>(cs[n]));
					}
				}
				else
				{
					if(isInteger)
					{
						c[n] = As<Float4>(Int4(cs[n]));
					}
					else
					{
						c[n] = Float4(cs[n]);
					}
				}
			}
		}

		return c;
	}

	// Selects the Mipmap structure to sample from and loads its buffer pointer(s).
	//
	// texture   - base address of the Texture structure.
	// buffer    - out: buffer[0] for regular textures (plus buffer[1] and
	//             buffer[2] for YUV formats), or one buffer per lane for cube
	//             textures, chosen by face[i].
	// mipmap    - out: address of the selected Mipmap structure.
	// lod       - level of detail; rounded for MIPMAP_POINT, truncated otherwise.
	// face      - per-lane cube face index; only read for cube textures.
	// secondLOD - when true, selects the level following the computed one.
	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
	{
		if(state.mipmapFilter == MIPMAP_NONE)
		{
			// No mipmapping: always level 0 (secondLOD is ignored here).
			mipmap = texture + OFFSET(Texture,mipmap[0]);
		}
		else
		{
			Int ilod;

			if(state.mipmapFilter == MIPMAP_POINT)
			{
				ilod = RoundInt(lod);
			}
			else   // MIPMAP_LINEAR
			{
				ilod = Int(lod);
			}

			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
		}

		if(state.textureType != TEXTURE_CUBE)
		{
			buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));

			if(hasYuvFormat())
			{
				buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
				buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
			}
		}
		else
		{
			// Cube: each lane may hit a different face, so index Mipmap::buffer by face[i].
			for(int i = 0; i < 4; i++)
			{
				buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
			}
		}
	}

	// Returns the per-lane filter value for the current texture filter:
	// 0 for FILTER_POINT, -1 for the filters not special-cased below, and a
	// comparison result on lod for the mixed min/mag filters (set where
	// lod is not <= 0 for MIN_LINEAR_MAG_POINT, where lod <= 0 for
	// MIN_POINT_MAG_LINEAR).
	// NOTE(review): the Cmp* results are presumably all-ones (-1) / zero masks - confirm.
	Int4 SamplerCore::computeFilterOffset(Float &lod)
	{
		Int4 filter = -1;

		if(state.textureFilter == FILTER_POINT)
		{
			filter = 0;
		}
		else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
		{
			filter = CmpNLE(Float4(lod), Float4(0.0f));
		}
		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
		{
			filter = CmpLE(Float4(lod), Float4(0.0f));
		}

		return filter;
	}

	// Converts a floating-point texture coordinate to a Short4 coordinate
	// according to the addressing mode.
	//
	// uw             - coordinate for one axis (or the array layer).
	// addressingMode - addressing mode for this axis.
	// mipmap         - selected Mipmap; only read for array layers (its depth field).
	//
	// For ADDRESSING_LAYER the result is a layer index clamped to [0, depth - 1]
	// (or an unused default value when the texture is not a 2D array). For all
	// other modes the coordinate is scaled by 2^16 before conversion to Short4.
	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
	{
		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
		{
			return Short4();   // Unused
		}
		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
		{
			return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
		}
		else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER)
		{
			// Clamp to [0, 65535/65536] so the scaled value fits in 16 bits.
			Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));

			return Short4(Int4(clamp * Float4(1 << 16)));
		}
		else if(addressingMode == ADDRESSING_MIRROR)
		{
			Int4 convert = Int4(uw * Float4(1 << 16));
			// Broadcast bit 16 (the lowest integer bit of the coordinate) to all
			// bits, then XOR to invert the fraction on every other repetition.
			Int4 mirror = (convert << 15) >> 31;

			convert ^= mirror;

			return Short4(convert);
		}
		else if(addressingMode == ADDRESSING_MIRRORONCE)
		{
			// Absolute value
			Int4 convert = Int4(Abs(uw * Float4(1 << 16)));

			// Clamp
			// Bias by -0x8000, pack to signed 16-bit, then add the bias back.
			// NOTE(review): relies on PackSigned saturating - confirm.
			convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
			convert = As<Int4>(PackSigned(convert, convert));

			return As<Short4>(Int2(convert)) + Short4(0x8000u);
		}
		else   // Wrap
		{
			return Short4(Int4(uw * Float4(1 << 16)));
		}
	}

	// Computes integer texel coordinates (and, when filtering, the interpolation
	// fraction) along one axis.
	//
	// uvw            - coordinate for this axis; for Fetch it is reinterpreted
	//                  as an integer (As<Int4>) rather than converted.
	// xyz0           - out: first texel coordinate.
	// xyz1           - out: second texel coordinate (xyz0 - filter); not written
	//                  on the Fetch and array-layer paths.
	// f              - out: fractional position between xyz0 and xyz1; only
	//                  written when the filter is neither POINT nor GATHER.
	// mipmap         - selected Mipmap structure.
	// texOffset      - texel offset, reinterpreted as an integer; applied only
	//                  when function.option == Offset.
	// filter         - per-lane filter value.
	//                  NOTE(review): presumably the result of computeFilterOffset()
	//                  (0 = point, -1 = linear) - confirm at the call sites.
	// whd            - byte offset within Mipmap of the dimension for this axis.
	// addressingMode - addressing mode for this axis.
	// function       - sampler function and option (Fetch, Offset, ...).
	void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
	{
		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
		{
			return;   // Unused
		}

		Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
		Int4 maxXYZ = dim - Int4(1);

		if(function == Fetch)
		{
			// Integer coordinates: apply the offset (never to array layers) and clamp to the valid range.
			xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
		}
		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)   // Note: Offset does not apply to array layers
		{
			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
		}
		else
		{
			// Bit patterns of the largest floats below 0.5, 1.0 and 2.0. Using them
			// keeps the addressed coordinate strictly below 1.0, so that the
			// multiplication by dim below cannot reach dim itself.
			const int halfBits = 0x3EFFFFFF;   // Value just under 0.5f
			const int oneBits  = 0x3F7FFFFF;   // Value just under 1.0f
			const int twoBits  = 0x3FFFFFFF;   // Value just under 2.0f

			Float4 coord = uvw;

			if(state.textureType == TEXTURE_RECTANGLE)
			{
				// According to https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_rectangle.txt
				// "CLAMP_TO_EDGE causes the s coordinate to be clamped to the range[0.5, wt - 0.5].
				//  CLAMP_TO_EDGE causes the t coordinate to be clamped to the range[0.5, ht - 0.5]."
				// Unless SwiftShader implements support for ADDRESSING_BORDER, other modes should be equivalent
				// to CLAMP_TO_EDGE. Rectangle textures have no support for any MIRROR or REPEAT modes.
				// Note that the coordinate is not multiplied by dim on this path.
				coord = Min(Max(coord, Float4(0.5f)), Float4(dim) - Float4(0.5f));
			}
			else
			{
				switch(addressingMode)
				{
				case ADDRESSING_CLAMP:
				case ADDRESSING_BORDER:
				case ADDRESSING_SEAMLESS:
					{
						// While cube face coordinates are nominally already in the
						// [0, 1] range due to the projection, and numerical
						// imprecision is tolerated due to the border of pixels for
						// seamless filtering, this isn't true for inf and NaN
						// values. So we always clamp.
						Float4 one = As<Float4>(Int4(oneBits));
						coord = Min(Max(coord, Float4(0.0f)), one);
					}
					break;
				case ADDRESSING_MIRROR:
					{
						// Triangle wave with period 2.
						Float4 half = As<Float4>(Int4(halfBits));
						Float4 one = As<Float4>(Int4(oneBits));
						Float4 two = As<Float4>(Int4(twoBits));
						coord = one - Abs(two * Frac(coord * half) - one);
					}
					break;
				case ADDRESSING_MIRRORONCE:
					{
						// Same triangle wave, with the input first clamped to [-one, two].
						Float4 half = As<Float4>(Int4(halfBits));
						Float4 one = As<Float4>(Int4(oneBits));
						Float4 two = As<Float4>(Int4(twoBits));
						coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
					}
					break;
				default:   // Wrap
					coord = Frac(coord);
					break;
				}

				// Scale the addressed coordinate to texel units.
				coord = coord * Float4(dim);
			}

			if(state.textureFilter == FILTER_POINT ||
			   state.textureFilter == FILTER_GATHER)
			{
				xyz0 = Int4(coord);
			}
			else
			{
				// Shift by half a texel so that xyz0/xyz1 straddle the sample
				// position. For the mixed min/mag filters the shift is masked by
				// 'filter', so it only applies to lanes where filter is set.
				if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
				   state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
				{
					coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter);
				}
				else
				{
					coord -= Float4(0.5f);
				}

				Float4 floor = Floor(coord);
				xyz0 = Int4(floor);
				f = coord - floor;
			}

			if(function.option == Offset)
			{
				xyz0 += As<Int4>(texOffset);
			}

			if(addressingMode == ADDRESSING_SEAMLESS)
			{
				// NOTE(review): presumably skips the one-texel border mentioned in
				// the clamp comment above - confirm against the cube texture layout.
				xyz0 += Int4(1);
			}

			xyz1 = xyz0 - filter;   // Increment

			if(function.option == Offset)
			{
				switch(addressingMode)
				{
				case ADDRESSING_SEAMLESS:
					ASSERT(false);   // Cube sampling doesn't support offset.
				case ADDRESSING_MIRROR:
				case ADDRESSING_MIRRORONCE:
				case ADDRESSING_BORDER:
					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, and ADDRESSING_BORDER.
					// Fall through to Clamp.
				case ADDRESSING_CLAMP:
					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
					xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
					break;
				default:   // Wrap
					// Add a multiple of dim before taking the remainder.
					// NOTE(review): assumes MIN_PROGRAM_TEXEL_OFFSET is the most
					// negative offset allowed, so the sum cannot be negative - confirm.
					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
					xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
					break;
				}
			}
			else if(state.textureFilter != FILTER_POINT)
			{
				// Without an offset only the half-texel shift can push a coordinate
				// out of range: xyz0 by one below zero, xyz1 by one past the end.
				switch(addressingMode)
				{
				case ADDRESSING_SEAMLESS:
					break;
				case ADDRESSING_MIRROR:
				case ADDRESSING_MIRRORONCE:
				case ADDRESSING_BORDER:
				case ADDRESSING_CLAMP:
					xyz0 = Max(xyz0, Int4(0));
					xyz1 = Min(xyz1, maxXYZ);
					break;
				default:   // Wrap
					{
						Int4 under = CmpLT(xyz0, Int4(0));
						xyz0 = (under & maxXYZ) | (~under & xyz0);   // xyz < 0 ? dim - 1 : xyz   // TODO: IfThenElse()

						Int4 nover = CmpLT(xyz1, dim);
						xyz1 = nover & xyz1;   // xyz >= dim ? 0 : xyz
					}
					break;
				}
			}
		}
	}

	// Converts a float channel to a 16-bit integer scaled by 0x1000 (1.0f -> 0x1000).
	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
	{
		cs = RoundShort4(cf * Float4(0x1000));
	}

	// Applies the scalar convertFixed12 to all four channels.
	void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
	{
		convertFixed12(cs.x, cf.x);
		convertFixed12(cs.y, cf.y);
		convertFixed12(cs.z, cf.z);
		convertFixed12(cs.w, cf.w);
	}

	// Converts a signed 16-bit channel to float, scaling by 1 / 0x0FFE.
	void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
	{
		cf = Float4(cs) * Float4(1.0f / 0x0FFE);
	}

//	void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
//	{
//		convertSigned12(cf.x, cs.x);
//		convertSigned12(cf.y, cs.y);
//		convertSigned12(cf.z, cs.z);
//		convertSigned12(cf.w, cs.w);
//	}

	// Converts a signed 16-bit channel to float, scaling by 1 / 0x7FFF.
	void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
	{
		cf = Float4(cs) * Float4(1.0f / 0x7FFF);
	}

void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs) 2528 { 2529 cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF); 2530 } 2531 sRGBtoLinear16_8_16(Short4 & c)2532 void SamplerCore::sRGBtoLinear16_8_16(Short4 &c) 2533 { 2534 c = As<UShort4>(c) >> 8; 2535 2536 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16)); 2537 2538 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2539 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2540 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2541 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2542 } 2543 sRGBtoLinear16_6_16(Short4 & c)2544 void SamplerCore::sRGBtoLinear16_6_16(Short4 &c) 2545 { 2546 c = As<UShort4>(c) >> 10; 2547 2548 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_16)); 2549 2550 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2551 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2552 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2553 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2554 } 2555 sRGBtoLinear16_5_16(Short4 & c)2556 void SamplerCore::sRGBtoLinear16_5_16(Short4 &c) 2557 { 2558 c = As<UShort4>(c) >> 11; 2559 2560 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_16)); 2561 2562 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2563 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2564 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2565 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2566 } 2567 hasFloatTexture() const2568 bool SamplerCore::hasFloatTexture() const 2569 { 2570 return Surface::isFloatFormat(state.textureFormat); 2571 } 2572 hasUnnormalizedIntegerTexture() const2573 bool SamplerCore::hasUnnormalizedIntegerTexture() const 2574 { 2575 return Surface::isNonNormalizedInteger(state.textureFormat); 2576 } 
2577 hasUnsignedTextureComponent(int component) const2578 bool SamplerCore::hasUnsignedTextureComponent(int component) const 2579 { 2580 return Surface::isUnsignedComponent(state.textureFormat, component); 2581 } 2582 textureComponentCount() const2583 int SamplerCore::textureComponentCount() const 2584 { 2585 return Surface::componentCount(state.textureFormat); 2586 } 2587 hasThirdCoordinate() const2588 bool SamplerCore::hasThirdCoordinate() const 2589 { 2590 return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY); 2591 } 2592 has16bitTextureFormat() const2593 bool SamplerCore::has16bitTextureFormat() const 2594 { 2595 switch(state.textureFormat) 2596 { 2597 case FORMAT_R5G6B5: 2598 return true; 2599 case FORMAT_R8_SNORM: 2600 case FORMAT_G8R8_SNORM: 2601 case FORMAT_X8B8G8R8_SNORM: 2602 case FORMAT_A8B8G8R8_SNORM: 2603 case FORMAT_R8I: 2604 case FORMAT_R8UI: 2605 case FORMAT_G8R8I: 2606 case FORMAT_G8R8UI: 2607 case FORMAT_X8B8G8R8I: 2608 case FORMAT_X8B8G8R8UI: 2609 case FORMAT_A8B8G8R8I: 2610 case FORMAT_A8B8G8R8UI: 2611 case FORMAT_R32I: 2612 case FORMAT_R32UI: 2613 case FORMAT_G32R32I: 2614 case FORMAT_G32R32UI: 2615 case FORMAT_X32B32G32R32I: 2616 case FORMAT_X32B32G32R32UI: 2617 case FORMAT_A32B32G32R32I: 2618 case FORMAT_A32B32G32R32UI: 2619 case FORMAT_G8R8: 2620 case FORMAT_X8R8G8B8: 2621 case FORMAT_X8B8G8R8: 2622 case FORMAT_A8R8G8B8: 2623 case FORMAT_A8B8G8R8: 2624 case FORMAT_SRGB8_X8: 2625 case FORMAT_SRGB8_A8: 2626 case FORMAT_V8U8: 2627 case FORMAT_Q8W8V8U8: 2628 case FORMAT_X8L8V8U8: 2629 case FORMAT_R32F: 2630 case FORMAT_G32R32F: 2631 case FORMAT_X32B32G32R32F: 2632 case FORMAT_A32B32G32R32F: 2633 case FORMAT_X32B32G32R32F_UNSIGNED: 2634 case FORMAT_A8: 2635 case FORMAT_R8: 2636 case FORMAT_L8: 2637 case FORMAT_A8L8: 2638 case FORMAT_D32F_LOCKABLE: 2639 case FORMAT_D32FS8_TEXTURE: 2640 case FORMAT_D32F_SHADOW: 2641 case FORMAT_D32FS8_SHADOW: 2642 case FORMAT_L16: 2643 case FORMAT_G16R16: 2644 case FORMAT_A16B16G16R16: 
2645 case FORMAT_V16U16: 2646 case FORMAT_A16W16V16U16: 2647 case FORMAT_Q16W16V16U16: 2648 case FORMAT_R16I: 2649 case FORMAT_R16UI: 2650 case FORMAT_G16R16I: 2651 case FORMAT_G16R16UI: 2652 case FORMAT_X16B16G16R16I: 2653 case FORMAT_X16B16G16R16UI: 2654 case FORMAT_A16B16G16R16I: 2655 case FORMAT_A16B16G16R16UI: 2656 case FORMAT_YV12_BT601: 2657 case FORMAT_YV12_BT709: 2658 case FORMAT_YV12_JFIF: 2659 return false; 2660 default: 2661 ASSERT(false); 2662 } 2663 2664 return false; 2665 } 2666 has8bitTextureComponents() const2667 bool SamplerCore::has8bitTextureComponents() const 2668 { 2669 switch(state.textureFormat) 2670 { 2671 case FORMAT_G8R8: 2672 case FORMAT_X8R8G8B8: 2673 case FORMAT_X8B8G8R8: 2674 case FORMAT_A8R8G8B8: 2675 case FORMAT_A8B8G8R8: 2676 case FORMAT_SRGB8_X8: 2677 case FORMAT_SRGB8_A8: 2678 case FORMAT_V8U8: 2679 case FORMAT_Q8W8V8U8: 2680 case FORMAT_X8L8V8U8: 2681 case FORMAT_A8: 2682 case FORMAT_R8: 2683 case FORMAT_L8: 2684 case FORMAT_A8L8: 2685 case FORMAT_R8_SNORM: 2686 case FORMAT_G8R8_SNORM: 2687 case FORMAT_X8B8G8R8_SNORM: 2688 case FORMAT_A8B8G8R8_SNORM: 2689 case FORMAT_R8I: 2690 case FORMAT_R8UI: 2691 case FORMAT_G8R8I: 2692 case FORMAT_G8R8UI: 2693 case FORMAT_X8B8G8R8I: 2694 case FORMAT_X8B8G8R8UI: 2695 case FORMAT_A8B8G8R8I: 2696 case FORMAT_A8B8G8R8UI: 2697 return true; 2698 case FORMAT_R5G6B5: 2699 case FORMAT_R32F: 2700 case FORMAT_G32R32F: 2701 case FORMAT_X32B32G32R32F: 2702 case FORMAT_A32B32G32R32F: 2703 case FORMAT_X32B32G32R32F_UNSIGNED: 2704 case FORMAT_D32F_LOCKABLE: 2705 case FORMAT_D32FS8_TEXTURE: 2706 case FORMAT_D32F_SHADOW: 2707 case FORMAT_D32FS8_SHADOW: 2708 case FORMAT_L16: 2709 case FORMAT_G16R16: 2710 case FORMAT_A16B16G16R16: 2711 case FORMAT_V16U16: 2712 case FORMAT_A16W16V16U16: 2713 case FORMAT_Q16W16V16U16: 2714 case FORMAT_R32I: 2715 case FORMAT_R32UI: 2716 case FORMAT_G32R32I: 2717 case FORMAT_G32R32UI: 2718 case FORMAT_X32B32G32R32I: 2719 case FORMAT_X32B32G32R32UI: 2720 case FORMAT_A32B32G32R32I: 
2721 case FORMAT_A32B32G32R32UI: 2722 case FORMAT_R16I: 2723 case FORMAT_R16UI: 2724 case FORMAT_G16R16I: 2725 case FORMAT_G16R16UI: 2726 case FORMAT_X16B16G16R16I: 2727 case FORMAT_X16B16G16R16UI: 2728 case FORMAT_A16B16G16R16I: 2729 case FORMAT_A16B16G16R16UI: 2730 case FORMAT_YV12_BT601: 2731 case FORMAT_YV12_BT709: 2732 case FORMAT_YV12_JFIF: 2733 return false; 2734 default: 2735 ASSERT(false); 2736 } 2737 2738 return false; 2739 } 2740 has16bitTextureComponents() const2741 bool SamplerCore::has16bitTextureComponents() const 2742 { 2743 switch(state.textureFormat) 2744 { 2745 case FORMAT_R5G6B5: 2746 case FORMAT_R8_SNORM: 2747 case FORMAT_G8R8_SNORM: 2748 case FORMAT_X8B8G8R8_SNORM: 2749 case FORMAT_A8B8G8R8_SNORM: 2750 case FORMAT_R8I: 2751 case FORMAT_R8UI: 2752 case FORMAT_G8R8I: 2753 case FORMAT_G8R8UI: 2754 case FORMAT_X8B8G8R8I: 2755 case FORMAT_X8B8G8R8UI: 2756 case FORMAT_A8B8G8R8I: 2757 case FORMAT_A8B8G8R8UI: 2758 case FORMAT_R32I: 2759 case FORMAT_R32UI: 2760 case FORMAT_G32R32I: 2761 case FORMAT_G32R32UI: 2762 case FORMAT_X32B32G32R32I: 2763 case FORMAT_X32B32G32R32UI: 2764 case FORMAT_A32B32G32R32I: 2765 case FORMAT_A32B32G32R32UI: 2766 case FORMAT_G8R8: 2767 case FORMAT_X8R8G8B8: 2768 case FORMAT_X8B8G8R8: 2769 case FORMAT_A8R8G8B8: 2770 case FORMAT_A8B8G8R8: 2771 case FORMAT_SRGB8_X8: 2772 case FORMAT_SRGB8_A8: 2773 case FORMAT_V8U8: 2774 case FORMAT_Q8W8V8U8: 2775 case FORMAT_X8L8V8U8: 2776 case FORMAT_R32F: 2777 case FORMAT_G32R32F: 2778 case FORMAT_X32B32G32R32F: 2779 case FORMAT_A32B32G32R32F: 2780 case FORMAT_X32B32G32R32F_UNSIGNED: 2781 case FORMAT_A8: 2782 case FORMAT_R8: 2783 case FORMAT_L8: 2784 case FORMAT_A8L8: 2785 case FORMAT_D32F_LOCKABLE: 2786 case FORMAT_D32FS8_TEXTURE: 2787 case FORMAT_D32F_SHADOW: 2788 case FORMAT_D32FS8_SHADOW: 2789 case FORMAT_YV12_BT601: 2790 case FORMAT_YV12_BT709: 2791 case FORMAT_YV12_JFIF: 2792 return false; 2793 case FORMAT_L16: 2794 case FORMAT_G16R16: 2795 case FORMAT_A16B16G16R16: 2796 case 
FORMAT_R16I: 2797 case FORMAT_R16UI: 2798 case FORMAT_G16R16I: 2799 case FORMAT_G16R16UI: 2800 case FORMAT_X16B16G16R16I: 2801 case FORMAT_X16B16G16R16UI: 2802 case FORMAT_A16B16G16R16I: 2803 case FORMAT_A16B16G16R16UI: 2804 case FORMAT_V16U16: 2805 case FORMAT_A16W16V16U16: 2806 case FORMAT_Q16W16V16U16: 2807 return true; 2808 default: 2809 ASSERT(false); 2810 } 2811 2812 return false; 2813 } 2814 has32bitIntegerTextureComponents() const2815 bool SamplerCore::has32bitIntegerTextureComponents() const 2816 { 2817 switch(state.textureFormat) 2818 { 2819 case FORMAT_R5G6B5: 2820 case FORMAT_R8_SNORM: 2821 case FORMAT_G8R8_SNORM: 2822 case FORMAT_X8B8G8R8_SNORM: 2823 case FORMAT_A8B8G8R8_SNORM: 2824 case FORMAT_R8I: 2825 case FORMAT_R8UI: 2826 case FORMAT_G8R8I: 2827 case FORMAT_G8R8UI: 2828 case FORMAT_X8B8G8R8I: 2829 case FORMAT_X8B8G8R8UI: 2830 case FORMAT_A8B8G8R8I: 2831 case FORMAT_A8B8G8R8UI: 2832 case FORMAT_G8R8: 2833 case FORMAT_X8R8G8B8: 2834 case FORMAT_X8B8G8R8: 2835 case FORMAT_A8R8G8B8: 2836 case FORMAT_A8B8G8R8: 2837 case FORMAT_SRGB8_X8: 2838 case FORMAT_SRGB8_A8: 2839 case FORMAT_V8U8: 2840 case FORMAT_Q8W8V8U8: 2841 case FORMAT_X8L8V8U8: 2842 case FORMAT_L16: 2843 case FORMAT_G16R16: 2844 case FORMAT_A16B16G16R16: 2845 case FORMAT_R16I: 2846 case FORMAT_R16UI: 2847 case FORMAT_G16R16I: 2848 case FORMAT_G16R16UI: 2849 case FORMAT_X16B16G16R16I: 2850 case FORMAT_X16B16G16R16UI: 2851 case FORMAT_A16B16G16R16I: 2852 case FORMAT_A16B16G16R16UI: 2853 case FORMAT_V16U16: 2854 case FORMAT_A16W16V16U16: 2855 case FORMAT_Q16W16V16U16: 2856 case FORMAT_R32F: 2857 case FORMAT_G32R32F: 2858 case FORMAT_X32B32G32R32F: 2859 case FORMAT_A32B32G32R32F: 2860 case FORMAT_X32B32G32R32F_UNSIGNED: 2861 case FORMAT_A8: 2862 case FORMAT_R8: 2863 case FORMAT_L8: 2864 case FORMAT_A8L8: 2865 case FORMAT_D32F_LOCKABLE: 2866 case FORMAT_D32FS8_TEXTURE: 2867 case FORMAT_D32F_SHADOW: 2868 case FORMAT_D32FS8_SHADOW: 2869 case FORMAT_YV12_BT601: 2870 case FORMAT_YV12_BT709: 2871 case 
FORMAT_YV12_JFIF: 2872 return false; 2873 case FORMAT_R32I: 2874 case FORMAT_R32UI: 2875 case FORMAT_G32R32I: 2876 case FORMAT_G32R32UI: 2877 case FORMAT_X32B32G32R32I: 2878 case FORMAT_X32B32G32R32UI: 2879 case FORMAT_A32B32G32R32I: 2880 case FORMAT_A32B32G32R32UI: 2881 return true; 2882 default: 2883 ASSERT(false); 2884 } 2885 2886 return false; 2887 } 2888 hasYuvFormat() const2889 bool SamplerCore::hasYuvFormat() const 2890 { 2891 switch(state.textureFormat) 2892 { 2893 case FORMAT_YV12_BT601: 2894 case FORMAT_YV12_BT709: 2895 case FORMAT_YV12_JFIF: 2896 return true; 2897 case FORMAT_R5G6B5: 2898 case FORMAT_R8_SNORM: 2899 case FORMAT_G8R8_SNORM: 2900 case FORMAT_X8B8G8R8_SNORM: 2901 case FORMAT_A8B8G8R8_SNORM: 2902 case FORMAT_R8I: 2903 case FORMAT_R8UI: 2904 case FORMAT_G8R8I: 2905 case FORMAT_G8R8UI: 2906 case FORMAT_X8B8G8R8I: 2907 case FORMAT_X8B8G8R8UI: 2908 case FORMAT_A8B8G8R8I: 2909 case FORMAT_A8B8G8R8UI: 2910 case FORMAT_R32I: 2911 case FORMAT_R32UI: 2912 case FORMAT_G32R32I: 2913 case FORMAT_G32R32UI: 2914 case FORMAT_X32B32G32R32I: 2915 case FORMAT_X32B32G32R32UI: 2916 case FORMAT_A32B32G32R32I: 2917 case FORMAT_A32B32G32R32UI: 2918 case FORMAT_G8R8: 2919 case FORMAT_X8R8G8B8: 2920 case FORMAT_X8B8G8R8: 2921 case FORMAT_A8R8G8B8: 2922 case FORMAT_A8B8G8R8: 2923 case FORMAT_SRGB8_X8: 2924 case FORMAT_SRGB8_A8: 2925 case FORMAT_V8U8: 2926 case FORMAT_Q8W8V8U8: 2927 case FORMAT_X8L8V8U8: 2928 case FORMAT_R32F: 2929 case FORMAT_G32R32F: 2930 case FORMAT_X32B32G32R32F: 2931 case FORMAT_A32B32G32R32F: 2932 case FORMAT_X32B32G32R32F_UNSIGNED: 2933 case FORMAT_A8: 2934 case FORMAT_R8: 2935 case FORMAT_L8: 2936 case FORMAT_A8L8: 2937 case FORMAT_D32F_LOCKABLE: 2938 case FORMAT_D32FS8_TEXTURE: 2939 case FORMAT_D32F_SHADOW: 2940 case FORMAT_D32FS8_SHADOW: 2941 case FORMAT_L16: 2942 case FORMAT_G16R16: 2943 case FORMAT_A16B16G16R16: 2944 case FORMAT_R16I: 2945 case FORMAT_R16UI: 2946 case FORMAT_G16R16I: 2947 case FORMAT_G16R16UI: 2948 case 
FORMAT_X16B16G16R16I: 2949 case FORMAT_X16B16G16R16UI: 2950 case FORMAT_A16B16G16R16I: 2951 case FORMAT_A16B16G16R16UI: 2952 case FORMAT_V16U16: 2953 case FORMAT_A16W16V16U16: 2954 case FORMAT_Q16W16V16U16: 2955 return false; 2956 default: 2957 ASSERT(false); 2958 } 2959 2960 return false; 2961 } 2962 isRGBComponent(int component) const2963 bool SamplerCore::isRGBComponent(int component) const 2964 { 2965 switch(state.textureFormat) 2966 { 2967 case FORMAT_R5G6B5: return component < 3; 2968 case FORMAT_R8_SNORM: return component < 1; 2969 case FORMAT_G8R8_SNORM: return component < 2; 2970 case FORMAT_X8B8G8R8_SNORM: return component < 3; 2971 case FORMAT_A8B8G8R8_SNORM: return component < 3; 2972 case FORMAT_R8I: return component < 1; 2973 case FORMAT_R8UI: return component < 1; 2974 case FORMAT_G8R8I: return component < 2; 2975 case FORMAT_G8R8UI: return component < 2; 2976 case FORMAT_X8B8G8R8I: return component < 3; 2977 case FORMAT_X8B8G8R8UI: return component < 3; 2978 case FORMAT_A8B8G8R8I: return component < 3; 2979 case FORMAT_A8B8G8R8UI: return component < 3; 2980 case FORMAT_R32I: return component < 1; 2981 case FORMAT_R32UI: return component < 1; 2982 case FORMAT_G32R32I: return component < 2; 2983 case FORMAT_G32R32UI: return component < 2; 2984 case FORMAT_X32B32G32R32I: return component < 3; 2985 case FORMAT_X32B32G32R32UI: return component < 3; 2986 case FORMAT_A32B32G32R32I: return component < 3; 2987 case FORMAT_A32B32G32R32UI: return component < 3; 2988 case FORMAT_G8R8: return component < 2; 2989 case FORMAT_X8R8G8B8: return component < 3; 2990 case FORMAT_X8B8G8R8: return component < 3; 2991 case FORMAT_A8R8G8B8: return component < 3; 2992 case FORMAT_A8B8G8R8: return component < 3; 2993 case FORMAT_SRGB8_X8: return component < 3; 2994 case FORMAT_SRGB8_A8: return component < 3; 2995 case FORMAT_V8U8: return false; 2996 case FORMAT_Q8W8V8U8: return false; 2997 case FORMAT_X8L8V8U8: return false; 2998 case FORMAT_R32F: return component < 1; 2999 
case FORMAT_G32R32F: return component < 2; 3000 case FORMAT_X32B32G32R32F: return component < 3; 3001 case FORMAT_A32B32G32R32F: return component < 3; 3002 case FORMAT_X32B32G32R32F_UNSIGNED: return component < 3; 3003 case FORMAT_A8: return false; 3004 case FORMAT_R8: return component < 1; 3005 case FORMAT_L8: return component < 1; 3006 case FORMAT_A8L8: return component < 1; 3007 case FORMAT_D32F_LOCKABLE: return false; 3008 case FORMAT_D32FS8_TEXTURE: return false; 3009 case FORMAT_D32F_SHADOW: return false; 3010 case FORMAT_D32FS8_SHADOW: return false; 3011 case FORMAT_L16: return component < 1; 3012 case FORMAT_G16R16: return component < 2; 3013 case FORMAT_A16B16G16R16: return component < 3; 3014 case FORMAT_R16I: return component < 1; 3015 case FORMAT_R16UI: return component < 1; 3016 case FORMAT_G16R16I: return component < 2; 3017 case FORMAT_G16R16UI: return component < 2; 3018 case FORMAT_X16B16G16R16I: return component < 3; 3019 case FORMAT_X16B16G16R16UI: return component < 3; 3020 case FORMAT_A16B16G16R16I: return component < 3; 3021 case FORMAT_A16B16G16R16UI: return component < 3; 3022 case FORMAT_V16U16: return false; 3023 case FORMAT_A16W16V16U16: return false; 3024 case FORMAT_Q16W16V16U16: return false; 3025 case FORMAT_YV12_BT601: return component < 3; 3026 case FORMAT_YV12_BT709: return component < 3; 3027 case FORMAT_YV12_JFIF: return component < 3; 3028 default: 3029 ASSERT(false); 3030 } 3031 3032 return false; 3033 } 3034 } 3035