// // Copyright 2015 The ANGLE Project Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // StateManager11.cpp: Defines a class for caching D3D11 state #include "libANGLE/renderer/d3d/d3d11/StateManager11.h" #include "common/angleutils.h" #include "common/bitset_utils.h" #include "common/mathutil.h" #include "common/utilities.h" #include "libANGLE/Context.h" #include "libANGLE/Query.h" #include "libANGLE/Surface.h" #include "libANGLE/VertexArray.h" #include "libANGLE/renderer/d3d/DisplayD3D.h" #include "libANGLE/renderer/d3d/TextureD3D.h" #include "libANGLE/renderer/d3d/d3d11/Buffer11.h" #include "libANGLE/renderer/d3d/d3d11/Context11.h" #include "libANGLE/renderer/d3d/d3d11/Framebuffer11.h" #include "libANGLE/renderer/d3d/d3d11/IndexBuffer11.h" #include "libANGLE/renderer/d3d/d3d11/RenderTarget11.h" #include "libANGLE/renderer/d3d/d3d11/Renderer11.h" #include "libANGLE/renderer/d3d/d3d11/ShaderExecutable11.h" #include "libANGLE/renderer/d3d/d3d11/TextureStorage11.h" #include "libANGLE/renderer/d3d/d3d11/TransformFeedback11.h" #include "libANGLE/renderer/d3d/d3d11/VertexArray11.h" #include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h" namespace rx { namespace { bool ImageIndexConflictsWithSRV(const gl::ImageIndex &index, D3D11_SHADER_RESOURCE_VIEW_DESC desc) { unsigned mipLevel = index.getLevelIndex(); gl::TextureType textureType = index.getType(); switch (desc.ViewDimension) { case D3D11_SRV_DIMENSION_TEXTURE2D: { bool allLevels = (desc.Texture2D.MipLevels == std::numeric_limits::max()); unsigned int maxSrvMip = desc.Texture2D.MipLevels + desc.Texture2D.MostDetailedMip; maxSrvMip = allLevels ? 
INT_MAX : maxSrvMip; unsigned mipMin = index.getLevelIndex(); unsigned mipMax = INT_MAX; return textureType == gl::TextureType::_2D && gl::RangeUI(mipMin, mipMax) .intersects(gl::RangeUI(desc.Texture2D.MostDetailedMip, maxSrvMip)); } case D3D11_SRV_DIMENSION_TEXTURE2DARRAY: { GLint layerIndex = index.getLayerIndex(); bool allLevels = (desc.Texture2DArray.MipLevels == std::numeric_limits::max()); unsigned int maxSrvMip = desc.Texture2DArray.MipLevels + desc.Texture2DArray.MostDetailedMip; maxSrvMip = allLevels ? INT_MAX : maxSrvMip; unsigned maxSlice = desc.Texture2DArray.FirstArraySlice + desc.Texture2DArray.ArraySize; // Cube maps can be mapped to Texture2DArray SRVs return (textureType == gl::TextureType::_2DArray || textureType == gl::TextureType::CubeMap) && desc.Texture2DArray.MostDetailedMip <= mipLevel && mipLevel < maxSrvMip && desc.Texture2DArray.FirstArraySlice <= static_cast(layerIndex) && static_cast(layerIndex) < maxSlice; } case D3D11_SRV_DIMENSION_TEXTURECUBE: { bool allLevels = (desc.TextureCube.MipLevels == std::numeric_limits::max()); unsigned int maxSrvMip = desc.TextureCube.MipLevels + desc.TextureCube.MostDetailedMip; maxSrvMip = allLevels ? INT_MAX : maxSrvMip; return textureType == gl::TextureType::CubeMap && desc.TextureCube.MostDetailedMip <= mipLevel && mipLevel < maxSrvMip; } case D3D11_SRV_DIMENSION_TEXTURE3D: { bool allLevels = (desc.Texture3D.MipLevels == std::numeric_limits::max()); unsigned int maxSrvMip = desc.Texture3D.MipLevels + desc.Texture3D.MostDetailedMip; maxSrvMip = allLevels ? 
INT_MAX : maxSrvMip; return textureType == gl::TextureType::_3D && desc.Texture3D.MostDetailedMip <= mipLevel && mipLevel < maxSrvMip; } default: // We only handle the cases corresponding to valid image indexes UNIMPLEMENTED(); } return false; } bool ImageIndexConflictsWithUAV(const gl::ImageIndex &index, D3D11_UNORDERED_ACCESS_VIEW_DESC desc) { unsigned mipLevel = index.getLevelIndex(); gl::TextureType textureType = index.getType(); switch (desc.ViewDimension) { case D3D11_UAV_DIMENSION_TEXTURE2D: { return textureType == gl::TextureType::_2D && mipLevel == desc.Texture2D.MipSlice; } case D3D11_UAV_DIMENSION_TEXTURE2DARRAY: { GLint layerIndex = index.getLayerIndex(); unsigned mipSlice = desc.Texture2DArray.MipSlice; unsigned firstArraySlice = desc.Texture2DArray.FirstArraySlice; unsigned lastArraySlice = firstArraySlice + desc.Texture2DArray.ArraySize; return (textureType == gl::TextureType::_2DArray || textureType == gl::TextureType::CubeMap) && (mipLevel == mipSlice && gl::RangeUI(firstArraySlice, lastArraySlice) .contains(static_cast(layerIndex))); } case D3D11_UAV_DIMENSION_TEXTURE3D: { GLint layerIndex = index.getLayerIndex(); unsigned mipSlice = desc.Texture3D.MipSlice; unsigned firstWSlice = desc.Texture3D.FirstWSlice; unsigned lastWSlice = firstWSlice + desc.Texture3D.WSize; return textureType == gl::TextureType::_3D && (mipLevel == mipSlice && gl::RangeUI(firstWSlice, lastWSlice).contains(static_cast(layerIndex))); } default: return false; } } // Does *not* increment the resource ref count!! 
ID3D11Resource *GetViewResource(ID3D11View *view) { ID3D11Resource *resource = nullptr; ASSERT(view); view->GetResource(&resource); resource->Release(); return resource; } int GetWrapBits(GLenum wrap) { switch (wrap) { case GL_CLAMP_TO_EDGE: return 0x0; case GL_REPEAT: return 0x1; case GL_MIRRORED_REPEAT: return 0x2; case GL_CLAMP_TO_BORDER: return 0x3; default: UNREACHABLE(); return 0; } } Optional FindFirstNonInstanced( const std::vector ¤tAttributes) { for (size_t index = 0; index < currentAttributes.size(); ++index) { if (currentAttributes[index]->divisor == 0) { return Optional(index); } } return Optional::Invalid(); } void SortAttributesByLayout(const ProgramD3D &programD3D, const std::vector &vertexArrayAttribs, const std::vector ¤tValueAttribs, AttribIndexArray *sortedD3DSemanticsOut, std::vector *sortedAttributesOut) { sortedAttributesOut->clear(); const AttribIndexArray &locationToSemantic = programD3D.getAttribLocationToD3DSemantics(); const gl::ProgramExecutable &executable = programD3D.getState().getExecutable(); for (auto locationIndex : executable.getActiveAttribLocationsMask()) { int d3dSemantic = locationToSemantic[locationIndex]; if (sortedAttributesOut->size() <= static_cast(d3dSemantic)) { sortedAttributesOut->resize(d3dSemantic + 1); } (*sortedD3DSemanticsOut)[d3dSemantic] = d3dSemantic; const auto *arrayAttrib = &vertexArrayAttribs[locationIndex]; if (arrayAttrib->attribute && arrayAttrib->attribute->enabled) { (*sortedAttributesOut)[d3dSemantic] = arrayAttrib; } else { ASSERT(currentValueAttribs[locationIndex].attribute); (*sortedAttributesOut)[d3dSemantic] = ¤tValueAttribs[locationIndex]; } } } void UpdateUniformBuffer(ID3D11DeviceContext *deviceContext, UniformStorage11 *storage, const d3d11::Buffer *buffer) { deviceContext->UpdateSubresource(buffer->get(), 0, nullptr, storage->getDataPointer(0, 0), 0, 0); } size_t GetReservedBufferCount(bool usesPointSpriteEmulation) { return usesPointSpriteEmulation ? 
1 : 0; } bool CullsEverything(const gl::State &glState) { return (glState.getRasterizerState().cullFace && glState.getRasterizerState().cullMode == gl::CullFaceMode::FrontAndBack); } } // anonymous namespace // StateManager11::ViewCache Implementation. template StateManager11::ViewCache::ViewCache() : mHighestUsedView(0) {} template StateManager11::ViewCache::~ViewCache() {} template void StateManager11::ViewCache::update(size_t resourceIndex, ViewType *view) { ASSERT(resourceIndex < mCurrentViews.size()); ViewRecord *record = &mCurrentViews[resourceIndex]; record->view = reinterpret_cast(view); if (view) { record->resource = reinterpret_cast(GetViewResource(view)); view->GetDesc(&record->desc); mHighestUsedView = std::max(resourceIndex + 1, mHighestUsedView); } else { record->resource = 0; if (resourceIndex + 1 == mHighestUsedView) { do { --mHighestUsedView; } while (mHighestUsedView > 0 && mCurrentViews[mHighestUsedView].view == 0); } } } template void StateManager11::ViewCache::clear() { if (mCurrentViews.empty()) { return; } memset(&mCurrentViews[0], 0, sizeof(ViewRecord) * mCurrentViews.size()); mHighestUsedView = 0; } StateManager11::SRVCache *StateManager11::getSRVCache(gl::ShaderType shaderType) { ASSERT(shaderType != gl::ShaderType::InvalidEnum); return &mCurShaderSRVs[shaderType]; } // ShaderConstants11 implementation ShaderConstants11::ShaderConstants11() : mNumActiveShaderSamplers({}) { mShaderConstantsDirty.set(); } ShaderConstants11::~ShaderConstants11() {} void ShaderConstants11::init(const gl::Caps &caps) { for (gl::ShaderType shaderType : gl::AllShaderTypes()) { mShaderSamplerMetadata[shaderType].resize(caps.maxShaderTextureImageUnits[shaderType]); mShaderReadonlyImageMetadata[shaderType].resize(caps.maxShaderImageUniforms[shaderType]); mShaderImageMetadata[shaderType].resize(caps.maxShaderImageUniforms[shaderType]); } } size_t ShaderConstants11::GetShaderConstantsStructSize(gl::ShaderType shaderType) { switch (shaderType) { case 
gl::ShaderType::Vertex: return sizeof(Vertex); case gl::ShaderType::Fragment: return sizeof(Pixel); case gl::ShaderType::Compute: return sizeof(Compute); // TODO(jiawei.shao@intel.com): return geometry shader constant struct size case gl::ShaderType::Geometry: return 0u; default: UNREACHABLE(); return 0u; } } size_t ShaderConstants11::getRequiredBufferSize(gl::ShaderType shaderType) const { ASSERT(shaderType != gl::ShaderType::InvalidEnum); return GetShaderConstantsStructSize(shaderType) + mShaderSamplerMetadata[shaderType].size() * sizeof(SamplerMetadata) + mShaderImageMetadata[shaderType].size() * sizeof(ImageMetadata) + mShaderReadonlyImageMetadata[shaderType].size() * sizeof(ImageMetadata); } void ShaderConstants11::markDirty() { mShaderConstantsDirty.set(); mNumActiveShaderSamplers.fill(0); } bool ShaderConstants11::updateSamplerMetadata(SamplerMetadata *data, const gl::Texture &texture, const gl::SamplerState &samplerState) { bool dirty = false; unsigned int baseLevel = texture.getTextureState().getEffectiveBaseLevel(); gl::TextureTarget target = (texture.getType() == gl::TextureType::CubeMap) ? gl::kCubeMapTextureTargetMin : gl::NonCubeTextureTypeToTarget(texture.getType()); GLenum sizedFormat = texture.getFormat(target, baseLevel).info->sizedInternalFormat; if (data->baseLevel != static_cast(baseLevel)) { data->baseLevel = static_cast(baseLevel); dirty = true; } // Some metadata is needed only for integer textures. We avoid updating the constant buffer // unnecessarily by changing the data only in case the texture is an integer texture and // the values have changed. bool needIntegerTextureMetadata = false; // internalFormatBits == 0 means a 32-bit texture in the case of integer textures. 
int internalFormatBits = 0; switch (sizedFormat) { case GL_RGBA32I: case GL_RGBA32UI: case GL_RGB32I: case GL_RGB32UI: case GL_RG32I: case GL_RG32UI: case GL_R32I: case GL_R32UI: needIntegerTextureMetadata = true; break; case GL_RGBA16I: case GL_RGBA16UI: case GL_RGB16I: case GL_RGB16UI: case GL_RG16I: case GL_RG16UI: case GL_R16I: case GL_R16UI: needIntegerTextureMetadata = true; internalFormatBits = 16; break; case GL_RGBA8I: case GL_RGBA8UI: case GL_RGB8I: case GL_RGB8UI: case GL_RG8I: case GL_RG8UI: case GL_R8I: case GL_R8UI: needIntegerTextureMetadata = true; internalFormatBits = 8; break; case GL_RGB10_A2UI: needIntegerTextureMetadata = true; internalFormatBits = 10; break; default: break; } if (needIntegerTextureMetadata) { if (data->internalFormatBits != internalFormatBits) { data->internalFormatBits = internalFormatBits; dirty = true; } // Pack the wrap values into one integer so we can fit all the metadata in two 4-integer // vectors. GLenum wrapS = samplerState.getWrapS(); GLenum wrapT = samplerState.getWrapT(); GLenum wrapR = samplerState.getWrapR(); int wrapModes = GetWrapBits(wrapS) | (GetWrapBits(wrapT) << 2) | (GetWrapBits(wrapR) << 4); if (data->wrapModes != wrapModes) { data->wrapModes = wrapModes; dirty = true; } const angle::ColorGeneric &borderColor(samplerState.getBorderColor()); constexpr int kBlack[4] = {}; const void *const intBorderColor = (borderColor.type == angle::ColorGeneric::Type::Float) ? 
kBlack : borderColor.colorI.data(); ASSERT(static_cast(borderColor.colorI.data()) == static_cast(borderColor.colorUI.data())); if (memcmp(data->intBorderColor, intBorderColor, sizeof(data->intBorderColor)) != 0) { memcpy(data->intBorderColor, intBorderColor, sizeof(data->intBorderColor)); dirty = true; } } return dirty; } bool ShaderConstants11::updateImageMetadata(ImageMetadata *data, const gl::ImageUnit &imageUnit) { bool dirty = false; if (data->layer != static_cast(imageUnit.layer)) { data->layer = static_cast(imageUnit.layer); dirty = true; } if (data->level != static_cast(imageUnit.level)) { data->level = static_cast(imageUnit.level); dirty = true; } return dirty; } void ShaderConstants11::setComputeWorkGroups(GLuint numGroupsX, GLuint numGroupsY, GLuint numGroupsZ) { mCompute.numWorkGroups[0] = numGroupsX; mCompute.numWorkGroups[1] = numGroupsY; mCompute.numWorkGroups[2] = numGroupsZ; mShaderConstantsDirty.set(gl::ShaderType::Compute); } void ShaderConstants11::setMultiviewWriteToViewportIndex(GLfloat index) { mVertex.multiviewWriteToViewportIndex = index; mPixel.multiviewWriteToViewportIndex = index; mShaderConstantsDirty.set(gl::ShaderType::Vertex); mShaderConstantsDirty.set(gl::ShaderType::Fragment); } void ShaderConstants11::onViewportChange(const gl::Rectangle &glViewport, const D3D11_VIEWPORT &dxViewport, bool is9_3, bool presentPathFast) { mShaderConstantsDirty.set(gl::ShaderType::Vertex); mShaderConstantsDirty.set(gl::ShaderType::Fragment); // On Feature Level 9_*, we must emulate large and/or negative viewports in the shaders // using viewAdjust (like the D3D9 renderer). 
if (is9_3) { mVertex.viewAdjust[0] = static_cast((glViewport.width - dxViewport.Width) + 2 * (glViewport.x - dxViewport.TopLeftX)) / dxViewport.Width; mVertex.viewAdjust[1] = static_cast((glViewport.height - dxViewport.Height) + 2 * (glViewport.y - dxViewport.TopLeftY)) / dxViewport.Height; mVertex.viewAdjust[2] = static_cast(glViewport.width) / dxViewport.Width; mVertex.viewAdjust[3] = static_cast(glViewport.height) / dxViewport.Height; } mPixel.viewCoords[0] = glViewport.width * 0.5f; mPixel.viewCoords[1] = glViewport.height * 0.5f; mPixel.viewCoords[2] = glViewport.x + (glViewport.width * 0.5f); mPixel.viewCoords[3] = glViewport.y + (glViewport.height * 0.5f); // Instanced pointsprite emulation requires ViewCoords to be defined in the // the vertex shader. mVertex.viewCoords[0] = mPixel.viewCoords[0]; mVertex.viewCoords[1] = mPixel.viewCoords[1]; mVertex.viewCoords[2] = mPixel.viewCoords[2]; mVertex.viewCoords[3] = mPixel.viewCoords[3]; const float zNear = dxViewport.MinDepth; const float zFar = dxViewport.MaxDepth; mPixel.depthFront[0] = (zFar - zNear) * 0.5f; mPixel.depthFront[1] = (zNear + zFar) * 0.5f; mVertex.depthRange[0] = zNear; mVertex.depthRange[1] = zFar; mVertex.depthRange[2] = zFar - zNear; mPixel.depthRange[0] = zNear; mPixel.depthRange[1] = zFar; mPixel.depthRange[2] = zFar - zNear; mPixel.viewScale[0] = 1.0f; mPixel.viewScale[1] = presentPathFast ? 1.0f : -1.0f; // Updates to the multiviewWriteToViewportIndex member are to be handled whenever the draw // framebuffer's layout is changed. mVertex.viewScale[0] = mPixel.viewScale[0]; mVertex.viewScale[1] = mPixel.viewScale[1]; } // Update the ShaderConstants with a new first vertex and return whether the update dirties them. ANGLE_INLINE bool ShaderConstants11::onFirstVertexChange(GLint firstVertex) { // firstVertex should already include baseVertex, if any. 
uint32_t newFirstVertex = static_cast(firstVertex); bool firstVertexDirty = (mVertex.firstVertex != newFirstVertex); if (firstVertexDirty) { mVertex.firstVertex = newFirstVertex; mShaderConstantsDirty.set(gl::ShaderType::Vertex); } return firstVertexDirty; } void ShaderConstants11::onSamplerChange(gl::ShaderType shaderType, unsigned int samplerIndex, const gl::Texture &texture, const gl::SamplerState &samplerState) { ASSERT(shaderType != gl::ShaderType::InvalidEnum); if (updateSamplerMetadata(&mShaderSamplerMetadata[shaderType][samplerIndex], texture, samplerState)) { mNumActiveShaderSamplers[shaderType] = 0; } } void ShaderConstants11::onImageChange(gl::ShaderType shaderType, unsigned int imageIndex, const gl::ImageUnit &imageUnit) { ASSERT(shaderType != gl::ShaderType::InvalidEnum); if (imageUnit.access == GL_READ_ONLY) { if (updateImageMetadata(&mShaderReadonlyImageMetadata[shaderType][imageIndex], imageUnit)) { mNumActiveShaderReadonlyImages[shaderType] = 0; } } else { if (updateImageMetadata(&mShaderImageMetadata[shaderType][imageIndex], imageUnit)) { mNumActiveShaderImages[shaderType] = 0; } } } angle::Result ShaderConstants11::updateBuffer(const gl::Context *context, Renderer11 *renderer, gl::ShaderType shaderType, const ProgramD3D &programD3D, const d3d11::Buffer &driverConstantBuffer) { // Re-upload the sampler meta-data if the current program uses more samplers // than we previously uploaded. 
const int numSamplers = programD3D.getUsedSamplerRange(shaderType).length(); const int numReadonlyImages = programD3D.getUsedImageRange(shaderType, true).length(); const int numImages = programD3D.getUsedImageRange(shaderType, false).length(); const bool dirty = mShaderConstantsDirty[shaderType] || (mNumActiveShaderSamplers[shaderType] < numSamplers) || (mNumActiveShaderReadonlyImages[shaderType] < numReadonlyImages) || (mNumActiveShaderImages[shaderType] < numImages); const size_t dataSize = GetShaderConstantsStructSize(shaderType); const uint8_t *samplerData = reinterpret_cast(mShaderSamplerMetadata[shaderType].data()); const size_t samplerDataSize = sizeof(SamplerMetadata) * numSamplers; const uint8_t *readonlyImageData = reinterpret_cast(mShaderReadonlyImageMetadata[shaderType].data()); const size_t readonlyImageDataSize = sizeof(ImageMetadata) * numReadonlyImages; const uint8_t *imageData = reinterpret_cast(mShaderImageMetadata[shaderType].data()); const size_t imageDataSize = sizeof(ImageMetadata) * numImages; mNumActiveShaderSamplers[shaderType] = numSamplers; mNumActiveShaderReadonlyImages[shaderType] = numReadonlyImages; mNumActiveShaderImages[shaderType] = numImages; mShaderConstantsDirty.set(shaderType, false); const uint8_t *data = nullptr; switch (shaderType) { case gl::ShaderType::Vertex: data = reinterpret_cast(&mVertex); break; case gl::ShaderType::Fragment: data = reinterpret_cast(&mPixel); break; case gl::ShaderType::Compute: data = reinterpret_cast(&mCompute); break; default: UNREACHABLE(); break; } ASSERT(driverConstantBuffer.valid()); if (!dirty) { return angle::Result::Continue; } // Previous buffer contents are discarded, so we need to refresh the whole buffer. 
D3D11_MAPPED_SUBRESOURCE mapping = {}; ANGLE_TRY(renderer->mapResource(context, driverConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping)); memcpy(mapping.pData, data, dataSize); memcpy(static_cast(mapping.pData) + dataSize, samplerData, sizeof(SamplerMetadata) * numSamplers); memcpy(static_cast(mapping.pData) + dataSize + samplerDataSize, readonlyImageData, readonlyImageDataSize); memcpy( static_cast(mapping.pData) + dataSize + samplerDataSize + readonlyImageDataSize, imageData, imageDataSize); renderer->getDeviceContext()->Unmap(driverConstantBuffer.get(), 0); return angle::Result::Continue; } StateManager11::StateManager11(Renderer11 *renderer) : mRenderer(renderer), mInternalDirtyBits(), mCurSampleAlphaToCoverage(false), mCurBlendStateExt(), mCurBlendColor(0, 0, 0, 0), mCurSampleMask(0), mCurStencilRef(0), mCurStencilBackRef(0), mCurStencilSize(0), mCurScissorEnabled(false), mCurScissorRect(), mCurViewport(), mCurNear(0.0f), mCurFar(0.0f), mViewportBounds(), mRenderTargetIsDirty(true), mCurPresentPathFastEnabled(false), mCurPresentPathFastColorBufferHeight(0), mDirtyCurrentValueAttribs(), mCurrentValueAttribs(), mCurrentInputLayout(), mDirtyVertexBufferRange(gl::MAX_VERTEX_ATTRIBS, 0), mCurrentPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED), mLastAppliedDrawMode(gl::PrimitiveMode::InvalidEnum), mCullEverything(false), mDirtySwizzles(false), mAppliedIB(nullptr), mAppliedIBFormat(DXGI_FORMAT_UNKNOWN), mAppliedIBOffset(0), mIndexBufferIsDirty(false), mVertexDataManager(renderer), mIndexDataManager(renderer), mIsMultiviewEnabled(false), mIndependentBlendStates(false), mEmptySerial(mRenderer->generateSerial()), mProgramD3D(nullptr), mVertexArray11(nullptr), mFramebuffer11(nullptr) { mCurDepthStencilState.depthTest = false; mCurDepthStencilState.depthFunc = GL_LESS; mCurDepthStencilState.depthMask = true; mCurDepthStencilState.stencilTest = false; mCurDepthStencilState.stencilMask = true; mCurDepthStencilState.stencilFail = GL_KEEP; 
mCurDepthStencilState.stencilPassDepthFail = GL_KEEP; mCurDepthStencilState.stencilPassDepthPass = GL_KEEP; mCurDepthStencilState.stencilWritemask = static_cast(-1); mCurDepthStencilState.stencilBackFunc = GL_ALWAYS; mCurDepthStencilState.stencilBackMask = static_cast(-1); mCurDepthStencilState.stencilBackFail = GL_KEEP; mCurDepthStencilState.stencilBackPassDepthFail = GL_KEEP; mCurDepthStencilState.stencilBackPassDepthPass = GL_KEEP; mCurDepthStencilState.stencilBackWritemask = static_cast(-1); mCurRasterState.rasterizerDiscard = false; mCurRasterState.cullFace = false; mCurRasterState.cullMode = gl::CullFaceMode::Back; mCurRasterState.frontFace = GL_CCW; mCurRasterState.polygonOffsetFill = false; mCurRasterState.polygonOffsetFactor = 0.0f; mCurRasterState.polygonOffsetUnits = 0.0f; mCurRasterState.pointDrawMode = false; mCurRasterState.multiSample = false; mCurRasterState.dither = false; // Start with all internal dirty bits set except DIRTY_BIT_COMPUTE_SRVUAV_STATE and // DIRTY_BIT_GRAPHICS_SRVUAV_STATE. mInternalDirtyBits.set(); mInternalDirtyBits.reset(DIRTY_BIT_GRAPHICS_SRVUAV_STATE); mInternalDirtyBits.reset(DIRTY_BIT_COMPUTE_SRVUAV_STATE); mGraphicsDirtyBitsMask.set(); mGraphicsDirtyBitsMask.reset(DIRTY_BIT_COMPUTE_SRVUAV_STATE); mComputeDirtyBitsMask.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); mComputeDirtyBitsMask.set(DIRTY_BIT_PROGRAM_UNIFORMS); mComputeDirtyBitsMask.set(DIRTY_BIT_DRIVER_UNIFORMS); mComputeDirtyBitsMask.set(DIRTY_BIT_PROGRAM_UNIFORM_BUFFERS); mComputeDirtyBitsMask.set(DIRTY_BIT_PROGRAM_ATOMIC_COUNTER_BUFFERS); mComputeDirtyBitsMask.set(DIRTY_BIT_PROGRAM_SHADER_STORAGE_BUFFERS); mComputeDirtyBitsMask.set(DIRTY_BIT_SHADERS); mComputeDirtyBitsMask.set(DIRTY_BIT_COMPUTE_SRVUAV_STATE); // Initially all current value attributes must be updated on first use. 
mDirtyCurrentValueAttribs.set(); mCurrentVertexBuffers.fill(nullptr); mCurrentVertexStrides.fill(std::numeric_limits::max()); mCurrentVertexOffsets.fill(std::numeric_limits::max()); } StateManager11::~StateManager11() {} template void StateManager11::setShaderResourceInternal(gl::ShaderType shaderType, UINT resourceSlot, const SRVType *srv) { auto *currentSRVs = getSRVCache(shaderType); ASSERT(static_cast(resourceSlot) < currentSRVs->size()); const ViewRecord &record = (*currentSRVs)[resourceSlot]; if (record.view != reinterpret_cast(srv)) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); ID3D11ShaderResourceView *srvPtr = srv ? srv->get() : nullptr; if (srvPtr) { uintptr_t resource = reinterpret_cast(GetViewResource(srvPtr)); unsetConflictingUAVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Compute, resource, nullptr); } switch (shaderType) { case gl::ShaderType::Vertex: deviceContext->VSSetShaderResources(resourceSlot, 1, &srvPtr); break; case gl::ShaderType::Fragment: deviceContext->PSSetShaderResources(resourceSlot, 1, &srvPtr); break; case gl::ShaderType::Compute: { if (srvPtr) { uintptr_t resource = reinterpret_cast(GetViewResource(srvPtr)); unsetConflictingRTVs(resource); } deviceContext->CSSetShaderResources(resourceSlot, 1, &srvPtr); break; } default: UNREACHABLE(); } currentSRVs->update(resourceSlot, srvPtr); } } template void StateManager11::setUnorderedAccessViewInternal(gl::ShaderType shaderType, UINT resourceSlot, const UAVType *uav) { ASSERT(shaderType == gl::ShaderType::Compute); ASSERT(static_cast(resourceSlot) < mCurComputeUAVs.size()); const ViewRecord &record = mCurComputeUAVs[resourceSlot]; if (record.view != reinterpret_cast(uav)) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); ID3D11UnorderedAccessView *uavPtr = uav ? uav->get() : nullptr; // We need to make sure that resource being set to UnorderedAccessView slot |resourceSlot| // is not bound on SRV. 
if (uavPtr) { uintptr_t resource = reinterpret_cast(GetViewResource(uavPtr)); unsetConflictingSRVs(gl::PipelineType::ComputePipeline, gl::ShaderType::Vertex, resource, nullptr, false); unsetConflictingSRVs(gl::PipelineType::ComputePipeline, gl::ShaderType::Fragment, resource, nullptr, false); unsetConflictingSRVs(gl::PipelineType::ComputePipeline, gl::ShaderType::Compute, resource, nullptr, false); } deviceContext->CSSetUnorderedAccessViews(resourceSlot, 1, &uavPtr, nullptr); mCurComputeUAVs.update(resourceSlot, uavPtr); } } void StateManager11::updateStencilSizeIfChanged(bool depthStencilInitialized, unsigned int stencilSize) { if (!depthStencilInitialized || stencilSize != mCurStencilSize) { mCurStencilSize = stencilSize; mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); } } void StateManager11::checkPresentPath(const gl::Context *context) { if (!mRenderer->presentPathFastEnabled()) return; const auto *framebuffer = context->getState().getDrawFramebuffer(); const auto *firstColorAttachment = framebuffer->getFirstColorAttachment(); const bool presentPathFastActive = UsePresentPathFast(mRenderer, firstColorAttachment); const int colorBufferHeight = firstColorAttachment ? 
firstColorAttachment->getSize().height : 0; if ((mCurPresentPathFastEnabled != presentPathFastActive) || (presentPathFastActive && (colorBufferHeight != mCurPresentPathFastColorBufferHeight))) { mCurPresentPathFastEnabled = presentPathFastActive; mCurPresentPathFastColorBufferHeight = colorBufferHeight; // Scissor rect may need to be vertically inverted mInternalDirtyBits.set(DIRTY_BIT_SCISSOR_STATE); // Cull Mode may need to be inverted mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); // Viewport may need to be vertically inverted invalidateViewport(context); } } angle::Result StateManager11::updateStateForCompute(const gl::Context *context, GLuint numGroupsX, GLuint numGroupsY, GLuint numGroupsZ) { mShaderConstants.setComputeWorkGroups(numGroupsX, numGroupsY, numGroupsZ); if (mProgramD3D->updateSamplerMapping() == ProgramD3D::SamplerMapping::WasDirty) { invalidateTexturesAndSamplers(); } if (mDirtySwizzles) { ANGLE_TRY(generateSwizzlesForShader(context, gl::ShaderType::Compute)); mDirtySwizzles = false; } if (mProgramD3D->anyShaderUniformsDirty()) { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_UNIFORMS); } auto dirtyBitsCopy = mInternalDirtyBits & mComputeDirtyBitsMask; mInternalDirtyBits &= ~mComputeDirtyBitsMask; for (auto iter = dirtyBitsCopy.begin(), end = dirtyBitsCopy.end(); iter != end; ++iter) { switch (*iter) { case DIRTY_BIT_COMPUTE_SRVUAV_STATE: // Avoid to call syncTexturesForCompute function two times. 
iter.resetLaterBit(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); ANGLE_TRY(syncTexturesForCompute(context)); break; case DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE: ANGLE_TRY(syncTexturesForCompute(context)); break; case DIRTY_BIT_PROGRAM_UNIFORMS: case DIRTY_BIT_DRIVER_UNIFORMS: ANGLE_TRY(applyComputeUniforms(context, mProgramD3D)); break; case DIRTY_BIT_PROGRAM_UNIFORM_BUFFERS: ANGLE_TRY(syncUniformBuffers(context)); break; case DIRTY_BIT_PROGRAM_ATOMIC_COUNTER_BUFFERS: ANGLE_TRY(syncAtomicCounterBuffers(context)); break; case DIRTY_BIT_PROGRAM_SHADER_STORAGE_BUFFERS: ANGLE_TRY(syncShaderStorageBuffers(context)); break; case DIRTY_BIT_SHADERS: ANGLE_TRY(syncProgramForCompute(context)); break; default: UNREACHABLE(); break; } } return angle::Result::Continue; } void StateManager11::syncState(const gl::Context *context, const gl::State::DirtyBits &dirtyBits) { if (!dirtyBits.any()) { return; } const gl::State &state = context->getState(); for (size_t dirtyBit : dirtyBits) { switch (dirtyBit) { case gl::State::DIRTY_BIT_BLEND_EQUATIONS: { const gl::BlendStateExt &blendStateExt = state.getBlendStateExt(); ASSERT(mCurBlendStateExt.mMaxDrawBuffers == blendStateExt.mMaxDrawBuffers); // Compare blend equations only for buffers with blending enabled because // subsequent sync stages enforce default values for buffers with blending disabled. if ((blendStateExt.mEnabledMask & mCurBlendStateExt.compareEquations(blendStateExt.mEquationColor, blendStateExt.mEquationAlpha)) .any()) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; } case gl::State::DIRTY_BIT_BLEND_FUNCS: { const gl::BlendStateExt &blendStateExt = state.getBlendStateExt(); ASSERT(mCurBlendStateExt.mMaxDrawBuffers == blendStateExt.mMaxDrawBuffers); // Compare blend factors only for buffers with blending enabled because // subsequent sync stages enforce default values for buffers with blending disabled. 
if ((blendStateExt.mEnabledMask & mCurBlendStateExt.compareFactors( blendStateExt.mSrcColor, blendStateExt.mDstColor, blendStateExt.mSrcAlpha, blendStateExt.mDstAlpha)) .any()) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; } case gl::State::DIRTY_BIT_BLEND_ENABLED: { if (state.getBlendStateExt().mEnabledMask != mCurBlendStateExt.mEnabledMask) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; } case gl::State::DIRTY_BIT_SAMPLE_ALPHA_TO_COVERAGE_ENABLED: if (state.isSampleAlphaToCoverageEnabled() != mCurSampleAlphaToCoverage) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; case gl::State::DIRTY_BIT_DITHER_ENABLED: if (state.getRasterizerState().dither != mCurRasterState.dither) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); } break; case gl::State::DIRTY_BIT_COLOR_MASK: { if (state.getBlendStateExt().mColorMask != mCurBlendStateExt.mColorMask) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; } case gl::State::DIRTY_BIT_BLEND_COLOR: if (state.getBlendColor() != mCurBlendColor) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } break; // Depth and stencil redundant state changes are guarded in the // frontend so for related cases here just set the dirty bit. 
case gl::State::DIRTY_BIT_DEPTH_MASK: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_DEPTH_TEST_ENABLED: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_DEPTH_FUNC: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_TEST_ENABLED: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_FUNCS_FRONT: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_FUNCS_BACK: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_FRONT: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_BACK: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_OPS_FRONT: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_STENCIL_OPS_BACK: mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); break; case gl::State::DIRTY_BIT_CULL_FACE_ENABLED: if (state.getRasterizerState().cullFace != mCurRasterState.cullFace) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); } break; case gl::State::DIRTY_BIT_CULL_FACE: if (state.getRasterizerState().cullMode != mCurRasterState.cullMode) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); } break; case gl::State::DIRTY_BIT_FRONT_FACE: if (state.getRasterizerState().frontFace != mCurRasterState.frontFace) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); } break; case gl::State::DIRTY_BIT_POLYGON_OFFSET_FILL_ENABLED: if (state.getRasterizerState().polygonOffsetFill != mCurRasterState.polygonOffsetFill) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); } break; case gl::State::DIRTY_BIT_POLYGON_OFFSET: 
{ const gl::RasterizerState &rasterState = state.getRasterizerState(); if (rasterState.polygonOffsetFactor != mCurRasterState.polygonOffsetFactor || rasterState.polygonOffsetUnits != mCurRasterState.polygonOffsetUnits) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); } break; } case gl::State::DIRTY_BIT_RASTERIZER_DISCARD_ENABLED: if (state.getRasterizerState().rasterizerDiscard != mCurRasterState.rasterizerDiscard) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); // Enabling/disabling rasterizer discard affects the pixel shader. invalidateShaders(); } break; case gl::State::DIRTY_BIT_SCISSOR: if (state.getScissor() != mCurScissorRect) { mInternalDirtyBits.set(DIRTY_BIT_SCISSOR_STATE); } break; case gl::State::DIRTY_BIT_SCISSOR_TEST_ENABLED: if (state.isScissorTestEnabled() != mCurScissorEnabled) { mInternalDirtyBits.set(DIRTY_BIT_SCISSOR_STATE); // Rasterizer state update needs mCurScissorsEnabled and updates when it changes mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); } break; case gl::State::DIRTY_BIT_DEPTH_RANGE: invalidateViewport(context); break; case gl::State::DIRTY_BIT_VIEWPORT: if (state.getViewport() != mCurViewport) { invalidateViewport(context); } break; case gl::State::DIRTY_BIT_DRAW_FRAMEBUFFER_BINDING: invalidateRenderTarget(); if (mIsMultiviewEnabled) { handleMultiviewDrawFramebufferChange(context); } mFramebuffer11 = GetImplAs(state.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_VERTEX_ARRAY_BINDING: invalidateVertexBuffer(); // Force invalidate the current value attributes, since the VertexArray11 keeps an // internal cache of TranslatedAttributes, and they CurrentValue attributes are // owned by the StateManager11/Context. mDirtyCurrentValueAttribs.set(); // Invalidate the cached index buffer. 
invalidateIndexBuffer(); mVertexArray11 = GetImplAs(state.getVertexArray()); break; case gl::State::DIRTY_BIT_UNIFORM_BUFFER_BINDINGS: invalidateProgramUniformBuffers(); break; case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING: invalidateProgramAtomicCounterBuffers(); break; case gl::State::DIRTY_BIT_SHADER_STORAGE_BUFFER_BINDING: invalidateProgramShaderStorageBuffers(); break; case gl::State::DIRTY_BIT_TEXTURE_BINDINGS: invalidateTexturesAndSamplers(); break; case gl::State::DIRTY_BIT_SAMPLER_BINDINGS: invalidateTexturesAndSamplers(); break; case gl::State::DIRTY_BIT_IMAGE_BINDINGS: // TODO(jie.a.chen@intel.com): More fine-grained update. // Currently images are updated together with textures and samplers. It would be // better to update them separately. // http://anglebug.com/2814 invalidateTexturesAndSamplers(); break; case gl::State::DIRTY_BIT_TRANSFORM_FEEDBACK_BINDING: invalidateTransformFeedback(); break; case gl::State::DIRTY_BIT_PROGRAM_BINDING: mProgramD3D = GetImplAs(state.getProgram()); break; case gl::State::DIRTY_BIT_PROGRAM_EXECUTABLE: { invalidateShaders(); invalidateTexturesAndSamplers(); invalidateProgramUniforms(); invalidateProgramUniformBuffers(); invalidateProgramAtomicCounterBuffers(); invalidateProgramShaderStorageBuffers(); invalidateDriverUniforms(); const gl::ProgramExecutable *executable = state.getProgramExecutable(); if (!executable || !executable->isCompute()) { mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); invalidateVertexBuffer(); invalidateRenderTarget(); // If OVR_multiview2 is enabled, the attribute divisor has to be updated for // each binding. When using compute, there could be no vertex array. if (mIsMultiviewEnabled && mVertexArray11) { ASSERT(mProgramD3D); ASSERT(mVertexArray11 == GetImplAs(state.getVertexArray())); const gl::ProgramState &programState = mProgramD3D->getState(); int numViews = programState.usesMultiview() ? 
programState.getNumViews() : 1; mVertexArray11->markAllAttributeDivisorsForAdjustment(numViews); } } break; } case gl::State::DIRTY_BIT_CURRENT_VALUES: { for (auto attribIndex : state.getAndResetDirtyCurrentValues()) { invalidateCurrentValueAttrib(attribIndex); } break; } case gl::State::DIRTY_BIT_PROVOKING_VERTEX: invalidateShaders(); break; default: break; } } // TODO(jmadill): Input layout and vertex buffer state. } void StateManager11::handleMultiviewDrawFramebufferChange(const gl::Context *context) { const auto &glState = context->getState(); const gl::Framebuffer *drawFramebuffer = glState.getDrawFramebuffer(); ASSERT(drawFramebuffer != nullptr); if (drawFramebuffer->isMultiview()) { // Because the base view index is applied as an offset to the 2D texture array when the // RTV is created, we just have to pass a boolean to select which code path is to be // used. mShaderConstants.setMultiviewWriteToViewportIndex(0.0f); } } angle::Result StateManager11::syncBlendState(const gl::Context *context, const gl::BlendStateExt &blendStateExt, const gl::ColorF &blendColor, unsigned int sampleMask, bool sampleAlphaToCoverage, bool emulateConstantAlpha) { const d3d11::BlendState *dxBlendState = nullptr; const d3d11::BlendStateKey &key = RenderStateCache::GetBlendStateKey( context, mFramebuffer11, blendStateExt, sampleAlphaToCoverage); ANGLE_TRY(mRenderer->getBlendState(context, key, &dxBlendState)); ASSERT(dxBlendState != nullptr); // D3D11 does not support CONSTANT_ALPHA as source or destination color factor, so ANGLE sets // the factor to CONSTANT_COLOR and swizzles the color value to aaaa. For this reason, it's // impossible to simultaneously use CONSTANT_ALPHA and CONSTANT_COLOR as source or destination // color factors in the same blend state. This is enforced in the validation layer. float blendColors[4] = {0.0f}; blendColors[0] = emulateConstantAlpha ? blendColor.alpha : blendColor.red; blendColors[1] = emulateConstantAlpha ? 
blendColor.alpha : blendColor.green; blendColors[2] = emulateConstantAlpha ? blendColor.alpha : blendColor.blue; blendColors[3] = blendColor.alpha; mRenderer->getDeviceContext()->OMSetBlendState(dxBlendState->get(), blendColors, sampleMask); mCurBlendStateExt = blendStateExt; mCurBlendColor = blendColor; mCurSampleMask = sampleMask; mCurSampleAlphaToCoverage = sampleAlphaToCoverage; return angle::Result::Continue; } angle::Result StateManager11::syncDepthStencilState(const gl::Context *context) { const gl::State &glState = context->getState(); mCurDepthStencilState = glState.getDepthStencilState(); mCurStencilRef = glState.getStencilRef(); mCurStencilBackRef = glState.getStencilBackRef(); // get the maximum size of the stencil ref unsigned int maxStencil = 0; if (mCurDepthStencilState.stencilTest && mCurStencilSize > 0) { maxStencil = (1 << mCurStencilSize) - 1; } ASSERT((mCurDepthStencilState.stencilWritemask & maxStencil) == (mCurDepthStencilState.stencilBackWritemask & maxStencil)); ASSERT(gl::clamp(mCurStencilRef, 0, static_cast(maxStencil)) == gl::clamp(mCurStencilBackRef, 0, static_cast(maxStencil))); ASSERT((mCurDepthStencilState.stencilMask & maxStencil) == (mCurDepthStencilState.stencilBackMask & maxStencil)); gl::DepthStencilState modifiedGLState = glState.getDepthStencilState(); ASSERT(mCurDisableDepth.valid() && mCurDisableStencil.valid()); if (mCurDisableDepth.value()) { modifiedGLState.depthTest = false; modifiedGLState.depthMask = false; } if (mCurDisableStencil.value()) { modifiedGLState.stencilTest = false; } if (!modifiedGLState.stencilTest) { modifiedGLState.stencilWritemask = 0; modifiedGLState.stencilBackWritemask = 0; } // If STENCIL_TEST is disabled in glState, stencil testing and writing should be disabled. // Verify that's true in the modifiedGLState so it is propagated to d3dState. 
ASSERT(glState.getDepthStencilState().stencilTest || (!modifiedGLState.stencilTest && modifiedGLState.stencilWritemask == 0 && modifiedGLState.stencilBackWritemask == 0)); const d3d11::DepthStencilState *d3dState = nullptr; ANGLE_TRY(mRenderer->getDepthStencilState(context, modifiedGLState, &d3dState)); ASSERT(d3dState); // Max D3D11 stencil reference value is 0xFF, // corresponding to the max 8 bits in a stencil buffer // GL specifies we should clamp the ref value to the // nearest bit depth when doing stencil ops static_assert(D3D11_DEFAULT_STENCIL_READ_MASK == 0xFF, "Unexpected value of D3D11_DEFAULT_STENCIL_READ_MASK"); static_assert(D3D11_DEFAULT_STENCIL_WRITE_MASK == 0xFF, "Unexpected value of D3D11_DEFAULT_STENCIL_WRITE_MASK"); UINT dxStencilRef = static_cast(gl::clamp(mCurStencilRef, 0, 0xFF)); mRenderer->getDeviceContext()->OMSetDepthStencilState(d3dState->get(), dxStencilRef); return angle::Result::Continue; } angle::Result StateManager11::syncRasterizerState(const gl::Context *context, gl::PrimitiveMode mode) { // TODO: Remove pointDrawMode and multiSample from gl::RasterizerState. gl::RasterizerState rasterState = context->getState().getRasterizerState(); rasterState.pointDrawMode = (mode == gl::PrimitiveMode::Points); rasterState.multiSample = mCurRasterState.multiSample; ID3D11RasterizerState *dxRasterState = nullptr; if (mCurPresentPathFastEnabled) { gl::RasterizerState modifiedRasterState = rasterState; // If prseent path fast is active then we need invert the front face state. // This ensures that both gl_FrontFacing is correct, and front/back culling // is performed correctly. 
if (modifiedRasterState.frontFace == GL_CCW) { modifiedRasterState.frontFace = GL_CW; } else { ASSERT(modifiedRasterState.frontFace == GL_CW); modifiedRasterState.frontFace = GL_CCW; } ANGLE_TRY(mRenderer->getRasterizerState(context, modifiedRasterState, mCurScissorEnabled, &dxRasterState)); } else { ANGLE_TRY(mRenderer->getRasterizerState(context, rasterState, mCurScissorEnabled, &dxRasterState)); } mRenderer->getDeviceContext()->RSSetState(dxRasterState); mCurRasterState = rasterState; return angle::Result::Continue; } void StateManager11::syncScissorRectangle(const gl::Context *context) { const auto &glState = context->getState(); gl::Framebuffer *framebuffer = glState.getDrawFramebuffer(); const gl::Rectangle &scissor = glState.getScissor(); const bool enabled = glState.isScissorTestEnabled(); mCurScissorOffset = framebuffer->getSurfaceTextureOffset(); int scissorX = scissor.x + mCurScissorOffset.x; int scissorY = scissor.y + mCurScissorOffset.y; if (mCurPresentPathFastEnabled) { scissorY = mCurPresentPathFastColorBufferHeight - scissor.height - scissor.y; } if (enabled) { D3D11_RECT rect; int x = scissorX; int y = scissorY; rect.left = std::max(0, x); rect.top = std::max(0, y); rect.right = x + std::max(0, scissor.width); rect.bottom = y + std::max(0, scissor.height); mRenderer->getDeviceContext()->RSSetScissorRects(1, &rect); } mCurScissorRect = scissor; mCurScissorEnabled = enabled; } void StateManager11::syncViewport(const gl::Context *context) { const auto &glState = context->getState(); gl::Framebuffer *framebuffer = glState.getDrawFramebuffer(); float actualZNear = gl::clamp01(glState.getNearPlane()); float actualZFar = gl::clamp01(glState.getFarPlane()); const auto &caps = context->getCaps(); int dxMaxViewportBoundsX = caps.maxViewportWidth; int dxMaxViewportBoundsY = caps.maxViewportHeight; int dxMinViewportBoundsX = -dxMaxViewportBoundsX; int dxMinViewportBoundsY = -dxMaxViewportBoundsY; bool is9_3 = mRenderer->getRenderer11DeviceCaps().featureLevel 
<= D3D_FEATURE_LEVEL_9_3; if (is9_3) { // Feature Level 9 viewports shouldn't exceed the dimensions of the rendertarget. dxMaxViewportBoundsX = static_cast(mViewportBounds.width); dxMaxViewportBoundsY = static_cast(mViewportBounds.height); dxMinViewportBoundsX = 0; dxMinViewportBoundsY = 0; } const auto &viewport = glState.getViewport(); int dxViewportTopLeftX = 0; int dxViewportTopLeftY = 0; int dxViewportWidth = 0; int dxViewportHeight = 0; mCurViewportOffset = framebuffer->getSurfaceTextureOffset(); dxViewportTopLeftX = gl::clamp(viewport.x + mCurViewportOffset.x, dxMinViewportBoundsX, dxMaxViewportBoundsX); dxViewportTopLeftY = gl::clamp(viewport.y + mCurViewportOffset.y, dxMinViewportBoundsY, dxMaxViewportBoundsY); dxViewportWidth = gl::clamp(viewport.width, 0, dxMaxViewportBoundsX - dxViewportTopLeftX); dxViewportHeight = gl::clamp(viewport.height, 0, dxMaxViewportBoundsY - dxViewportTopLeftY); D3D11_VIEWPORT dxViewport; dxViewport.TopLeftX = static_cast(dxViewportTopLeftX); if (mCurPresentPathFastEnabled) { // When present path fast is active and we're rendering to framebuffer 0, we must invert // the viewport in Y-axis. // NOTE: We delay the inversion until right before the call to RSSetViewports, and leave // dxViewportTopLeftY unchanged. This allows us to calculate viewAdjust below using the // unaltered dxViewportTopLeftY value. dxViewport.TopLeftY = static_cast(mCurPresentPathFastColorBufferHeight - dxViewportTopLeftY - dxViewportHeight); } else { dxViewport.TopLeftY = static_cast(dxViewportTopLeftY); } // The es 3.1 spec section 9.2 states that, "If there are no attachments, rendering // will be limited to a rectangle having a lower left of (0, 0) and an upper right of // (width, height), where width and height are the framebuffer object's default width // and height." See http://anglebug.com/1594 // If the Framebuffer has no color attachment and the default width or height is smaller // than the current viewport, use the smaller of the two sizes. 
// If framebuffer default width or height is 0, the params should not set. if (!framebuffer->getFirstNonNullAttachment() && (framebuffer->getDefaultWidth() || framebuffer->getDefaultHeight())) { dxViewport.Width = static_cast(std::min(viewport.width, framebuffer->getDefaultWidth())); dxViewport.Height = static_cast(std::min(viewport.height, framebuffer->getDefaultHeight())); } else { dxViewport.Width = static_cast(dxViewportWidth); dxViewport.Height = static_cast(dxViewportHeight); } dxViewport.MinDepth = actualZNear; dxViewport.MaxDepth = actualZFar; mRenderer->getDeviceContext()->RSSetViewports(1, &dxViewport); mCurViewport = viewport; mCurNear = actualZNear; mCurFar = actualZFar; const D3D11_VIEWPORT adjustViewport = {static_cast(dxViewportTopLeftX), static_cast(dxViewportTopLeftY), static_cast(dxViewportWidth), static_cast(dxViewportHeight), actualZNear, actualZFar}; mShaderConstants.onViewportChange(viewport, adjustViewport, is9_3, mCurPresentPathFastEnabled); } void StateManager11::invalidateRenderTarget() { mRenderTargetIsDirty = true; } void StateManager11::processFramebufferInvalidation(const gl::Context *context) { ASSERT(mRenderTargetIsDirty); ASSERT(context); mInternalDirtyBits.set(DIRTY_BIT_RENDER_TARGET); // The pixel shader is dependent on the output layout. invalidateShaders(); // The D3D11 blend state is heavily dependent on the current render target. mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); gl::Framebuffer *fbo = context->getState().getDrawFramebuffer(); ASSERT(fbo); // Dirty scissor and viewport because surface texture offset might have changed. if (mCurViewportOffset != fbo->getSurfaceTextureOffset()) { mInternalDirtyBits.set(DIRTY_BIT_VIEWPORT_STATE); } if (mCurScissorOffset != fbo->getSurfaceTextureOffset()) { mInternalDirtyBits.set(DIRTY_BIT_SCISSOR_STATE); } // Disable the depth test/depth write if we are using a stencil-only attachment. 
// This is because ANGLE emulates stencil-only with D24S8 on D3D11 - we should neither read // nor write to the unused depth part of this emulated texture. bool disableDepth = (!fbo->hasDepth() && fbo->hasStencil()); // Similarly we disable the stencil portion of the DS attachment if the app only binds depth. bool disableStencil = (fbo->hasDepth() && !fbo->hasStencil()); if (!mCurDisableDepth.valid() || disableDepth != mCurDisableDepth.value() || !mCurDisableStencil.valid() || disableStencil != mCurDisableStencil.value()) { mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); mCurDisableDepth = disableDepth; mCurDisableStencil = disableStencil; } bool multiSample = (fbo->getSamples(context) != 0); if (multiSample != mCurRasterState.multiSample) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); mCurRasterState.multiSample = multiSample; } checkPresentPath(context); if (mRenderer->getRenderer11DeviceCaps().featureLevel <= D3D_FEATURE_LEVEL_9_3) { const auto *firstAttachment = fbo->getFirstNonNullAttachment(); if (firstAttachment) { const auto &size = firstAttachment->getSize(); if (mViewportBounds.width != size.width || mViewportBounds.height != size.height) { mViewportBounds = gl::Extents(size.width, size.height, 1); invalidateViewport(context); } } } } void StateManager11::invalidateBoundViews() { for (SRVCache &curShaderSRV : mCurShaderSRVs) { curShaderSRV.clear(); } invalidateRenderTarget(); } void StateManager11::invalidateVertexBuffer() { unsigned int limit = std::min(mRenderer->getNativeCaps().maxVertexAttributes, gl::MAX_VERTEX_ATTRIBS); mDirtyVertexBufferRange = gl::RangeUI(0, limit); invalidateInputLayout(); invalidateShaders(); mInternalDirtyBits.set(DIRTY_BIT_CURRENT_VALUE_ATTRIBS); } void StateManager11::invalidateViewport(const gl::Context *context) { mInternalDirtyBits.set(DIRTY_BIT_VIEWPORT_STATE); // Viewport affects the driver constants. 
invalidateDriverUniforms(); } void StateManager11::invalidateTexturesAndSamplers() { mInternalDirtyBits.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); invalidateSwizzles(); // Texture state affects the driver uniforms (base level, etc). invalidateDriverUniforms(); } void StateManager11::invalidateSwizzles() { mDirtySwizzles = true; } void StateManager11::invalidateProgramUniforms() { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_UNIFORMS); } void StateManager11::invalidateDriverUniforms() { mInternalDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS); } void StateManager11::invalidateProgramUniformBuffers() { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_UNIFORM_BUFFERS); } void StateManager11::invalidateProgramAtomicCounterBuffers() { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_ATOMIC_COUNTER_BUFFERS); } void StateManager11::invalidateProgramShaderStorageBuffers() { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_SHADER_STORAGE_BUFFERS); } void StateManager11::invalidateConstantBuffer(unsigned int slot) { if (slot == d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER) { invalidateDriverUniforms(); } else if (slot == d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DEFAULT_UNIFORM_BLOCK) { invalidateProgramUniforms(); } else { invalidateProgramUniformBuffers(); } } void StateManager11::invalidateShaders() { mInternalDirtyBits.set(DIRTY_BIT_SHADERS); } void StateManager11::invalidateTransformFeedback() { // Transform feedback affects the stream-out geometry shader. invalidateShaders(); mInternalDirtyBits.set(DIRTY_BIT_TRANSFORM_FEEDBACK); // syncPrimitiveTopology checks the transform feedback state. 
mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); } void StateManager11::invalidateInputLayout() { mInternalDirtyBits.set(DIRTY_BIT_VERTEX_BUFFERS_AND_INPUT_LAYOUT); } void StateManager11::invalidateIndexBuffer() { mIndexBufferIsDirty = true; } void StateManager11::setRenderTarget(ID3D11RenderTargetView *rtv, ID3D11DepthStencilView *dsv) { if (rtv) { unsetConflictingView(gl::PipelineType::GraphicsPipeline, rtv, true); } if (dsv) { unsetConflictingView(gl::PipelineType::GraphicsPipeline, dsv, true); } mRenderer->getDeviceContext()->OMSetRenderTargets(1, &rtv, dsv); mCurRTVs.clear(); mCurRTVs.update(0, rtv); mInternalDirtyBits.set(DIRTY_BIT_RENDER_TARGET); } void StateManager11::setRenderTargets(ID3D11RenderTargetView **rtvs, UINT numRTVs, ID3D11DepthStencilView *dsv) { for (UINT rtvIndex = 0; rtvIndex < numRTVs; ++rtvIndex) { unsetConflictingView(gl::PipelineType::GraphicsPipeline, rtvs[rtvIndex], true); } if (dsv) { unsetConflictingView(gl::PipelineType::GraphicsPipeline, dsv, true); } mRenderer->getDeviceContext()->OMSetRenderTargets(numRTVs, (numRTVs > 0) ? rtvs : nullptr, dsv); mCurRTVs.clear(); for (UINT i = 0; i < numRTVs; i++) { mCurRTVs.update(i, rtvs[i]); } mInternalDirtyBits.set(DIRTY_BIT_RENDER_TARGET); } void StateManager11::onBeginQuery(Query11 *query) { mCurrentQueries.insert(query); } void StateManager11::onDeleteQueryObject(Query11 *query) { mCurrentQueries.erase(query); } angle::Result StateManager11::onMakeCurrent(const gl::Context *context) { ANGLE_TRY(ensureInitialized(context)); const gl::State &state = context->getState(); Context11 *context11 = GetImplAs(context); for (Query11 *query : mCurrentQueries) { ANGLE_TRY(query->pause(context11)); } mCurrentQueries.clear(); for (gl::QueryType type : angle::AllEnums()) { gl::Query *query = state.getActiveQuery(type); if (query != nullptr) { Query11 *query11 = GetImplAs(query); ANGLE_TRY(query11->resume(context11)); mCurrentQueries.insert(query11); } } // Reset the cache objects. 
mProgramD3D = nullptr; mVertexArray11 = nullptr; mFramebuffer11 = nullptr; return angle::Result::Continue; } void StateManager11::unsetConflictingView(gl::PipelineType pipeline, ID3D11View *view, bool isRenderTarget) { uintptr_t resource = reinterpret_cast(GetViewResource(view)); unsetConflictingSRVs(pipeline, gl::ShaderType::Vertex, resource, nullptr, isRenderTarget); unsetConflictingSRVs(pipeline, gl::ShaderType::Fragment, resource, nullptr, isRenderTarget); unsetConflictingSRVs(pipeline, gl::ShaderType::Compute, resource, nullptr, isRenderTarget); unsetConflictingUAVs(pipeline, gl::ShaderType::Compute, resource, nullptr); } void StateManager11::unsetConflictingSRVs(gl::PipelineType pipeline, gl::ShaderType shaderType, uintptr_t resource, const gl::ImageIndex *index, bool isRenderTarget) { auto *currentSRVs = getSRVCache(shaderType); gl::PipelineType conflictPipeline = gl::GetPipelineType(shaderType); bool foundOne = false; size_t count = std::min(currentSRVs->size(), currentSRVs->highestUsed()); for (size_t resourceIndex = 0; resourceIndex < count; ++resourceIndex) { auto &record = (*currentSRVs)[resourceIndex]; if (record.view && record.resource == resource && (!index || ImageIndexConflictsWithSRV(*index, record.desc))) { setShaderResourceInternal( shaderType, static_cast(resourceIndex), nullptr); foundOne = true; } } if (foundOne && (pipeline != conflictPipeline || isRenderTarget)) { switch (conflictPipeline) { case gl::PipelineType::GraphicsPipeline: mInternalDirtyBits.set(DIRTY_BIT_GRAPHICS_SRVUAV_STATE); break; case gl::PipelineType::ComputePipeline: mInternalDirtyBits.set(DIRTY_BIT_COMPUTE_SRVUAV_STATE); break; default: UNREACHABLE(); } } } void StateManager11::unsetConflictingUAVs(gl::PipelineType pipeline, gl::ShaderType shaderType, uintptr_t resource, const gl::ImageIndex *index) { ASSERT(shaderType == gl::ShaderType::Compute); bool foundOne = false; ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); size_t count = 
std::min(mCurComputeUAVs.size(), mCurComputeUAVs.highestUsed()); for (size_t resourceIndex = 0; resourceIndex < count; ++resourceIndex) { auto &record = mCurComputeUAVs[resourceIndex]; if (record.view && record.resource == resource && (!index || ImageIndexConflictsWithUAV(*index, record.desc))) { deviceContext->CSSetUnorderedAccessViews(static_cast(resourceIndex), 1, &mNullUAVs[0], nullptr); mCurComputeUAVs.update(resourceIndex, nullptr); foundOne = true; } } if (foundOne && pipeline == gl::PipelineType::GraphicsPipeline) { mInternalDirtyBits.set(DIRTY_BIT_COMPUTE_SRVUAV_STATE); } } void StateManager11::unsetConflictingRTVs(uintptr_t resource) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); size_t count = std::min(mCurRTVs.size(), mCurRTVs.highestUsed()); for (size_t resourceIndex = 0; resourceIndex < count; ++resourceIndex) { auto &record = mCurRTVs[resourceIndex]; if (record.view && record.resource == resource) { deviceContext->OMSetRenderTargets(0, nullptr, nullptr); mCurRTVs.clear(); mInternalDirtyBits.set(DIRTY_BIT_RENDER_TARGET); return; } } } void StateManager11::unsetConflictingAttachmentResources( const gl::FramebufferAttachment &attachment, ID3D11Resource *resource) { // Unbind render target SRVs from the shader here to prevent D3D11 warnings. if (attachment.type() == GL_TEXTURE) { uintptr_t resourcePtr = reinterpret_cast(resource); const gl::ImageIndex &index = attachment.getTextureImageIndex(); // The index doesn't need to be corrected for the small compressed texture workaround // because a rendertarget is never compressed. 
unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Vertex, resourcePtr, &index, false); unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Fragment, resourcePtr, &index, false); unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Compute, resourcePtr, &index, false); unsetConflictingUAVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Compute, resourcePtr, &index); } else if (attachment.type() == GL_FRAMEBUFFER_DEFAULT) { uintptr_t resourcePtr = reinterpret_cast(resource); unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Vertex, resourcePtr, nullptr, false); unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Fragment, resourcePtr, nullptr, false); unsetConflictingSRVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Compute, resourcePtr, nullptr, false); unsetConflictingUAVs(gl::PipelineType::GraphicsPipeline, gl::ShaderType::Compute, resourcePtr, nullptr); } } angle::Result StateManager11::ensureInitialized(const gl::Context *context) { Renderer11 *renderer = GetImplAs(context)->getRenderer(); const gl::Caps &caps = renderer->getNativeCaps(); const gl::Extensions &extensions = renderer->getNativeExtensions(); for (gl::ShaderType shaderType : gl::AllShaderTypes()) { const GLuint maxShaderTextureImageUnits = static_cast(caps.maxShaderTextureImageUnits[shaderType]); mCurShaderSRVs[shaderType].initialize(maxShaderTextureImageUnits); mForceSetShaderSamplerStates[shaderType].resize(maxShaderTextureImageUnits, true); mCurShaderSamplerStates[shaderType].resize(maxShaderTextureImageUnits); } mCurRTVs.initialize(caps.maxColorAttachments); mCurComputeUAVs.initialize(caps.maxImageUnits); // Initialize cached NULL SRV block mNullSRVs.resize(caps.maxShaderTextureImageUnits[gl::ShaderType::Fragment], nullptr); mNullUAVs.resize(caps.maxImageUnits, nullptr); mCurrentValueAttribs.resize(caps.maxVertexAttributes); mShaderConstants.init(caps); mIsMultiviewEnabled = 
extensions.multiview || extensions.multiview2; mIndependentBlendStates = extensions.drawBuffersIndexedAny(); // requires FL10_1 // FL9_3 is limited to 4; ES3.1 context on FL11_0 is limited to 7 mCurBlendStateExt = gl::BlendStateExt(GetImplAs(context)->getNativeCaps().maxDrawBuffers); ANGLE_TRY(mVertexDataManager.initialize(context)); mCurrentAttributes.reserve(gl::MAX_VERTEX_ATTRIBS); return angle::Result::Continue; } void StateManager11::deinitialize() { mCurrentValueAttribs.clear(); mInputLayoutCache.clear(); mVertexDataManager.deinitialize(); mIndexDataManager.deinitialize(); for (d3d11::Buffer &ShaderDriverConstantBuffer : mShaderDriverConstantBuffers) { ShaderDriverConstantBuffer.reset(); } mPointSpriteVertexBuffer.reset(); mPointSpriteIndexBuffer.reset(); } // Applies the render target surface, depth stencil surface, viewport rectangle and // scissor rectangle to the renderer angle::Result StateManager11::syncFramebuffer(const gl::Context *context) { // Check for zero-sized default framebuffer, which is a special case. // in this case we do not wish to modify any state and just silently return false. // this will not report any gl error but will cause the calling method to return. 
if (mFramebuffer11->getState().isDefault()) { RenderTarget11 *firstRT = mFramebuffer11->getFirstRenderTarget(); const gl::Extents &size = firstRT->getExtents(); if (size.empty()) { return angle::Result::Continue; } } RTVArray framebufferRTVs = {{}}; const auto &colorRTs = mFramebuffer11->getCachedColorRenderTargets(); size_t appliedRTIndex = 0; bool skipInactiveRTs = mRenderer->getFeatures().mrtPerfWorkaround.enabled; const auto &drawStates = mFramebuffer11->getState().getDrawBufferStates(); gl::DrawBufferMask activeProgramOutputs = mProgramD3D->getState().getActiveOutputVariables(); UINT maxExistingRT = 0; const auto &colorAttachments = mFramebuffer11->getState().getColorAttachments(); for (size_t rtIndex = 0; rtIndex < colorRTs.size(); ++rtIndex) { const RenderTarget11 *renderTarget = colorRTs[rtIndex]; // Skip inactive rendertargets if the workaround is enabled. if (skipInactiveRTs && (!renderTarget || drawStates[rtIndex] == GL_NONE || !activeProgramOutputs[rtIndex])) { continue; } if (renderTarget) { framebufferRTVs[appliedRTIndex] = renderTarget->getRenderTargetView().get(); ASSERT(framebufferRTVs[appliedRTIndex]); maxExistingRT = static_cast(appliedRTIndex) + 1; // Unset conflicting texture SRVs const gl::FramebufferAttachment &attachment = colorAttachments[rtIndex]; ASSERT(attachment.isAttached()); unsetConflictingAttachmentResources(attachment, renderTarget->getTexture().get()); } appliedRTIndex++; } // Get the depth stencil buffers ID3D11DepthStencilView *framebufferDSV = nullptr; const auto *depthStencilRenderTarget = mFramebuffer11->getCachedDepthStencilRenderTarget(); if (depthStencilRenderTarget) { framebufferDSV = depthStencilRenderTarget->getDepthStencilView().get(); ASSERT(framebufferDSV); // Unset conflicting texture SRVs const gl::FramebufferAttachment *attachment = mFramebuffer11->getState().getDepthOrStencilAttachment(); ASSERT(attachment); unsetConflictingAttachmentResources(*attachment, depthStencilRenderTarget->getTexture().get()); } 
ASSERT(maxExistingRT <= static_cast(context->getCaps().maxDrawBuffers)); // Apply the render target and depth stencil mRenderer->getDeviceContext()->OMSetRenderTargets(maxExistingRT, framebufferRTVs.data(), framebufferDSV); mCurRTVs.clear(); for (UINT i = 0; i < maxExistingRT; i++) { mCurRTVs.update(i, framebufferRTVs[i]); } return angle::Result::Continue; } void StateManager11::invalidateCurrentValueAttrib(size_t attribIndex) { mDirtyCurrentValueAttribs.set(attribIndex); mInternalDirtyBits.set(DIRTY_BIT_CURRENT_VALUE_ATTRIBS); invalidateInputLayout(); invalidateShaders(); } angle::Result StateManager11::syncCurrentValueAttribs( const gl::Context *context, const std::vector ¤tValues) { const gl::ProgramExecutable &executable = mProgramD3D->getState().getExecutable(); const auto &activeAttribsMask = executable.getActiveAttribLocationsMask(); const auto &dirtyActiveAttribs = (activeAttribsMask & mDirtyCurrentValueAttribs); if (!dirtyActiveAttribs.any()) { return angle::Result::Continue; } const auto &vertexAttributes = mVertexArray11->getState().getVertexAttributes(); const auto &vertexBindings = mVertexArray11->getState().getVertexBindings(); mDirtyCurrentValueAttribs = (mDirtyCurrentValueAttribs & ~dirtyActiveAttribs); for (auto attribIndex : dirtyActiveAttribs) { if (vertexAttributes[attribIndex].enabled) continue; const auto *attrib = &vertexAttributes[attribIndex]; const auto ¤tValue = currentValues[attribIndex]; TranslatedAttribute *currentValueAttrib = &mCurrentValueAttribs[attribIndex]; currentValueAttrib->currentValueType = currentValue.Type; currentValueAttrib->attribute = attrib; currentValueAttrib->binding = &vertexBindings[attrib->bindingIndex]; mDirtyVertexBufferRange.extend(static_cast(attribIndex)); ANGLE_TRY(mVertexDataManager.storeCurrentValue(context, currentValue, currentValueAttrib, static_cast(attribIndex))); } return angle::Result::Continue; } void StateManager11::setInputLayout(const d3d11::InputLayout *inputLayout) { if 
(setInputLayoutInternal(inputLayout)) { invalidateInputLayout(); } } bool StateManager11::setInputLayoutInternal(const d3d11::InputLayout *inputLayout) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); if (inputLayout == nullptr) { if (!mCurrentInputLayout.empty()) { deviceContext->IASetInputLayout(nullptr); mCurrentInputLayout.clear(); return true; } } else if (inputLayout->getSerial() != mCurrentInputLayout) { deviceContext->IASetInputLayout(inputLayout->get()); mCurrentInputLayout = inputLayout->getSerial(); return true; } return false; } bool StateManager11::queueVertexBufferChange(size_t bufferIndex, ID3D11Buffer *buffer, UINT stride, UINT offset) { if (buffer != mCurrentVertexBuffers[bufferIndex] || stride != mCurrentVertexStrides[bufferIndex] || offset != mCurrentVertexOffsets[bufferIndex]) { mDirtyVertexBufferRange.extend(static_cast(bufferIndex)); mCurrentVertexBuffers[bufferIndex] = buffer; mCurrentVertexStrides[bufferIndex] = stride; mCurrentVertexOffsets[bufferIndex] = offset; return true; } return false; } void StateManager11::applyVertexBufferChanges() { if (mDirtyVertexBufferRange.empty()) { return; } ASSERT(mDirtyVertexBufferRange.high() <= gl::MAX_VERTEX_ATTRIBS); UINT start = static_cast(mDirtyVertexBufferRange.low()); ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); deviceContext->IASetVertexBuffers(start, static_cast(mDirtyVertexBufferRange.length()), &mCurrentVertexBuffers[start], &mCurrentVertexStrides[start], &mCurrentVertexOffsets[start]); mDirtyVertexBufferRange = gl::RangeUI(gl::MAX_VERTEX_ATTRIBS, 0); } void StateManager11::setSingleVertexBuffer(const d3d11::Buffer *buffer, UINT stride, UINT offset) { ID3D11Buffer *native = buffer ? 
buffer->get() : nullptr; if (queueVertexBufferChange(0, native, stride, offset)) { invalidateInputLayout(); applyVertexBufferChanges(); } } angle::Result StateManager11::updateState(const gl::Context *context, gl::PrimitiveMode mode, GLint firstVertex, GLsizei vertexOrIndexCount, gl::DrawElementsType indexTypeOrInvalid, const void *indices, GLsizei instanceCount, GLint baseVertex, GLuint baseInstance, bool promoteDynamic) { const gl::State &glState = context->getState(); // TODO(jmadill): Use dirty bits. if (mRenderTargetIsDirty) { processFramebufferInvalidation(context); mRenderTargetIsDirty = false; } // TODO(jmadill): Use dirty bits. if (mProgramD3D->updateSamplerMapping() == ProgramD3D::SamplerMapping::WasDirty) { invalidateTexturesAndSamplers(); } // TODO(jmadill): Use dirty bits. if (mProgramD3D->anyShaderUniformsDirty()) { mInternalDirtyBits.set(DIRTY_BIT_PROGRAM_UNIFORMS); } // Swizzling can cause internal state changes with blit shaders. if (mDirtySwizzles) { ANGLE_TRY(generateSwizzles(context)); mDirtySwizzles = false; } ANGLE_TRY(mFramebuffer11->markAttachmentsDirty(context)); // TODO(jiawei.shao@intel.com): This can be recomputed only on framebuffer or multisample mask // state changes. RenderTarget11 *firstRT = mFramebuffer11->getFirstRenderTarget(); int samples = (firstRT ? firstRT->getSamples() : 0); unsigned int sampleMask = GetBlendSampleMask(glState, samples); if (sampleMask != mCurSampleMask) { mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } ANGLE_TRY(mVertexArray11->syncStateForDraw(context, firstVertex, vertexOrIndexCount, indexTypeOrInvalid, indices, instanceCount, baseVertex, baseInstance, promoteDynamic)); // Changes in the draw call can affect the vertex buffer translations. 
if (!mLastFirstVertex.valid() || mLastFirstVertex.value() != firstVertex) { mLastFirstVertex = firstVertex; invalidateInputLayout(); } // The ShaderConstants only need to be updated when the program uses vertexID if (mProgramD3D->usesVertexID()) { GLint firstVertexOnChange = firstVertex + baseVertex; ASSERT(mVertexArray11); if (mVertexArray11->hasActiveDynamicAttrib(context) && indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum) { // drawElements with Dynamic attribute // the firstVertex is already including baseVertex when // doing ComputeStartVertex firstVertexOnChange = firstVertex; } if (mShaderConstants.onFirstVertexChange(firstVertexOnChange)) { mInternalDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS); } } if (indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum) { ANGLE_TRY(applyIndexBuffer(context, vertexOrIndexCount, indexTypeOrInvalid, indices)); } if (mLastAppliedDrawMode != mode) { mLastAppliedDrawMode = mode; mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); bool pointDrawMode = (mode == gl::PrimitiveMode::Points); if (pointDrawMode != mCurRasterState.pointDrawMode) { mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); // Changing from points to not points (or vice-versa) affects the geometry shader. 
invalidateShaders(); } } auto dirtyBitsCopy = mInternalDirtyBits & mGraphicsDirtyBitsMask; mInternalDirtyBits &= ~mGraphicsDirtyBitsMask; for (auto iter = dirtyBitsCopy.begin(), end = dirtyBitsCopy.end(); iter != end; ++iter) { switch (*iter) { case DIRTY_BIT_RENDER_TARGET: ANGLE_TRY(syncFramebuffer(context)); break; case DIRTY_BIT_VIEWPORT_STATE: syncViewport(context); break; case DIRTY_BIT_SCISSOR_STATE: syncScissorRectangle(context); break; case DIRTY_BIT_RASTERIZER_STATE: ANGLE_TRY(syncRasterizerState(context, mode)); break; case DIRTY_BIT_BLEND_STATE: ANGLE_TRY(syncBlendState( context, glState.getBlendStateExt(), glState.getBlendColor(), sampleMask, glState.isSampleAlphaToCoverageEnabled(), glState.hasConstantAlphaBlendFunc())); break; case DIRTY_BIT_DEPTH_STENCIL_STATE: ANGLE_TRY(syncDepthStencilState(context)); break; case DIRTY_BIT_GRAPHICS_SRVUAV_STATE: iter.resetLaterBit(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); ANGLE_TRY(syncTextures(context)); break; case DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE: // TODO(jmadill): More fine-grained update. ANGLE_TRY(syncTextures(context)); break; case DIRTY_BIT_PROGRAM_UNIFORMS: ANGLE_TRY(applyUniforms(context)); break; case DIRTY_BIT_DRIVER_UNIFORMS: // This must happen after viewport sync; the viewport affects builtin uniforms. 
ANGLE_TRY(applyDriverUniforms(context)); break; case DIRTY_BIT_PROGRAM_UNIFORM_BUFFERS: ANGLE_TRY(syncUniformBuffers(context)); break; case DIRTY_BIT_PROGRAM_ATOMIC_COUNTER_BUFFERS: // TODO(jie.a.chen@intel.com): http://anglebug.com/1729 break; case DIRTY_BIT_PROGRAM_SHADER_STORAGE_BUFFERS: // TODO(jie.a.chen@intel.com): http://anglebug.com/1951 break; case DIRTY_BIT_SHADERS: ANGLE_TRY(syncProgram(context, mode)); break; case DIRTY_BIT_CURRENT_VALUE_ATTRIBS: ANGLE_TRY(syncCurrentValueAttribs(context, glState.getVertexAttribCurrentValues())); break; case DIRTY_BIT_TRANSFORM_FEEDBACK: ANGLE_TRY(syncTransformFeedbackBuffers(context)); break; case DIRTY_BIT_VERTEX_BUFFERS_AND_INPUT_LAYOUT: ANGLE_TRY(syncVertexBuffersAndInputLayout(context, mode, firstVertex, vertexOrIndexCount, indexTypeOrInvalid, instanceCount)); break; case DIRTY_BIT_PRIMITIVE_TOPOLOGY: syncPrimitiveTopology(glState, mode); break; default: UNREACHABLE(); break; } } // Check that we haven't set any dirty bits in the flushing of the dirty bits loop, except // DIRTY_BIT_COMPUTE_SRVUAV_STATE dirty bit. ASSERT((mInternalDirtyBits & mGraphicsDirtyBitsMask).none()); return angle::Result::Continue; } void StateManager11::setShaderResourceShared(gl::ShaderType shaderType, UINT resourceSlot, const d3d11::SharedSRV *srv) { setShaderResourceInternal(shaderType, resourceSlot, srv); // TODO(jmadill): Narrower dirty region. mInternalDirtyBits.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); } void StateManager11::setShaderResource(gl::ShaderType shaderType, UINT resourceSlot, const d3d11::ShaderResourceView *srv) { setShaderResourceInternal(shaderType, resourceSlot, srv); // TODO(jmadill): Narrower dirty region. 
mInternalDirtyBits.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); } void StateManager11::setPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY primitiveTopology) { if (setPrimitiveTopologyInternal(primitiveTopology)) { mInternalDirtyBits.set(DIRTY_BIT_PRIMITIVE_TOPOLOGY); } } bool StateManager11::setPrimitiveTopologyInternal(D3D11_PRIMITIVE_TOPOLOGY primitiveTopology) { if (primitiveTopology != mCurrentPrimitiveTopology) { mRenderer->getDeviceContext()->IASetPrimitiveTopology(primitiveTopology); mCurrentPrimitiveTopology = primitiveTopology; return true; } else { return false; } } void StateManager11::setDrawShaders(const d3d11::VertexShader *vertexShader, const d3d11::GeometryShader *geometryShader, const d3d11::PixelShader *pixelShader) { setVertexShader(vertexShader); setGeometryShader(geometryShader); setPixelShader(pixelShader); } void StateManager11::setVertexShader(const d3d11::VertexShader *shader) { ResourceSerial serial = shader ? shader->getSerial() : ResourceSerial(0); if (serial != mAppliedShaders[gl::ShaderType::Vertex]) { ID3D11VertexShader *appliedShader = shader ? shader->get() : nullptr; mRenderer->getDeviceContext()->VSSetShader(appliedShader, nullptr, 0); mAppliedShaders[gl::ShaderType::Vertex] = serial; invalidateShaders(); } } void StateManager11::setGeometryShader(const d3d11::GeometryShader *shader) { ResourceSerial serial = shader ? shader->getSerial() : ResourceSerial(0); if (serial != mAppliedShaders[gl::ShaderType::Geometry]) { ID3D11GeometryShader *appliedShader = shader ? shader->get() : nullptr; mRenderer->getDeviceContext()->GSSetShader(appliedShader, nullptr, 0); mAppliedShaders[gl::ShaderType::Geometry] = serial; invalidateShaders(); } } void StateManager11::setPixelShader(const d3d11::PixelShader *shader) { ResourceSerial serial = shader ? shader->getSerial() : ResourceSerial(0); if (serial != mAppliedShaders[gl::ShaderType::Fragment]) { ID3D11PixelShader *appliedShader = shader ? 
shader->get() : nullptr; mRenderer->getDeviceContext()->PSSetShader(appliedShader, nullptr, 0); mAppliedShaders[gl::ShaderType::Fragment] = serial; invalidateShaders(); } } void StateManager11::setComputeShader(const d3d11::ComputeShader *shader) { ResourceSerial serial = shader ? shader->getSerial() : ResourceSerial(0); if (serial != mAppliedShaders[gl::ShaderType::Compute]) { ID3D11ComputeShader *appliedShader = shader ? shader->get() : nullptr; mRenderer->getDeviceContext()->CSSetShader(appliedShader, nullptr, 0); mAppliedShaders[gl::ShaderType::Compute] = serial; invalidateShaders(); } } void StateManager11::setVertexConstantBuffer(unsigned int slot, const d3d11::Buffer *buffer) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); auto ¤tSerial = mCurrentConstantBufferVS[slot]; mCurrentConstantBufferVSOffset[slot] = 0; mCurrentConstantBufferVSSize[slot] = 0; if (buffer) { if (currentSerial != buffer->getSerial()) { deviceContext->VSSetConstantBuffers(slot, 1, buffer->getPointer()); currentSerial = buffer->getSerial(); invalidateConstantBuffer(slot); } } else { if (!currentSerial.empty()) { ID3D11Buffer *nullBuffer = nullptr; deviceContext->VSSetConstantBuffers(slot, 1, &nullBuffer); currentSerial.clear(); invalidateConstantBuffer(slot); } } } void StateManager11::setPixelConstantBuffer(unsigned int slot, const d3d11::Buffer *buffer) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); auto ¤tSerial = mCurrentConstantBufferPS[slot]; mCurrentConstantBufferPSOffset[slot] = 0; mCurrentConstantBufferPSSize[slot] = 0; if (buffer) { if (currentSerial != buffer->getSerial()) { deviceContext->PSSetConstantBuffers(slot, 1, buffer->getPointer()); currentSerial = buffer->getSerial(); invalidateConstantBuffer(slot); } } else { if (!currentSerial.empty()) { ID3D11Buffer *nullBuffer = nullptr; deviceContext->PSSetConstantBuffers(slot, 1, &nullBuffer); currentSerial.clear(); invalidateConstantBuffer(slot); } } } void 
StateManager11::setDepthStencilState(const d3d11::DepthStencilState *depthStencilState, UINT stencilRef) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); if (depthStencilState) { deviceContext->OMSetDepthStencilState(depthStencilState->get(), stencilRef); } else { deviceContext->OMSetDepthStencilState(nullptr, stencilRef); } mInternalDirtyBits.set(DIRTY_BIT_DEPTH_STENCIL_STATE); } void StateManager11::setSimpleBlendState(const d3d11::BlendState *blendState) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); if (blendState) { deviceContext->OMSetBlendState(blendState->get(), nullptr, 0xFFFFFFFF); } else { deviceContext->OMSetBlendState(nullptr, nullptr, 0xFFFFFFFF); } mInternalDirtyBits.set(DIRTY_BIT_BLEND_STATE); } void StateManager11::setRasterizerState(const d3d11::RasterizerState *rasterizerState) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); if (rasterizerState) { deviceContext->RSSetState(rasterizerState->get()); } else { deviceContext->RSSetState(nullptr); } mInternalDirtyBits.set(DIRTY_BIT_RASTERIZER_STATE); } void StateManager11::setSimpleViewport(const gl::Extents &extents) { setSimpleViewport(extents.width, extents.height); } void StateManager11::setSimpleViewport(int width, int height) { D3D11_VIEWPORT viewport; viewport.TopLeftX = 0; viewport.TopLeftY = 0; viewport.Width = static_cast(width); viewport.Height = static_cast(height); viewport.MinDepth = 0.0f; viewport.MaxDepth = 1.0f; mRenderer->getDeviceContext()->RSSetViewports(1, &viewport); mInternalDirtyBits.set(DIRTY_BIT_VIEWPORT_STATE); } void StateManager11::setSimplePixelTextureAndSampler(const d3d11::SharedSRV &srv, const d3d11::SamplerState &samplerState) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); setShaderResourceInternal(gl::ShaderType::Fragment, 0, &srv); deviceContext->PSSetSamplers(0, 1, samplerState.getPointer()); mInternalDirtyBits.set(DIRTY_BIT_TEXTURE_AND_SAMPLER_STATE); 
mForceSetShaderSamplerStates[gl::ShaderType::Fragment][0] = true; } void StateManager11::setSimpleScissorRect(const gl::Rectangle &glRect) { D3D11_RECT scissorRect; scissorRect.left = glRect.x; scissorRect.right = glRect.x + glRect.width; scissorRect.top = glRect.y; scissorRect.bottom = glRect.y + glRect.height; setScissorRectD3D(scissorRect); } void StateManager11::setScissorRectD3D(const D3D11_RECT &d3dRect) { mRenderer->getDeviceContext()->RSSetScissorRects(1, &d3dRect); mInternalDirtyBits.set(DIRTY_BIT_SCISSOR_STATE); } angle::Result StateManager11::syncTextures(const gl::Context *context) { ANGLE_TRY(applyTexturesForSRVs(context, gl::ShaderType::Vertex)); ANGLE_TRY(applyTexturesForSRVs(context, gl::ShaderType::Fragment)); if (mProgramD3D->hasShaderStage(gl::ShaderType::Geometry)) { ANGLE_TRY(applyTexturesForSRVs(context, gl::ShaderType::Geometry)); } return angle::Result::Continue; } angle::Result StateManager11::setSamplerState(const gl::Context *context, gl::ShaderType type, int index, gl::Texture *texture, const gl::SamplerState &samplerState) { #if !defined(NDEBUG) // Storage should exist, texture should be complete. Only verified in Debug. 
TextureD3D *textureD3D = GetImplAs(texture); TextureStorage *storage = nullptr; ANGLE_TRY(textureD3D->getNativeTexture(context, &storage)); ASSERT(storage); #endif // !defined(NDEBUG) auto *deviceContext = mRenderer->getDeviceContext(); ASSERT(index < mRenderer->getNativeCaps().maxShaderTextureImageUnits[type]); if (mForceSetShaderSamplerStates[type][index] || memcmp(&samplerState, &mCurShaderSamplerStates[type][index], sizeof(gl::SamplerState)) != 0) { ID3D11SamplerState *dxSamplerState = nullptr; ANGLE_TRY(mRenderer->getSamplerState(context, samplerState, &dxSamplerState)); ASSERT(dxSamplerState != nullptr); switch (type) { case gl::ShaderType::Vertex: deviceContext->VSSetSamplers(index, 1, &dxSamplerState); break; case gl::ShaderType::Fragment: deviceContext->PSSetSamplers(index, 1, &dxSamplerState); break; case gl::ShaderType::Compute: deviceContext->CSSetSamplers(index, 1, &dxSamplerState); break; case gl::ShaderType::Geometry: deviceContext->GSSetSamplers(index, 1, &dxSamplerState); break; default: UNREACHABLE(); break; } mCurShaderSamplerStates[type][index] = samplerState; } mForceSetShaderSamplerStates[type][index] = false; // Sampler metadata that's passed to shaders in uniforms is stored separately from rest of the // sampler state since having it in contiguous memory makes it possible to memcpy to a constant // buffer, and it doesn't affect the state set by // PSSetSamplers/VSSetSamplers/CSSetSamplers/GSSetSamplers. 
mShaderConstants.onSamplerChange(type, index, *texture, samplerState); return angle::Result::Continue; } angle::Result StateManager11::setTextureForSampler(const gl::Context *context, gl::ShaderType type, int index, gl::Texture *texture, const gl::SamplerState &sampler) { const d3d11::SharedSRV *textureSRV = nullptr; if (texture) { TextureD3D *textureImpl = GetImplAs(texture); TextureStorage *texStorage = nullptr; ANGLE_TRY(textureImpl->getNativeTexture(context, &texStorage)); // Texture should be complete and have a storage ASSERT(texStorage); TextureStorage11 *storage11 = GetAs(texStorage); ANGLE_TRY( storage11->getSRVForSampler(context, texture->getTextureState(), sampler, &textureSRV)); // If we get an invalid SRV here, something went wrong in the texture class and we're // unexpectedly missing the shader resource view. ASSERT(textureSRV->valid()); textureImpl->resetDirty(); } ASSERT( (type == gl::ShaderType::Fragment && index < mRenderer->getNativeCaps().maxShaderTextureImageUnits[gl::ShaderType::Fragment]) || (type == gl::ShaderType::Vertex && index < mRenderer->getNativeCaps().maxShaderTextureImageUnits[gl::ShaderType::Vertex]) || (type == gl::ShaderType::Compute && index < mRenderer->getNativeCaps().maxShaderTextureImageUnits[gl::ShaderType::Compute])); setShaderResourceInternal(type, index, textureSRV); return angle::Result::Continue; } angle::Result StateManager11::setImageState(const gl::Context *context, gl::ShaderType type, int index, const gl::ImageUnit &imageUnit) { ASSERT(index < mRenderer->getNativeCaps().maxShaderImageUniforms[type]); mShaderConstants.onImageChange(type, index, imageUnit); return angle::Result::Continue; } // For each Direct3D sampler of either the pixel or vertex stage, // looks up the corresponding OpenGL texture image unit and texture type, // and sets the texture and its addressing/filtering state (or NULL when inactive). // Sampler mapping needs to be up-to-date on the program object before this is called. 
// Binds sampler state and SRVs for every sampler the program uses in |shaderType|, substituting
// the renderer's incomplete texture when the active-textures cache holds no texture for the
// unit. Afterwards binds the read-only images of the stage as SRVs.
angle::Result StateManager11::applyTexturesForSRVs(const gl::Context *context,
                                                   gl::ShaderType shaderType)
{
    const auto &state      = context->getState();
    const auto &nativeCaps = context->getCaps();

    ASSERT(!mProgramD3D->isSamplerMappingDirty());

    // TODO(jmadill): Use the Program's sampler bindings.
    const gl::ActiveTexturesCache &activeTextures = state.getActiveTexturesCache();

    const gl::RangeUI usedSamplers = mProgramD3D->getUsedSamplerRange(shaderType);
    for (unsigned int sampler = usedSamplers.low(); sampler < usedSamplers.high(); sampler++)
    {
        GLint unit = mProgramD3D->getSamplerMapping(shaderType, sampler, nativeCaps);
        ASSERT(unit != -1);

        // A nullptr texture indicates incomplete.
        gl::Texture *boundTexture = activeTextures[unit];
        if (boundTexture == nullptr)
        {
            gl::TextureType fallbackType =
                mProgramD3D->getSamplerTextureType(shaderType, sampler);

            // Texture is not sampler complete or it is in use by the framebuffer. Bind the
            // incomplete texture.
            gl::Texture *fallback = nullptr;
            ANGLE_TRY(mRenderer->getIncompleteTexture(context, fallbackType, &fallback));
            ANGLE_TRY(setSamplerState(context, shaderType, sampler, fallback,
                                      fallback->getSamplerState()));
            ANGLE_TRY(setTextureForSampler(context, shaderType, sampler, fallback,
                                           fallback->getSamplerState()));
            continue;
        }

        // A bound sampler object takes precedence over the texture's own sampler state.
        gl::Sampler *boundSampler = state.getSampler(unit);
        const gl::SamplerState &effectiveState =
            boundSampler ? boundSampler->getSamplerState() : boundTexture->getSamplerState();

        ANGLE_TRY(setSamplerState(context, shaderType, sampler, boundTexture, effectiveState));
        ANGLE_TRY(setTextureForSampler(context, shaderType, sampler, boundTexture, effectiveState));
    }

    const gl::RangeUI usedImages = mProgramD3D->getUsedImageRange(shaderType, true);
    for (unsigned int image = usedImages.low(); image < usedImages.high(); image++)
    {
        GLint unitIndex = mProgramD3D->getImageMapping(shaderType, image, true, nativeCaps);
        ASSERT(unitIndex != -1);

        const gl::ImageUnit &unit = state.getImageUnit(unitIndex);
        if (!unit.layered)
        {
            // NOTE(review): the stage passed here is hard-coded to Compute rather than
            // |shaderType| - confirm this is intended for non-compute callers.
            ANGLE_TRY(setImageState(context, gl::ShaderType::Compute, image - usedImages.low(),
                                    unit));
            invalidateProgramUniforms();
        }
        ANGLE_TRY(setTextureForImage(context, shaderType, image, true, unit));
    }

    return angle::Result::Continue;
}

// Binds the writable images the program uses in |shaderType| as UAVs. Only the compute stage is
// accepted.
angle::Result StateManager11::applyTexturesForUAVs(const gl::Context *context,
                                                   gl::ShaderType shaderType)
{
    ASSERT(shaderType == gl::ShaderType::Compute);

    const auto &state      = context->getState();
    const auto &nativeCaps = context->getCaps();

    const gl::RangeUI usedImages = mProgramD3D->getUsedImageRange(shaderType, false);
    for (unsigned int image = usedImages.low(); image < usedImages.high(); image++)
    {
        GLint unitIndex = mProgramD3D->getImageMapping(shaderType, image, false, nativeCaps);
        ASSERT(unitIndex != -1);

        const gl::ImageUnit &unit = state.getImageUnit(unitIndex);
        if (!unit.layered)
        {
            ANGLE_TRY(setImageState(context, gl::ShaderType::Compute, image - usedImages.low(),
                                    unit));
            invalidateProgramUniforms();
        }
        ANGLE_TRY(setTextureForImage(context, shaderType, image, false, unit));
    }

    return angle::Result::Continue;
}

// The return type below belongs to syncTexturesForCompute(), whose definition continues on the
// next source line.
angle::Result
StateManager11::syncTexturesForCompute(const gl::Context *context) { ANGLE_TRY(applyTexturesForUAVs(context, gl::ShaderType::Compute)); ANGLE_TRY(applyTexturesForSRVs(context, gl::ShaderType::Compute)); return angle::Result::Continue; } angle::Result StateManager11::setTextureForImage(const gl::Context *context, gl::ShaderType type, int index, bool readonly, const gl::ImageUnit &imageUnit) { TextureD3D *textureImpl = nullptr; if (!imageUnit.texture.get()) { // The texture is used in shader. However, there is no resource binding to it. We // should clear the corresponding UAV/SRV in case the previous view type is a buffer not a // texture. Otherwise, below error will be reported. The Unordered Access View dimension // declared in the shader code (TEXTURE2D) does not match the view type bound to slot 0 // of the Compute Shader unit (BUFFER). if (readonly) { setShaderResourceInternal(type, static_cast(index), nullptr); } else { setUnorderedAccessViewInternal( type, static_cast(index), nullptr); } return angle::Result::Continue; } textureImpl = GetImplAs(imageUnit.texture.get()); TextureStorage *texStorage = nullptr; ANGLE_TRY(textureImpl->getNativeTexture(context, &texStorage)); // Texture should be complete and have a storage ASSERT(texStorage); TextureStorage11 *storage11 = GetAs(texStorage); if (readonly) { const d3d11::SharedSRV *textureSRV = nullptr; ANGLE_TRY(storage11->getSRVForImage(context, imageUnit, &textureSRV)); // If we get an invalid SRV here, something went wrong in the texture class and we're // unexpectedly missing the shader resource view. ASSERT(textureSRV->valid()); ASSERT((index < mRenderer->getNativeCaps().maxImageUnits)); setShaderResourceInternal(type, index, textureSRV); } else { const d3d11::SharedUAV *textureUAV = nullptr; ANGLE_TRY(storage11->getUAVForImage(context, imageUnit, &textureUAV)); // If we get an invalid UAV here, something went wrong in the texture class and we're // unexpectedly missing the unordered access view. 
ASSERT(textureUAV->valid()); ASSERT((index < mRenderer->getNativeCaps().maxImageUnits)); setUnorderedAccessViewInternal(type, index, textureUAV); } textureImpl->resetDirty(); return angle::Result::Continue; } // Things that affect a program's dirtyness: // 1. Directly changing the program executable -> triggered in StateManager11::syncState. // 2. The vertex attribute layout -> triggered in VertexArray11::syncState/signal. // 3. The fragment shader's rendertargets -> triggered in Framebuffer11::syncState/signal. // 4. Enabling/disabling rasterizer discard. -> triggered in StateManager11::syncState. // 5. Enabling/disabling transform feedback. -> checked in StateManager11::updateState. // 6. An internal shader was used. -> triggered in StateManager11::set*Shader. // 7. Drawing with/without point sprites. -> checked in StateManager11::updateState. // TODO(jmadill): Use dirty bits for transform feedback. angle::Result StateManager11::syncProgram(const gl::Context *context, gl::PrimitiveMode drawMode) { Context11 *context11 = GetImplAs(context); ANGLE_TRY(context11->triggerDrawCallProgramRecompilation(context, drawMode)); const auto &glState = context->getState(); mProgramD3D->updateCachedInputLayout(mVertexArray11->getCurrentStateSerial(), glState); // Binaries must be compiled before the sync. 
ASSERT(mProgramD3D->hasVertexExecutableForCachedInputLayout()); ASSERT(mProgramD3D->hasGeometryExecutableForPrimitiveType(glState, drawMode)); ASSERT(mProgramD3D->hasPixelExecutableForCachedOutputLayout()); ShaderExecutableD3D *vertexExe = nullptr; ANGLE_TRY(mProgramD3D->getVertexExecutableForCachedInputLayout(context11, &vertexExe, nullptr)); ShaderExecutableD3D *pixelExe = nullptr; ANGLE_TRY(mProgramD3D->getPixelExecutableForCachedOutputLayout(context11, &pixelExe, nullptr)); ShaderExecutableD3D *geometryExe = nullptr; ANGLE_TRY(mProgramD3D->getGeometryExecutableForPrimitiveType(context11, glState, drawMode, &geometryExe, nullptr)); const d3d11::VertexShader *vertexShader = (vertexExe ? &GetAs(vertexExe)->getVertexShader() : nullptr); // Skip pixel shader if we're doing rasterizer discard. const d3d11::PixelShader *pixelShader = nullptr; if (!glState.getRasterizerState().rasterizerDiscard) { pixelShader = (pixelExe ? &GetAs(pixelExe)->getPixelShader() : nullptr); } const d3d11::GeometryShader *geometryShader = nullptr; if (glState.isTransformFeedbackActiveUnpaused()) { geometryShader = (vertexExe ? &GetAs(vertexExe)->getStreamOutShader() : nullptr); } else { geometryShader = (geometryExe ? &GetAs(geometryExe)->getGeometryShader() : nullptr); } setDrawShaders(vertexShader, geometryShader, pixelShader); // Explicitly clear the shaders dirty bit. mInternalDirtyBits.reset(DIRTY_BIT_SHADERS); return angle::Result::Continue; } angle::Result StateManager11::syncProgramForCompute(const gl::Context *context) { Context11 *context11 = GetImplAs(context); ANGLE_TRY(context11->triggerDispatchCallProgramRecompilation(context)); mProgramD3D->updateCachedComputeImage2DBindLayout(context); // Binaries must be compiled before the sync. 
ASSERT(mProgramD3D->hasComputeExecutableForCachedImage2DBindLayout()); ShaderExecutableD3D *computeExe = nullptr; ANGLE_TRY( mProgramD3D->getComputeExecutableForImage2DBindLayout(context11, &computeExe, nullptr)); const d3d11::ComputeShader *computeShader = (computeExe ? &GetAs(computeExe)->getComputeShader() : nullptr); setComputeShader(computeShader); // Explicitly clear the shaders dirty bit. mInternalDirtyBits.reset(DIRTY_BIT_SHADERS); return angle::Result::Continue; } angle::Result StateManager11::syncVertexBuffersAndInputLayout( const gl::Context *context, gl::PrimitiveMode mode, GLint firstVertex, GLsizei vertexOrIndexCount, gl::DrawElementsType indexTypeOrInvalid, GLsizei instanceCount) { const auto &vertexArrayAttribs = mVertexArray11->getTranslatedAttribs(); // Sort the attributes according to ensure we re-use similar input layouts. AttribIndexArray sortedSemanticIndices; SortAttributesByLayout(*mProgramD3D, vertexArrayAttribs, mCurrentValueAttribs, &sortedSemanticIndices, &mCurrentAttributes); D3D_FEATURE_LEVEL featureLevel = mRenderer->getRenderer11DeviceCaps().featureLevel; // If we are using FL 9_3, make sure the first attribute is not instanced if (featureLevel <= D3D_FEATURE_LEVEL_9_3 && !mCurrentAttributes.empty()) { if (mCurrentAttributes[0]->divisor > 0) { Optional firstNonInstancedIndex = FindFirstNonInstanced(mCurrentAttributes); if (firstNonInstancedIndex.valid()) { size_t index = firstNonInstancedIndex.value(); std::swap(mCurrentAttributes[0], mCurrentAttributes[index]); std::swap(sortedSemanticIndices[0], sortedSemanticIndices[index]); } } } // Update the applied input layout by querying the cache. const gl::State &state = context->getState(); const d3d11::InputLayout *inputLayout = nullptr; ANGLE_TRY(mInputLayoutCache.getInputLayout(GetImplAs(context), state, mCurrentAttributes, sortedSemanticIndices, mode, vertexOrIndexCount, instanceCount, &inputLayout)); setInputLayoutInternal(inputLayout); // Update the applied vertex buffers. 
ANGLE_TRY(applyVertexBuffers(context, mode, indexTypeOrInvalid, firstVertex)); return angle::Result::Continue; } angle::Result StateManager11::applyVertexBuffers(const gl::Context *context, gl::PrimitiveMode mode, gl::DrawElementsType indexTypeOrInvalid, GLint firstVertex) { bool programUsesInstancedPointSprites = mProgramD3D->usesPointSize() && mProgramD3D->usesInstancedPointSpriteEmulation(); bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == gl::PrimitiveMode::Points); // Note that if we use instance emulation, we reserve the first buffer slot. size_t reservedBuffers = GetReservedBufferCount(programUsesInstancedPointSprites); for (size_t attribIndex = 0; attribIndex < (gl::MAX_VERTEX_ATTRIBS - reservedBuffers); ++attribIndex) { ID3D11Buffer *buffer = nullptr; UINT vertexStride = 0; UINT vertexOffset = 0; if (attribIndex < mCurrentAttributes.size()) { const TranslatedAttribute &attrib = *mCurrentAttributes[attribIndex]; Buffer11 *bufferStorage = attrib.storage ? GetAs(attrib.storage) : nullptr; // If indexed pointsprite emulation is active, then we need to take a less efficent code // path. Emulated indexed pointsprite rendering requires that the vertex buffers match // exactly to the indices passed by the caller. This could expand or shrink the vertex // buffer depending on the number of points indicated by the index list or how many // duplicates are found on the index list. 
if (bufferStorage == nullptr) { ASSERT(attrib.vertexBuffer.get()); buffer = GetAs(attrib.vertexBuffer.get())->getBuffer().get(); } else if (instancedPointSpritesActive && indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum) { ASSERT(mVertexArray11->isCachedIndexInfoValid()); TranslatedIndexData indexInfo = mVertexArray11->getCachedIndexInfo(); if (indexInfo.srcIndexData.srcBuffer != nullptr) { const uint8_t *bufferData = nullptr; ANGLE_TRY(indexInfo.srcIndexData.srcBuffer->getData(context, &bufferData)); ASSERT(bufferData != nullptr); ptrdiff_t offset = reinterpret_cast(indexInfo.srcIndexData.srcIndices); indexInfo.srcIndexData.srcBuffer = nullptr; indexInfo.srcIndexData.srcIndices = bufferData + offset; } ANGLE_TRY(bufferStorage->getEmulatedIndexedBuffer(context, &indexInfo.srcIndexData, attrib, firstVertex, &buffer)); mVertexArray11->updateCachedIndexInfo(indexInfo); } else { ANGLE_TRY(bufferStorage->getBuffer( context, BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK, &buffer)); } vertexStride = attrib.stride; ANGLE_TRY(attrib.computeOffset(context, firstVertex, &vertexOffset)); } size_t bufferIndex = reservedBuffers + attribIndex; queueVertexBufferChange(bufferIndex, buffer, vertexStride, vertexOffset); } Context11 *context11 = GetImplAs(context); // Instanced PointSprite emulation requires two additional ID3D11Buffers. A vertex buffer needs // to be created and added to the list of current buffers, strides and offsets collections. // This buffer contains the vertices for a single PointSprite quad. // An index buffer also needs to be created and applied because rendering instanced data on // D3D11 FL9_3 requires DrawIndexedInstanced() to be used. Shaders that contain gl_PointSize and // used without the GL_POINTS rendering mode require a vertex buffer because some drivers cannot // handle missing vertex data and will TDR the system. 
if (programUsesInstancedPointSprites) { constexpr UINT kPointSpriteVertexStride = sizeof(float) * 5; if (!mPointSpriteVertexBuffer.valid()) { static constexpr float kPointSpriteVertices[] = { // Position | TexCoord -1.0f, -1.0f, 0.0f, 0.0f, 1.0f, /* v0 */ -1.0f, 1.0f, 0.0f, 0.0f, 0.0f, /* v1 */ 1.0f, 1.0f, 0.0f, 1.0f, 0.0f, /* v2 */ 1.0f, -1.0f, 0.0f, 1.0f, 1.0f, /* v3 */ -1.0f, -1.0f, 0.0f, 0.0f, 1.0f, /* v4 */ 1.0f, 1.0f, 0.0f, 1.0f, 0.0f, /* v5 */ }; D3D11_SUBRESOURCE_DATA vertexBufferData = {kPointSpriteVertices, 0, 0}; D3D11_BUFFER_DESC vertexBufferDesc; vertexBufferDesc.ByteWidth = sizeof(kPointSpriteVertices); vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; vertexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE; vertexBufferDesc.CPUAccessFlags = 0; vertexBufferDesc.MiscFlags = 0; vertexBufferDesc.StructureByteStride = 0; ANGLE_TRY(mRenderer->allocateResource(context11, vertexBufferDesc, &vertexBufferData, &mPointSpriteVertexBuffer)); } // Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver to avoid // indexing into the vertex buffer. UINT stride = instancedPointSpritesActive ? 
kPointSpriteVertexStride : 0; queueVertexBufferChange(0, mPointSpriteVertexBuffer.get(), stride, 0); if (!mPointSpriteIndexBuffer.valid()) { // Create an index buffer and set it for pointsprite rendering static constexpr unsigned short kPointSpriteIndices[] = { 0, 1, 2, 3, 4, 5, }; D3D11_SUBRESOURCE_DATA indexBufferData = {kPointSpriteIndices, 0, 0}; D3D11_BUFFER_DESC indexBufferDesc; indexBufferDesc.ByteWidth = sizeof(kPointSpriteIndices); indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER; indexBufferDesc.Usage = D3D11_USAGE_IMMUTABLE; indexBufferDesc.CPUAccessFlags = 0; indexBufferDesc.MiscFlags = 0; indexBufferDesc.StructureByteStride = 0; ANGLE_TRY(mRenderer->allocateResource(context11, indexBufferDesc, &indexBufferData, &mPointSpriteIndexBuffer)); } if (instancedPointSpritesActive) { // The index buffer is applied here because Instanced PointSprite emulation uses the a // non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer() // on the renderer will not be called and setting this buffer here ensures that the // rendering path will contain the correct index buffers. syncIndexBuffer(mPointSpriteIndexBuffer.get(), DXGI_FORMAT_R16_UINT, 0); } } applyVertexBufferChanges(); return angle::Result::Continue; } angle::Result StateManager11::applyIndexBuffer(const gl::Context *context, GLsizei indexCount, gl::DrawElementsType indexType, const void *indices) { if (!mIndexBufferIsDirty) { // No streaming or index buffer application necessary. return angle::Result::Continue; } gl::DrawElementsType destElementType = mVertexArray11->getCachedDestinationIndexType(); gl::Buffer *elementArrayBuffer = mVertexArray11->getState().getElementArrayBuffer(); TranslatedIndexData indexInfo; ANGLE_TRY(mIndexDataManager.prepareIndexData(context, indexType, destElementType, indexCount, elementArrayBuffer, indices, &indexInfo)); ID3D11Buffer *buffer = nullptr; DXGI_FORMAT bufferFormat = (indexInfo.indexType == gl::DrawElementsType::UnsignedInt) ? 
DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; if (indexInfo.storage) { Buffer11 *storage = GetAs(indexInfo.storage); ANGLE_TRY(storage->getBuffer(context, BUFFER_USAGE_INDEX, &buffer)); } else { IndexBuffer11 *indexBuffer = GetAs(indexInfo.indexBuffer); buffer = indexBuffer->getBuffer().get(); } // Track dirty indices in the index range cache. indexInfo.srcIndexData.srcIndicesChanged = syncIndexBuffer(buffer, bufferFormat, indexInfo.startOffset); mIndexBufferIsDirty = false; mVertexArray11->updateCachedIndexInfo(indexInfo); return angle::Result::Continue; } void StateManager11::setIndexBuffer(ID3D11Buffer *buffer, DXGI_FORMAT indexFormat, unsigned int offset) { if (syncIndexBuffer(buffer, indexFormat, offset)) { invalidateIndexBuffer(); } } bool StateManager11::syncIndexBuffer(ID3D11Buffer *buffer, DXGI_FORMAT indexFormat, unsigned int offset) { if (buffer != mAppliedIB || indexFormat != mAppliedIBFormat || offset != mAppliedIBOffset) { mRenderer->getDeviceContext()->IASetIndexBuffer(buffer, indexFormat, offset); mAppliedIB = buffer; mAppliedIBFormat = indexFormat; mAppliedIBOffset = offset; return true; } return false; } // Vertex buffer is invalidated outside this function. 
// Recomputes the vertex buffer offset of every instanced attribute (divisor > 0) for the given
// emulated instance. Attributes whose offset changed invalidate the input layout and are added to
// the dirty vertex buffer range; the pending vertex buffer changes are applied before returning.
angle::Result StateManager11::updateVertexOffsetsForPointSpritesEmulation(
    const gl::Context *context,
    GLint startVertex,
    GLsizei emulatedInstanceId)
{
    // Attribute buffers are placed after the buffers reserved for point sprite emulation.
    size_t reservedBuffers = GetReservedBufferCount(true);
    for (size_t attribIndex = 0; attribIndex < mCurrentAttributes.size(); ++attribIndex)
    {
        const auto &attrib = *mCurrentAttributes[attribIndex];
        size_t bufferIndex = reservedBuffers + attribIndex;

        if (attrib.divisor > 0)
        {
            unsigned int offset = 0;
            ANGLE_TRY(attrib.computeOffset(context, startVertex, &offset));
            offset += (attrib.stride * (emulatedInstanceId / attrib.divisor));
            if (offset != mCurrentVertexOffsets[bufferIndex])
            {
                invalidateInputLayout();
                mDirtyVertexBufferRange.extend(static_cast<unsigned int>(bufferIndex));
                mCurrentVertexOffsets[bufferIndex] = offset;
            }
        }
    }

    applyVertexBufferChanges();
    return angle::Result::Continue;
}

// Generates the swizzled views for |texture| using its current swizzle state. A null texture, or
// a texture without native storage, is a no-op.
angle::Result StateManager11::generateSwizzle(const gl::Context *context, gl::Texture *texture)
{
    if (!texture)
    {
        return angle::Result::Continue;
    }

    TextureD3D *textureD3D = GetImplAs<TextureD3D>(texture);
    ASSERT(textureD3D);

    TextureStorage *texStorage = nullptr;
    ANGLE_TRY(textureD3D->getNativeTexture(context, &texStorage));

    if (texStorage)
    {
        TextureStorage11 *storage11          = GetAs<TextureStorage11>(texStorage);
        const gl::TextureState &textureState = texture->getTextureState();
        ANGLE_TRY(storage11->generateSwizzles(context, textureState.getSwizzleState()));
    }

    return angle::Result::Continue;
}

// Walks the sampler range used by the given shader stage and generates swizzles for every mapped
// texture whose state reports swizzleRequired().
angle::Result StateManager11::generateSwizzlesForShader(const gl::Context *context,
                                                        gl::ShaderType type)
{
    const gl::State &glState       = context->getState();
    const gl::RangeUI samplerRange = mProgramD3D->getUsedSamplerRange(type);

    for (unsigned int i = samplerRange.low(); i < samplerRange.high(); i++)
    {
        gl::TextureType textureType = mProgramD3D->getSamplerTextureType(type, i);
        GLint textureUnit           = mProgramD3D->getSamplerMapping(type, i, context->getCaps());
        // A mapping of -1 means the sampler has no texture unit; skip it.
        if (textureUnit != -1)
        {
            gl::Texture *texture = glState.getSamplerTexture(textureUnit, textureType);
            ASSERT(texture);
            if (texture->getTextureState().swizzleRequired())
            {
                ANGLE_TRY(generateSwizzle(context, texture));
            }
        }
    }

    return angle::Result::Continue;
}

// Generates swizzles for the vertex and fragment stages, in that order.
angle::Result StateManager11::generateSwizzles(const gl::Context *context)
{
    ANGLE_TRY(generateSwizzlesForShader(context, gl::ShaderType::Vertex));
    ANGLE_TRY(generateSwizzlesForShader(context, gl::ShaderType::Fragment));
    return angle::Result::Continue;
}

// Uploads the default-uniform-block data of one shader stage when it is dirty and binds its
// constant buffer to RESERVED_CONSTANT_BUFFER_SLOT_DEFAULT_UNIFORM_BLOCK if the serial recorded
// for that slot differs. Only the vertex and fragment stages are implemented.
angle::Result StateManager11::applyUniformsForShader(const gl::Context *context,
                                                     gl::ShaderType shaderType)
{
    UniformStorage11 *shaderUniformStorage =
        GetAs<UniformStorage11>(mProgramD3D->getShaderUniformStorage(shaderType));
    ASSERT(shaderUniformStorage);

    ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext();

    const d3d11::Buffer *shaderConstantBuffer = nullptr;
    ANGLE_TRY(shaderUniformStorage->getConstantBuffer(context, mRenderer, &shaderConstantBuffer));

    if (shaderUniformStorage->size() > 0 && mProgramD3D->areShaderUniformsDirty(shaderType))
    {
        UpdateUniformBuffer(deviceContext, shaderUniformStorage, shaderConstantBuffer);
    }

    unsigned int slot = d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DEFAULT_UNIFORM_BLOCK;

    switch (shaderType)
    {
        case gl::ShaderType::Vertex:
            if (mCurrentConstantBufferVS[slot] != shaderConstantBuffer->getSerial())
            {
                deviceContext->VSSetConstantBuffers(slot, 1, shaderConstantBuffer->getPointer());
                // The whole buffer is bound, so the cached offset and size are reset to 0.
                mCurrentConstantBufferVS[slot]       = shaderConstantBuffer->getSerial();
                mCurrentConstantBufferVSOffset[slot] = 0;
                mCurrentConstantBufferVSSize[slot]   = 0;
            }
            break;

        case gl::ShaderType::Fragment:
            if (mCurrentConstantBufferPS[slot] != shaderConstantBuffer->getSerial())
            {
                deviceContext->PSSetConstantBuffers(slot, 1, shaderConstantBuffer->getPointer());
                mCurrentConstantBufferPS[slot]       = shaderConstantBuffer->getSerial();
                mCurrentConstantBufferPSOffset[slot] = 0;
                mCurrentConstantBufferPSSize[slot]   = 0;
            }
            break;

        // TODO(jiawei.shao@intel.com): apply geometry shader uniforms
        case gl::ShaderType::Geometry:
            UNIMPLEMENTED();
            break;

        default:
            UNREACHABLE();
            break;
    }

    return angle::Result::Continue;
}
angle::Result StateManager11::applyUniforms(const gl::Context *context) { ANGLE_TRY(applyUniformsForShader(context, gl::ShaderType::Vertex)); ANGLE_TRY(applyUniformsForShader(context, gl::ShaderType::Fragment)); if (mProgramD3D->hasShaderStage(gl::ShaderType::Geometry)) { ANGLE_TRY(applyUniformsForShader(context, gl::ShaderType::Geometry)); } mProgramD3D->markUniformsClean(); return angle::Result::Continue; } angle::Result StateManager11::applyDriverUniformsForShader(const gl::Context *context, gl::ShaderType shaderType) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); d3d11::Buffer &shaderDriverConstantBuffer = mShaderDriverConstantBuffers[shaderType]; if (!shaderDriverConstantBuffer.valid()) { size_t requiredSize = mShaderConstants.getRequiredBufferSize(shaderType); D3D11_BUFFER_DESC constantBufferDescription = {}; d3d11::InitConstantBufferDesc(&constantBufferDescription, requiredSize); ANGLE_TRY(mRenderer->allocateResource( GetImplAs(context), constantBufferDescription, &shaderDriverConstantBuffer)); ID3D11Buffer *driverConstants = shaderDriverConstantBuffer.get(); switch (shaderType) { case gl::ShaderType::Vertex: deviceContext->VSSetConstantBuffers(d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER, 1, &driverConstants); break; case gl::ShaderType::Fragment: deviceContext->PSSetConstantBuffers(d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER, 1, &driverConstants); break; case gl::ShaderType::Geometry: deviceContext->GSSetConstantBuffers(d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER, 1, &driverConstants); break; default: UNREACHABLE(); return angle::Result::Continue; } } // Sampler metadata and driver constants need to coexist in the same constant buffer to // conserve constant buffer slots. We update both in the constant buffer if needed. 
ANGLE_TRY(mShaderConstants.updateBuffer(context, mRenderer, shaderType, *mProgramD3D, shaderDriverConstantBuffer)); return angle::Result::Continue; } angle::Result StateManager11::applyDriverUniforms(const gl::Context *context) { ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); ANGLE_TRY(applyDriverUniformsForShader(context, gl::ShaderType::Vertex)); ANGLE_TRY(applyDriverUniformsForShader(context, gl::ShaderType::Fragment)); if (mProgramD3D->hasShaderStage(gl::ShaderType::Geometry)) { ANGLE_TRY(applyDriverUniformsForShader(context, gl::ShaderType::Geometry)); } // needed for the point sprite geometry shader // GSSetConstantBuffers triggers device removal on 9_3, so we should only call it for ES3. if (mRenderer->isES3Capable()) { d3d11::Buffer &driverConstantBufferPS = mShaderDriverConstantBuffers[gl::ShaderType::Fragment]; if (mCurrentGeometryConstantBuffer != driverConstantBufferPS.getSerial()) { ASSERT(driverConstantBufferPS.valid()); deviceContext->GSSetConstantBuffers(0, 1, driverConstantBufferPS.getPointer()); mCurrentGeometryConstantBuffer = driverConstantBufferPS.getSerial(); } } return angle::Result::Continue; } angle::Result StateManager11::applyComputeUniforms(const gl::Context *context, ProgramD3D *programD3D) { UniformStorage11 *computeUniformStorage = GetAs(programD3D->getShaderUniformStorage(gl::ShaderType::Compute)); ASSERT(computeUniformStorage); const d3d11::Buffer *constantBuffer = nullptr; ANGLE_TRY(computeUniformStorage->getConstantBuffer(context, mRenderer, &constantBuffer)); ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); if (computeUniformStorage->size() > 0 && programD3D->areShaderUniformsDirty(gl::ShaderType::Compute)) { UpdateUniformBuffer(deviceContext, computeUniformStorage, constantBuffer); programD3D->markUniformsClean(); } if (mCurrentComputeConstantBuffer != constantBuffer->getSerial()) { deviceContext->CSSetConstantBuffers( d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DEFAULT_UNIFORM_BLOCK, 1, 
constantBuffer->getPointer()); mCurrentComputeConstantBuffer = constantBuffer->getSerial(); } if (!mShaderDriverConstantBuffers[gl::ShaderType::Compute].valid()) { size_t requiredSize = mShaderConstants.getRequiredBufferSize(gl::ShaderType::Compute); D3D11_BUFFER_DESC constantBufferDescription = {}; d3d11::InitConstantBufferDesc(&constantBufferDescription, requiredSize); ANGLE_TRY( mRenderer->allocateResource(GetImplAs(context), constantBufferDescription, &mShaderDriverConstantBuffers[gl::ShaderType::Compute])); ID3D11Buffer *buffer = mShaderDriverConstantBuffers[gl::ShaderType::Compute].get(); deviceContext->CSSetConstantBuffers(d3d11::RESERVED_CONSTANT_BUFFER_SLOT_DRIVER, 1, &buffer); } ANGLE_TRY(mShaderConstants.updateBuffer(context, mRenderer, gl::ShaderType::Compute, *programD3D, mShaderDriverConstantBuffers[gl::ShaderType::Compute])); return angle::Result::Continue; } angle::Result StateManager11::syncUniformBuffersForShader(const gl::Context *context, gl::ShaderType shaderType) { const auto &glState = context->getState(); ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); ID3D11DeviceContext1 *deviceContext1 = mRenderer->getDeviceContext1IfSupported(); const auto &shaderUniformBuffers = mProgramD3D->getShaderUniformBufferCache(shaderType); for (size_t bufferIndex = 0; bufferIndex < shaderUniformBuffers.size(); ++bufferIndex) { const D3DUBOCache cache = shaderUniformBuffers[bufferIndex]; if (cache.binding == -1) { continue; } const auto &uniformBuffer = glState.getIndexedUniformBuffer(cache.binding); const GLintptr uniformBufferOffset = uniformBuffer.getOffset(); const GLsizeiptr uniformBufferSize = uniformBuffer.getSize(); if (uniformBuffer.get() == nullptr) { continue; } Buffer11 *bufferStorage = GetImplAs(uniformBuffer.get()); const d3d11::Buffer *constantBuffer = nullptr; UINT firstConstant = 0; UINT numConstants = 0; ANGLE_TRY(bufferStorage->getConstantBufferRange(context, uniformBufferOffset, uniformBufferSize, &constantBuffer, 
&firstConstant, &numConstants)); ASSERT(constantBuffer); switch (shaderType) { case gl::ShaderType::Vertex: { if (mCurrentConstantBufferVS[bufferIndex] == constantBuffer->getSerial() && mCurrentConstantBufferVSOffset[bufferIndex] == uniformBufferOffset && mCurrentConstantBufferVSSize[bufferIndex] == uniformBufferSize) { continue; } if (firstConstant != 0 && uniformBufferSize != 0) { ASSERT(numConstants != 0); deviceContext1->VSSetConstantBuffers1(cache.registerIndex, 1, constantBuffer->getPointer(), &firstConstant, &numConstants); } else { deviceContext->VSSetConstantBuffers(cache.registerIndex, 1, constantBuffer->getPointer()); } mCurrentConstantBufferVS[cache.registerIndex] = constantBuffer->getSerial(); mCurrentConstantBufferVSOffset[cache.registerIndex] = uniformBufferOffset; mCurrentConstantBufferVSSize[cache.registerIndex] = uniformBufferSize; break; } case gl::ShaderType::Fragment: { if (mCurrentConstantBufferPS[bufferIndex] == constantBuffer->getSerial() && mCurrentConstantBufferPSOffset[bufferIndex] == uniformBufferOffset && mCurrentConstantBufferPSSize[bufferIndex] == uniformBufferSize) { continue; } if (firstConstant != 0 && uniformBufferSize != 0) { deviceContext1->PSSetConstantBuffers1(cache.registerIndex, 1, constantBuffer->getPointer(), &firstConstant, &numConstants); } else { deviceContext->PSSetConstantBuffers(cache.registerIndex, 1, constantBuffer->getPointer()); } mCurrentConstantBufferPS[cache.registerIndex] = constantBuffer->getSerial(); mCurrentConstantBufferPSOffset[cache.registerIndex] = uniformBufferOffset; mCurrentConstantBufferPSSize[cache.registerIndex] = uniformBufferSize; break; } case gl::ShaderType::Compute: { if (mCurrentConstantBufferCS[bufferIndex] == constantBuffer->getSerial() && mCurrentConstantBufferCSOffset[bufferIndex] == uniformBufferOffset && mCurrentConstantBufferCSSize[bufferIndex] == uniformBufferSize) { continue; } if (firstConstant != 0 && uniformBufferSize != 0) { 
deviceContext1->CSSetConstantBuffers1(cache.registerIndex, 1, constantBuffer->getPointer(), &firstConstant, &numConstants); } else { deviceContext->CSSetConstantBuffers(cache.registerIndex, 1, constantBuffer->getPointer()); } mCurrentConstantBufferCS[cache.registerIndex] = constantBuffer->getSerial(); mCurrentConstantBufferCSOffset[cache.registerIndex] = uniformBufferOffset; mCurrentConstantBufferCSSize[cache.registerIndex] = uniformBufferSize; break; } // TODO(jiawei.shao@intel.com): update geometry shader uniform buffers. case gl::ShaderType::Geometry: UNIMPLEMENTED(); break; default: UNREACHABLE(); } } const auto &shaderUniformBuffersUseSB = mProgramD3D->getShaderUniformBufferCacheUseSB(shaderType); for (size_t bufferIndex = 0; bufferIndex < shaderUniformBuffersUseSB.size(); ++bufferIndex) { const D3DUBOCacheUseSB cache = shaderUniformBuffersUseSB[bufferIndex]; if (cache.binding == -1) { continue; } const auto &uniformBuffer = glState.getIndexedUniformBuffer(cache.binding); if (uniformBuffer.get() == nullptr) { continue; } const GLintptr uniformBufferOffset = uniformBuffer.getOffset(); Buffer11 *bufferStorage = GetImplAs(uniformBuffer.get()); const d3d11::ShaderResourceView *bufferSRV = nullptr; ANGLE_TRY(bufferStorage->getStructuredBufferRangeSRV( context, static_cast(uniformBufferOffset), cache.byteWidth, cache.structureByteStride, &bufferSRV)); ASSERT(bufferSRV->valid()); setShaderResourceInternal(shaderType, cache.registerIndex, bufferSRV); } return angle::Result::Continue; } angle::Result StateManager11::syncShaderStorageBuffersForShader(const gl::Context *context, gl::ShaderType shaderType) { const gl::State &glState = context->getState(); const gl::Program *program = glState.getProgram(); angle::FixedVector previouslyBound; for (size_t blockIndex = 0; blockIndex < program->getActiveShaderStorageBlockCount(); blockIndex++) { GLuint binding = program->getShaderStorageBlockBinding(static_cast(blockIndex)); const unsigned int registerIndex = 
mProgramD3D->getShaderStorageBufferRegisterIndex( static_cast(blockIndex), shaderType); // It means this block is active but not statically used. if (registerIndex == GL_INVALID_INDEX) { continue; } const auto &shaderStorageBuffer = glState.getIndexedShaderStorageBuffer(binding); if (shaderStorageBuffer.get() == nullptr) { // We didn't see a driver error like atomic buffer did. But theoretically, the same // thing should be done. setUnorderedAccessViewInternal(shaderType, registerIndex, nullptr); continue; } Buffer11 *bufferStorage = GetImplAs(shaderStorageBuffer.get()); if (std::find(previouslyBound.begin(), previouslyBound.end(), bufferStorage) != previouslyBound.end()) { // D3D11 doesn't support binding a buffer multiple times // http://anglebug.com/3032 ERR() << "Writing to multiple blocks on the same buffer is not allowed."; return angle::Result::Stop; } previouslyBound.push_back(bufferStorage); d3d11::UnorderedAccessView *uavPtr = nullptr; GLsizeiptr viewSize = 0; // Bindings only have a valid size if bound using glBindBufferRange if (shaderStorageBuffer.getSize() > 0) { viewSize = shaderStorageBuffer.getSize(); } // We use the buffer size for glBindBufferBase else { viewSize = bufferStorage->getSize(); } ANGLE_TRY(bufferStorage->getRawUAVRange(context, shaderStorageBuffer.getOffset(), viewSize, &uavPtr)); switch (shaderType) { case gl::ShaderType::Compute: { setUnorderedAccessViewInternal(shaderType, registerIndex, uavPtr); break; } case gl::ShaderType::Vertex: case gl::ShaderType::Fragment: case gl::ShaderType::Geometry: UNIMPLEMENTED(); break; default: UNREACHABLE(); } } return angle::Result::Continue; } angle::Result StateManager11::syncUniformBuffers(const gl::Context *context) { mProgramD3D->updateUniformBufferCache(context->getCaps()); if (mProgramD3D->hasShaderStage(gl::ShaderType::Compute)) { ANGLE_TRY(syncUniformBuffersForShader(context, gl::ShaderType::Compute)); } else { ANGLE_TRY(syncUniformBuffersForShader(context, gl::ShaderType::Vertex)); 
ANGLE_TRY(syncUniformBuffersForShader(context, gl::ShaderType::Fragment)); if (mProgramD3D->hasShaderStage(gl::ShaderType::Geometry)) { ANGLE_TRY(syncUniformBuffersForShader(context, gl::ShaderType::Geometry)); } } return angle::Result::Continue; } angle::Result StateManager11::syncAtomicCounterBuffers(const gl::Context *context) { if (mProgramD3D->hasShaderStage(gl::ShaderType::Compute)) { ANGLE_TRY(syncAtomicCounterBuffersForShader(context, gl::ShaderType::Compute)); } return angle::Result::Continue; } angle::Result StateManager11::syncAtomicCounterBuffersForShader(const gl::Context *context, gl::ShaderType shaderType) { const gl::State &glState = context->getState(); const gl::Program *program = glState.getProgram(); for (const auto &atomicCounterBuffer : program->getState().getAtomicCounterBuffers()) { GLuint binding = atomicCounterBuffer.binding; const auto &buffer = glState.getIndexedAtomicCounterBuffer(binding); const unsigned int registerIndex = mProgramD3D->getAtomicCounterBufferRegisterIndex(binding, shaderType); ASSERT(registerIndex != GL_INVALID_INDEX); if (buffer.get() == nullptr) { // The atomic counter is used in shader. However, there is no buffer binding to it. We // should clear the corresponding UAV in case the previous view type is a texture not a // buffer. Otherwise, below error will be reported. The Unordered Access View dimension // declared in the shader code (BUFFER) does not match the view type bound to slot 0 // of the Compute Shader unit (TEXTURE2D). setUnorderedAccessViewInternal(shaderType, registerIndex, nullptr); continue; } Buffer11 *bufferStorage = GetImplAs(buffer.get()); // TODO(enrico.galli@intel.com): Check to make sure that we aren't binding the same buffer // multiple times, as this is unsupported by D3D11. http://anglebug.com/3141 // Bindings only have a valid size if bound using glBindBufferRange. Therefore, we use the // buffer size for glBindBufferBase GLsizeiptr viewSize = (buffer.getSize() > 0) ? 
buffer.getSize() : bufferStorage->getSize(); d3d11::UnorderedAccessView *uavPtr = nullptr; ANGLE_TRY(bufferStorage->getRawUAVRange(context, buffer.getOffset(), viewSize, &uavPtr)); if (shaderType == gl::ShaderType::Compute) { setUnorderedAccessViewInternal(shaderType, registerIndex, uavPtr); } else { // Atomic Shaders on non-compute shaders are currently unimplemented // http://anglebug.com/1729 UNIMPLEMENTED(); } } return angle::Result::Continue; } angle::Result StateManager11::syncShaderStorageBuffers(const gl::Context *context) { if (mProgramD3D->hasShaderStage(gl::ShaderType::Compute)) { ANGLE_TRY(syncShaderStorageBuffersForShader(context, gl::ShaderType::Compute)); } return angle::Result::Continue; } angle::Result StateManager11::syncTransformFeedbackBuffers(const gl::Context *context) { const auto &glState = context->getState(); ID3D11DeviceContext *deviceContext = mRenderer->getDeviceContext(); // If transform feedback is not active, unbind all buffers if (!glState.isTransformFeedbackActiveUnpaused()) { if (mAppliedTFSerial != mEmptySerial) { deviceContext->SOSetTargets(0, nullptr, nullptr); mAppliedTFSerial = mEmptySerial; } return angle::Result::Continue; } gl::TransformFeedback *transformFeedback = glState.getCurrentTransformFeedback(); TransformFeedback11 *tf11 = GetImplAs(transformFeedback); if (mAppliedTFSerial == tf11->getSerial() && !tf11->isDirty()) { return angle::Result::Continue; } const std::vector *soBuffers = nullptr; ANGLE_TRY(tf11->getSOBuffers(context, &soBuffers)); const std::vector &soOffsets = tf11->getSOBufferOffsets(); deviceContext->SOSetTargets(tf11->getNumSOBuffers(), soBuffers->data(), soOffsets.data()); mAppliedTFSerial = tf11->getSerial(); tf11->onApply(); return angle::Result::Continue; } void StateManager11::syncPrimitiveTopology(const gl::State &glState, gl::PrimitiveMode currentDrawMode) { D3D11_PRIMITIVE_TOPOLOGY primitiveTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; // Don't cull everything by default, this also resets if 
we were previously culling mCullEverything = false; switch (currentDrawMode) { case gl::PrimitiveMode::Points: { bool usesPointSize = mProgramD3D->usesPointSize(); // ProgramBinary assumes non-point rendering if gl_PointSize isn't written, // which affects varying interpolation. Since the value of gl_PointSize is // undefined when not written, just skip drawing to avoid unexpected results. if (!usesPointSize && !glState.isTransformFeedbackActiveUnpaused()) { // Notify developers of risking undefined behavior. WARN() << "Point rendering without writing to gl_PointSize."; mCullEverything = true; return; } // If instanced pointsprites are enabled and the shader uses gl_PointSize, the topology // must be D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST. if (usesPointSize && mRenderer->getFeatures().useInstancedPointSpriteEmulation.enabled) { primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; } else { primitiveTopology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; } break; } case gl::PrimitiveMode::Lines: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case gl::PrimitiveMode::LineLoop: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case gl::PrimitiveMode::LineStrip: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case gl::PrimitiveMode::Triangles: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; mCullEverything = CullsEverything(glState); break; case gl::PrimitiveMode::TriangleStrip: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; mCullEverything = CullsEverything(glState); break; // emulate fans via rewriting index buffer case gl::PrimitiveMode::TriangleFan: primitiveTopology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; mCullEverything = CullsEverything(glState); break; default: UNREACHABLE(); break; } setPrimitiveTopologyInternal(primitiveTopology); } } // namespace rx