IGC/LLVM3DBuilder/BuiltinsFrontendDefinitions.hpp

/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef BUILTINS_FRONTEND_DEFINITIONS_HPP
#define BUILTINS_FRONTEND_DEFINITIONS_HPP

#include "common/debug/DebugMacros.hpp" // VALUE_NAME() definition.
#include "common/LLVMWarningsPush.hpp"
#include "llvm/Config/llvm-config.h"
#include "llvm/AsmParser/Parser.h"
#include "llvmWrapper/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "common/LLVMWarningsPop.hpp"
#include "Probe/Assertion.h"

// 32-bit encoding of a graphics resource packed into an address-space number.
// The same storage can be viewed either as individual bit-fields (`bits`,
// 16 + 5 + 1 + 10 = 32 bits) or as the raw integer (`u32Val`).
// EncodeASForGFXResource() fills in the fields and returns `u32Val`.
typedef union _gfxResourceAddressSpace
{
    struct _bits
    {
        unsigned int  bufId    : 16;    // constant buffer index, or the caller-supplied unique id for indirect access
        unsigned int  bufType  : 5;     // written by the encoder as (IGC::BufferType value + 1)
        unsigned int  indirect : 1;     // bool; set to 1 by the encoder when the buffer index is not a constant
        unsigned int  reserved : 10;    // never written by the encoder, so it stays zero
    } bits;
    unsigned int   u32Val;              // raw view of all the bit-fields at once
} GFXResourceAddressSpace;

// Named address-space numbers.
// In the visible code ADDRESS_SPACE_LOCAL is the value EncodeASForGFXResource()
// returns for SLM buffers.
// NOTE(review): the numbering looks like it mirrors the OpenCL address-space
// convention - confirm against the OCL front-end before changing any value.
enum class ADDRESS_SPACE_TYPE : unsigned int
{
    ADDRESS_SPACE_PRIVATE = 0,
    ADDRESS_SPACE_GLOBAL = 1,
    ADDRESS_SPACE_CONSTANT = 2,
    ADDRESS_SPACE_LOCAL = 3,        // also used for SLM (OCL uses addrspace 3 for SLM)
    ADDRESS_SPACE_GENERIC = 4,
    ADDRESS_SPACE_LOCAL_32 = 13,    // note the gap: values 5..12 are not named here
};

// Encodes a graphics resource reference as an address-space number.
//   bufIdx      - buffer index; when it is an llvm::ConstantInt its value is
//                 stored in the bufId field of the encoding.
//   bufType     - kind of buffer; SLM is special-cased (see below), every other
//                 type is stored biased by one in the bufType field.
//   uniqueIndAS - id stored in the bufId field when bufIdx is not a constant.
// Returns ADDRESS_SPACE_LOCAL for SLM, otherwise the packed
// GFXResourceAddressSpace value.
template<bool preserveNames, typename T, typename Inserter>
unsigned LLVM3DBuilder<preserveNames, T, Inserter>::EncodeASForGFXResource(
    const llvm::Value& bufIdx,
    IGC::BufferType bufType,
    unsigned uniqueIndAS)
{
    GFXResourceAddressSpace encoded = {};

    static_assert(sizeof(encoded) == 4, "Code below may need and update.");

    encoded.u32Val = 0;
    IGC_ASSERT((bufType + 1) < IGC::BUFFER_TYPE_UNKNOWN + 1);

    // OCL uses addrspace 3 for SLM. We should use the same thing.
    if (bufType == IGC::BufferType::SLM)
    {
        return static_cast<unsigned int>(ADDRESS_SPACE_TYPE::ADDRESS_SPACE_LOCAL);
    }

    encoded.bits.bufType = bufType + 1;

    if (const llvm::ConstantInt* const constIdx = llvm::dyn_cast<llvm::ConstantInt>(&bufIdx))
    {
        // Direct access: the constant index itself identifies the buffer.
        const unsigned bufId = static_cast<unsigned>(constIdx->getZExtValue());
        IGC_ASSERT(bufId < (1 << 16));
        encoded.bits.bufId = bufId;
    }
    else
    {
        // if it is indirect-buf, it is front-end's job to give a proper(unique) address-space per access
        encoded.bits.bufId = uniqueIndAS;
        encoded.bits.indirect = 1;
    }

    return encoded.u32Val;
}


// Returns the declaration of the GenISA_ubfe intrinsic, requested from the
// module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ubfe() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_ubfe);
}

// Returns the declaration of the GenISA_ibfe intrinsic, requested from the
// module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_ibfe() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_ibfe);
}

// Returns the declaration of the GenISA_bfi intrinsic, requested from the
// module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfi() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_bfi);
}

// Returns the declaration of the GenISA_bfrev intrinsic, requested from the
// module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_bfrev() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_bfrev);
}

// Returns the declaration of the GenISA_firstbitHi intrinsic, requested from
// the module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitHi() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_firstbitHi);
}

// Returns the declaration of the GenISA_firstbitLo intrinsic, requested from
// the module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitLo() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_firstbitLo);
}

// Returns the declaration of the GenISA_firstbitShi intrinsic, requested from
// the module that contains the builder's current insertion block.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Function* LLVM3DBuilder<preserveNames, T, Inserter>::llvm_GenISA_firstbitShi() const
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    return llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_firstbitShi);
}

// Populates the builder's cached constants: the i32 values 0..3 and the
// float values 0.0 and 1.0. Other helpers in this file read these members
// (m_int0..m_int3) instead of re-creating the constants.
template<bool preserveNames, typename T, typename Inserter>
void LLVM3DBuilder<preserveNames, T, Inserter>::Init()
{
    // 32-bit integer constants.
    m_int0 = this->getInt32(0);
    m_int1 = this->getInt32(1);
    m_int2 = this->getInt32(2);
    m_int3 = this->getInt32(3);

    // Single-precision floating point constants.
    llvm::Type* const floatTy = this->getFloatTy();
    m_float0 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(floatTy, 0.0));
    m_float1 = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(floatTy, 1.0));
}

// Emits a call to the GenISA_resinfoptr intrinsic.
//   int32_src_s_mip  - passed as the second call argument.
//   int32_textureIdx - passed as the first call argument; its type is also
//                      the type used to request the intrinsic declaration.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfo(
    llvm::Value* int32_src_s_mip,
    llvm::Value* int32_textureIdx)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();

    llvm::Function* const resinfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_resinfoptr,
        int32_textureIdx->getType());

    // Note the order: the resource comes first, the mip level second
    // (the reverse of this function's parameter order).
    llvm::Value* callArgs[] = { int32_textureIdx, int32_src_s_mip };

    return this->CreateCall(resinfoDecl, callArgs);
}

// Emits a resinfo query for an MSAA resource and returns a <4 x i32> vector in
// which channel 2 has been divided by the number of samples.
//   srcBuffer       - resource passed to both resinfoptr and sampleinfoptr.
//   float_src_s_mip - mip level; bit-cast to i32 before being passed on.
// Channels 0, 1 and 3 of the result are copied straight from the resinfoptr
// call; channel 2 is (resinfo channel 2) udiv (number of samples).
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_resinfoptr_msaa(
    llvm::Value* srcBuffer,
    llvm::Value* float_src_s_mip)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const resinfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_resinfoptr, srcBuffer->getType());

    // %mip_s = bitcast float %float_src_s_mip to i32
    llvm::Value* const mipAsInt = this->CreateBitCast(float_src_s_mip, this->getInt32Ty(), VALUE_NAME("mip_s"));

    llvm::Value* resinfoArgs[] = { srcBuffer, mipAsInt };
    llvm::Value* const resinfoCall = this->CreateCall(resinfoDecl, resinfoArgs);

    // Channel 2 of resinfo is the value that gets divided by the sample count below.
    llvm::Value* const resinfoChan2 = this->CreateExtractElement(resinfoCall, this->m_int2);

    // Call sampleinfoptr intrinsic to get the number of samples.
    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());

    llvm::Value* sampleInfoArgs[] = { srcBuffer };
    llvm::Value* const sampleInfoCall = this->CreateCall(sampleInfoDecl, sampleInfoArgs);

    // Channel 0 of sampleinfo would normally hold the sample count read from
    // the surface state, but for MSAA UAV emulation that surface-state value
    // must be programmed to 1 (IGC has no native MSAA UAV messages at the
    // moment), so it cannot be used. Channel 3 carries the sample position
    // palette index, which is set to log2(number of samples); as returned by
    // sampleinfo it is incremented by one relative to the surface state.
    llvm::Value* const paletteIdxPlusOne = this->CreateExtractElement(sampleInfoCall, this->m_int3);
    llvm::Value* const log2Samples = this->CreateSub(paletteIdxPlusOne, this->m_int1);

    // Number of samples = 2 ^ "sample position palette index".
    llvm::Value* const sampleCount = this->CreateShl(this->m_int1, log2Samples);

    // Divide depth by number of samples.
    llvm::Value* const depth = this->CreateUDiv(resinfoChan2, sampleCount, VALUE_NAME("depth_s"));

    // Copies one channel of the resinfo call into `vec`: extract first, then insert.
    const auto forwardChannel =
        [this, resinfoCall](llvm::Value* vec, llvm::Value* srcIdx, unsigned dstIdx) -> llvm::Value*
    {
        llvm::Value* const channel = this->CreateExtractElement(resinfoCall, srcIdx);
        return this->CreateInsertElement(vec, channel, this->getInt32(dstIdx), "call_inst");
    };

    llvm::Value* result = llvm::UndefValue::get(
        IGCLLVM::FixedVectorType::get(llvm::Type::getInt32Ty(pModule->getContext()), 4));

    // Channels 0, 1 and 3 pass through unchanged; channel 2 is filled in last
    // with the adjusted depth.
    result = forwardChannel(result, this->m_int0, 0);
    result = forwardChannel(result, this->m_int1, 1);
    result = forwardChannel(result, this->m_int3, 3);
    result = this->CreateInsertElement(result, depth, this->getInt32(2), "call_inst");

    return result;
}

// Emits a call to the GenISA_typedwrite intrinsic.
//   dstBuffer                 - destination resource; its type is used to
//                               request the intrinsic declaration.
//   srcAddressU / V / W       - address coordinates, passed in that order.
//   lod                       - passed as the fifth argument.
//   float_X / _Y / _Z / _W    - the four values to write, passed last.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite(
    llvm::Value* dstBuffer,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressW,
    llvm::Value* lod,
    llvm::Value* float_X,
    llvm::Value* float_Y,
    llvm::Value* float_Z,
    llvm::Value* float_W)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const typedWriteDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV, srcAddressW,
        lod,
        float_X, float_Y, float_Z, float_W,
    };

    return this->CreateCall(typedWriteDecl, callArgs);
}

// Emits a call to the GenISA_typedread intrinsic.
//   srcBuffer           - source resource; its type is used to request the
//                         intrinsic declaration.
//   srcAddressU / V / W - address coordinates, passed in that order.
//   lod                 - passed as the last argument.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread(
    llvm::Value* srcBuffer,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressW,
    llvm::Value* lod)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const typedReadDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());

    llvm::Value* callArgs[] = {
        srcBuffer,
        srcAddressU, srcAddressV, srcAddressW,
        lod
    };

    return this->CreateCall(typedReadDecl, callArgs);
}

// Emits a GenISA_typedread call for a 2D MSAA resource.
// The sample index takes the place of the third address coordinate
// (R = SampleIndex).
//   srcBuffer       - source resource; its type is used to request the
//                     intrinsic declaration.
//   sampleIdx       - passed as the R coordinate.
//   srcAddressU / V - first two address coordinates.
//   lod             - passed as the last argument.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2D(
    llvm::Value* srcBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* lod)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const typedReadDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());

    llvm::Value* callArgs[] = {
        srcBuffer,
        srcAddressU, srcAddressV,
        sampleIdx,      // R
        lod
    };

    return this->CreateCall(typedReadDecl, callArgs);
}

// Emits a GenISA_typedread call for a 2D MSAA array resource.
// The third address coordinate is computed as
//     R = srcAddressR * (number of samples) + sampleIdx
// where the number of samples is derived from a sampleinfoptr query on
// srcBuffer.
//   srcBuffer       - source resource; its type is used to request both
//                     intrinsic declarations.
//   sampleIdx       - sample to read.
//   srcAddressU / V - first two address coordinates.
//   srcAddressR     - array coordinate before sample scaling.
//   lod             - passed as the last argument.
// Returns the emitted typedread call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedread_msaa2DArray(
    llvm::Value* srcBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressR,
    llvm::Value* lod)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    // Call sampleinfoptr intrinsic to get the number of samples.
    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, srcBuffer->getType());
    llvm::Value* const sampleInfo = this->CreateCall(sampleInfoDecl, srcBuffer);

    // Channel 0 of sampleinfo would normally hold the sample count read from
    // the surface state, but for MSAA UAV emulation that surface-state value
    // must be programmed to 1 (IGC has no native MSAA UAV messages at the
    // moment), so it cannot be used. Channel 3 carries the sample position
    // palette index, which is set to log2(number of samples); as returned by
    // sampleinfo it is incremented by one relative to the surface state.
    llvm::Value* const paletteIdxPlusOne = this->CreateExtractElement(sampleInfo, this->m_int3);
    llvm::Value* const log2Samples = this->CreateSub(paletteIdxPlusOne, this->m_int1);

    // Number of samples = 2 ^ "sample position palette index".
    llvm::Value* const sampleCount = this->CreateShl(this->m_int1, log2Samples);

    // R = R' * num of Samples + SampleIndex
    llvm::Value* const scaledR = this->CreateMul(srcAddressR, sampleCount, VALUE_NAME("mul_s"));
    llvm::Value* const combinedR = this->CreateAdd(scaledR, sampleIdx, VALUE_NAME("source_R"));

    llvm::Function* const typedReadDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedread, srcBuffer->getType());

    llvm::Value* callArgs[] = {
        srcBuffer,
        srcAddressU, srcAddressV,
        combinedR,
        lod
    };

    return this->CreateCall(typedReadDecl, callArgs);
}

// Emits a GenISA_typedwrite call for a 2D MSAA resource.
// The sample index takes the place of the third address coordinate
// (R = SampleIndex) and the cached i32 0 constant is passed in the LOD slot.
//   dstBuffer              - destination resource; its type is used to
//                            request the intrinsic declaration.
//   sampleIdx              - passed as the R coordinate.
//   srcAddressU / V        - first two address coordinates.
//   float_X / _Y / _Z / _W - the four values to write.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2D(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* float_X,
    llvm::Value* float_Y,
    llvm::Value* float_Z,
    llvm::Value* float_W)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const typedWriteDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        sampleIdx,      // R
        m_int0,         // LOD slot
        float_X, float_Y, float_Z, float_W,
    };

    return this->CreateCall(typedWriteDecl, callArgs);
}

// Emits a GenISA_typedwrite call for a 2D MSAA array resource.
// The third address coordinate is computed as
//     R = srcAddressR * (number of samples) + sampleIdx
// where the number of samples is derived from a sampleinfoptr query on
// dstBuffer. The cached i32 0 constant is passed in the LOD slot.
//   dstBuffer              - destination resource; its type is used to
//                            request both intrinsic declarations.
//   sampleIdx              - sample to write.
//   srcAddressU / V        - first two address coordinates.
//   srcAddressR            - array coordinate before sample scaling.
//   float_X / _Y / _Z / _W - the four values to write.
// Returns the emitted typedwrite call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_typedwrite_msaa2DArray(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressR,
    llvm::Value* float_X,
    llvm::Value* float_Y,
    llvm::Value* float_Z,
    llvm::Value* float_W)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    // Call sampleinfoptr intrinsic to get the number of samples.
    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
    llvm::Value* const sampleInfo = this->CreateCall(sampleInfoDecl, dstBuffer);

    // Channel 0 of sampleinfo would normally hold the sample count read from
    // the surface state, but for MSAA UAV emulation that surface-state value
    // must be programmed to 1 (IGC has no native MSAA UAV messages at the
    // moment), so it cannot be used. Channel 3 carries the sample position
    // palette index, which is set to log2(number of samples); as returned by
    // sampleinfo it is incremented by one relative to the surface state.
    llvm::Value* const paletteIdxPlusOne = this->CreateExtractElement(sampleInfo, this->m_int3);
    llvm::Value* const log2Samples = this->CreateSub(paletteIdxPlusOne, this->m_int1);

    // Number of samples = 2 ^ "sample position palette index".
    llvm::Value* const sampleCount = this->CreateShl(this->m_int1, log2Samples);

    // R = R' * num of Samples + SampleIndex
    llvm::Value* const scaledR = this->CreateMul(srcAddressR, sampleCount, VALUE_NAME("mul_s"));
    llvm::Value* const combinedR = this->CreateAdd(scaledR, sampleIdx, VALUE_NAME("source_R"));

    llvm::Function* const typedWriteDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_typedwrite, dstBuffer->getType());

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        combinedR,
        m_int0,         // LOD slot
        float_X, float_Y, float_Z, float_W,
    };

    return this->CreateCall(typedWriteDecl, callArgs);
}

// Emits a GenISA_intatomictyped call for a 2D MSAA resource.
// The sample index takes the place of the third address coordinate
// (R = SampleIndex).
//   dstBuffer       - destination resource.
//   sampleIdx       - passed as the R coordinate.
//   srcAddressU / V - first two address coordinates.
//   src             - atomic operand.
//   instType        - passed through as the last argument.
// The intrinsic declaration is requested with the types of src and dstBuffer.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2D(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* src,
    llvm::Value* instType)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { src->getType(), dstBuffer->getType() };
    llvm::Function* const atomicDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_intatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        sampleIdx,      // R
        src,
        instType
    };

    return this->CreateCall(atomicDecl, callArgs);
}

// Emits a GenISA_intatomictyped call for a 2D MSAA array resource.
// The third address coordinate is computed as
//     R = srcAddressR * (number of samples) + sampleIdx
// where the number of samples is derived from a sampleinfoptr query on
// dstBuffer.
//   dstBuffer       - destination resource.
//   sampleIdx       - sample to operate on.
//   srcAddressU / V - first two address coordinates.
//   srcAddressR     - array coordinate before sample scaling.
//   src             - atomic operand.
//   instType        - passed through as the last argument.
// Returns the emitted atomic call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_dwordatomictypedMsaa2DArray(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressR,
    llvm::Value* src,
    llvm::Value* instType)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    // Call sampleinfoptr intrinsic to get the number of samples.
    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
    llvm::Value* const sampleInfo = this->CreateCall(sampleInfoDecl, dstBuffer);

    // Channel 0 of sampleinfo would normally hold the sample count read from
    // the surface state, but for MSAA UAV emulation that surface-state value
    // must be programmed to 1 (IGC has no native MSAA UAV messages at the
    // moment), so it cannot be used. Channel 3 carries the sample position
    // palette index, which is set to log2(number of samples); as returned by
    // sampleinfo it is incremented by one relative to the surface state.
    llvm::Value* const paletteIdxPlusOne = this->CreateExtractElement(sampleInfo, this->m_int3);
    llvm::Value* const log2Samples = this->CreateSub(paletteIdxPlusOne, this->m_int1);

    // Number of samples = 2 ^ "sample position palette index".
    llvm::Value* const sampleCount = this->CreateShl(this->m_int1, log2Samples);

    // R = R' * num of Samples + SampleIndex
    llvm::Value* const scaledR = this->CreateMul(srcAddressR, sampleCount, VALUE_NAME("mul_s"));
    llvm::Value* const combinedR = this->CreateAdd(scaledR, sampleIdx, VALUE_NAME("source_R"));

    llvm::Type* overloadTypes[] = { src->getType(), dstBuffer->getType() };
    llvm::Function* const atomicDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_intatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        combinedR,
        src,
        instType
    };

    return this->CreateCall(atomicDecl, callArgs);
}

// Emits a call to the GenISA_intatomicrawA64 intrinsic.
//   ptr    - pointer operand; it is passed as both the first and the second
//            call argument, and its type appears twice in the type list used
//            to request the declaration.
//   data   - atomic operand.
//   opcode - atomic operation, passed as an i32 constant.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomic(
    llvm::Value* ptr,
    llvm::Value* data,
    IGC::AtomicOp opcode)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* const ptrTy = ptr->getType();
    llvm::Type* overloadTypes[] = { data->getType(), ptrTy, ptrTy };
    llvm::Function* const atomicDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_intatomicrawA64, overloadTypes);

    llvm::Value* callArgs[] = { ptr, ptr, data, this->getInt32(opcode) };
    return this->CreateCall(atomicDecl, callArgs);
}

// Emits a call to the GenISA_intatomicraw intrinsic.
//   resource - resource operand, first call argument.
//   offset   - second call argument.
//   data     - atomic operand.
//   opcode   - atomic operation, passed as an i32 constant.
// The intrinsic declaration is requested with the types of data and resource.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomic(
    llvm::Value* resource,
    llvm::Value* offset,
    llvm::Value* data,
    IGC::AtomicOp opcode)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { data->getType(), resource->getType() };
    llvm::Function* const atomicDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_intatomicraw, overloadTypes);

    llvm::Value* callArgs[] = { resource, offset, data, this->getInt32(opcode) };
    return this->CreateCall(atomicDecl, callArgs);
}

// Emits a call to the GenISA_icmpxchgatomicrawA64 intrinsic.
//   ptr   - pointer operand; it is passed as both the first and the second
//           call argument, and its type appears twice in the type list used
//           to request the declaration.
//   data0 - third call argument; its type is the first overload type.
//   data1 - fourth call argument.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_StatelessAtomicCmpXChg(
    llvm::Value* ptr,
    llvm::Value* data0,
    llvm::Value* data1)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* const ptrTy = ptr->getType();
    llvm::Type* overloadTypes[] = { data0->getType(), ptrTy, ptrTy };
    llvm::Function* const cmpXchgDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64, overloadTypes);

    llvm::Value* callArgs[] = { ptr, ptr, data0, data1, };
    return this->CreateCall(cmpXchgDecl, callArgs);
}

// Emits a call to the GenISA_icmpxchgatomicraw intrinsic.
//   resource - resource operand, first call argument.
//   offset   - second call argument.
//   data0    - third call argument; its type is the first overload type.
//   data1    - fourth call argument.
// The intrinsic declaration is requested with the types of data0 and resource.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_InidrectAtomicCmpXChg(
    llvm::Value* resource,
    llvm::Value* offset,
    llvm::Value* data0,
    llvm::Value* data1)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { data0->getType(), resource->getType() };
    llvm::Function* const cmpXchgDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw, overloadTypes);

    llvm::Value* callArgs[] = { resource, offset, data0, data1, };
    return this->CreateCall(cmpXchgDecl, callArgs);
}

// Emits a GenISA_icmpxchgatomictyped call for a 2D MSAA resource.
// The sample index takes the place of the third address coordinate
// (R = SampleIndex).
//   dstBuffer       - destination resource.
//   sampleIdx       - passed as the R coordinate.
//   srcAddressU / V - first two address coordinates.
//   src0, src1      - the two data operands, passed last in that order.
// The intrinsic declaration is requested with the types of src0 and dstBuffer.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2D(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* src0,
    llvm::Value* src1)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { src0->getType(), dstBuffer->getType() };
    llvm::Function* const cmpXchgDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        sampleIdx,      // R
        src0,
        src1
    };

    return this->CreateCall(cmpXchgDecl, callArgs);
}

// Emits a GenISA_icmpxchgatomictyped call for a 2D MSAA array resource.
// The third address coordinate is computed as
//     R = srcAddressR * (number of samples) + sampleIdx
// where the number of samples is derived from a sampleinfoptr query on
// dstBuffer.
//   dstBuffer       - destination resource.
//   sampleIdx       - sample to operate on.
//   srcAddressU / V - first two address coordinates.
//   srcAddressR     - array coordinate before sample scaling.
//   src0, src1      - the two data operands, passed last in that order.
// Returns the emitted compare-exchange call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_cmpxchgatomictypedMsaa2DArray(
    llvm::Value* dstBuffer,
    llvm::Value* sampleIdx,
    llvm::Value* srcAddressU,
    llvm::Value* srcAddressV,
    llvm::Value* srcAddressR,
    llvm::Value* src0,
    llvm::Value* src1)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    // Call sampleinfoptr intrinsic to get the number of samples.
    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, dstBuffer->getType());
    llvm::Value* const sampleInfo = this->CreateCall(sampleInfoDecl, dstBuffer);

    // Channel 0 of sampleinfo would normally hold the sample count read from
    // the surface state, but for MSAA UAV emulation that surface-state value
    // must be programmed to 1 (IGC has no native MSAA UAV messages at the
    // moment), so it cannot be used. Channel 3 carries the sample position
    // palette index, which is set to log2(number of samples); as returned by
    // sampleinfo it is incremented by one relative to the surface state.
    llvm::Value* const paletteIdxPlusOne = this->CreateExtractElement(sampleInfo, this->m_int3);
    llvm::Value* const log2Samples = this->CreateSub(paletteIdxPlusOne, this->m_int1);

    // Number of samples = 2 ^ "sample position palette index".
    llvm::Value* const sampleCount = this->CreateShl(this->m_int1, log2Samples);

    // R = R' * num of Samples + SampleIndex
    llvm::Value* const scaledR = this->CreateMul(srcAddressR, sampleCount, VALUE_NAME("mul_s"));
    llvm::Value* const combinedR = this->CreateAdd(scaledR, sampleIdx, VALUE_NAME("source_R"));

    llvm::Type* overloadTypes[] = { src0->getType(), dstBuffer->getType() };
    llvm::Function* const cmpXchgDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        dstBuffer,
        srcAddressU, srcAddressV,
        combinedR,
        src0,
        src1
    };

    return this->CreateCall(cmpXchgDecl, callArgs);
}

// Emits a call to the GenISA_intatomictyped intrinsic.
//   resource         - resource operand, first call argument.
//   addressU / V / R - address coordinates, passed in that order.
//   data             - atomic operand.
//   opcode           - atomic operation, passed as an i32 constant.
// The intrinsic declaration is requested with the types of data and resource.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomic(
    llvm::Value* resource,
    llvm::Value* addressU,
    llvm::Value* addressV,
    llvm::Value* addressR,
    llvm::Value* data,
    IGC::AtomicOp opcode)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { data->getType(), resource->getType() };
    llvm::Function* const atomicDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_intatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        resource,
        addressU, addressV, addressR,
        data,
        this->getInt32(opcode)
    };

    return this->CreateCall(atomicDecl, callArgs);
}

// Emits a call to the GenISA_icmpxchgatomictyped intrinsic.
//   resource         - resource operand, first call argument.
//   addressU / V / R - address coordinates, passed in that order.
//   data0, data1     - the two data operands, passed last in that order.
// The intrinsic declaration is requested with the types of data0 and resource.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_TypedAtomicCmpXChg(
    llvm::Value* resource,
    llvm::Value* addressU,
    llvm::Value* addressV,
    llvm::Value* addressR,
    llvm::Value* data0,
    llvm::Value* data1)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Type* overloadTypes[] = { data0->getType(), resource->getType() };
    llvm::Function* const cmpXchgDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_icmpxchgatomictyped, overloadTypes);

    llvm::Value* callArgs[] = {
        resource,
        addressU, addressV, addressR,
        data0,
        data1,
    };

    return this->CreateCall(cmpXchgDecl, callArgs);
}

// Emits a call to the GenISA_sampleinfoptr intrinsic for the given resource.
//   resourcePtr - the only call argument; its type is used to request the
//                 intrinsic declaration.
// Returns the emitted call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SampleInfo(
    llvm::Value* resourcePtr)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const sampleInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleinfoptr, resourcePtr->getType());

    llvm::Value* callArgs[] = { resourcePtr, };

    return llvm::cast<llvm::CallInst>(this->CreateCall(sampleInfoDecl, callArgs));
}

// Emits a call to the GenISA_readsurfaceinfoptr intrinsic.
//   resourcePtr - first call argument; its type is used to request the
//                 intrinsic declaration.
//   mipmap      - second call argument.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateReadSurfaceInfo(
    llvm::Value* resourcePtr,
    llvm::Value* mipmap)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();

    llvm::Function* const surfaceInfoDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr,
        resourcePtr->getType());

    return this->CreateCall2(surfaceInfoDecl, resourcePtr, mipmap);
}

// Emits an argument-less call to the GenISA_threadgroupbarrier intrinsic and
// returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SyncThreadGroup()
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();

    llvm::Function* const barrierDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_threadgroupbarrier);

    return this->CreateCall(barrierDecl);
}

// Emits an argument-less call to the GenISA_flushsampler intrinsic and
// returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FlushSampler()
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();

    llvm::Function* const flushDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_flushsampler);

    return this->CreateCall(flushDecl);
}

// Emits a call to the GenISA_memoryfence intrinsic.
// Each boolean parameter is forwarded, in declaration order, as an i1
// constant operand. A seventh i1 operand is always passed as `false`.
// NOTE(review): the meaning of the seventh operand is not visible here;
// confirm against the intrinsic definition.
// Returns the emitted call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MemoryFence(
    bool commit,
    bool flushRWDataCache,
    bool flushConstantCache,
    bool flushTextureCache,
    bool flushInstructionCache,
    bool globalFence)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const pFenceFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_memoryfence);

    llvm::Value* fenceArgs[] =
    {
        this->getInt1(commit),
        this->getInt1(flushRWDataCache),
        this->getInt1(flushConstantCache),
        this->getInt1(flushTextureCache),
        this->getInt1(flushInstructionCache),
        this->getInt1(globalFence),
        this->getInt1(false),   // trailing operand is hard-wired to false
    };

    return this->CreateCall(pFenceFn, fenceArgs);
}

// Emits a call to the GenISA_globalSync intrinsic (no operands) at the
// current insert point and returns the call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_GlobalSync()
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();

    llvm::Function* const pGlobalSyncFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_globalSync);

    return this->CreateCall(pGlobalSyncFn);
}

// Builds IR that looks up an (x, y) offset pair from two constant 32-entry
// float tables and returns it as <4 x float> { x, y, 0.0, 0.0 }.
//
// The table index is computed from the sample-info query on the resource:
//   info    = Create_SampleInfo(int32_resourceIdx)
//   index   = (info[0] >u int32_samplerIdx) ? info[0] + int32_samplerIdx : 0
//   index   = (info[3] == 1) ? 0 : index
// NOTE(review): info[0] is presumably the sample count, which would make
// the tables laid out as blocks starting at index == sample count - confirm.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SamplePos(
    llvm::Value* int32_resourceIdx,
    llvm::Value* int32_samplerIdx)
{
    // Y offsets, indexed by the selected table index.
    static const float yOffsets[32] =
    {
         0.0f,          0.0f,          4.0f / 16.0f, -4.0f / 16.0f,
        -6.0f / 16.0f, -2.0f / 16.0f,  2.0f / 16.0f,  6.0f / 16.0f,
        -3.0f / 16.0f,  3.0f / 16.0f,  1.0f / 16.0f, -5.0f / 16.0f,
         5.0f / 16.0f, -1.0f / 16.0f,  7.0f / 16.0f, -7.0f / 16.0f,
         1.0f / 16.0f, -3.0f / 16.0f,  2.0f / 16.0f, -1.0f / 16.0f,
        -2.0f / 16.0f,  5.0f / 16.0f,  3.0f / 16.0f, -5.0f / 16.0f,
         6.0f / 16.0f, -7.0f / 16.0f, -6.0f / 16.0f,  4.0f / 16.0f,
         0.0f,         -4.0f / 16.0f,  7.0f / 16.0f, -8.0f / 16.0f,
    };

    // X offsets, indexed by the selected table index.
    static const float xOffsets[32] =
    {
         0.0f,          0.0f,          4.0f / 16.0f, -4.0f / 16.0f,
        -2.0f / 16.0f,  6.0f / 16.0f, -6.0f / 16.0f,  2.0f / 16.0f,
         1.0f / 16.0f, -1.0f / 16.0f,  5.0f / 16.0f, -3.0f / 16.0f,
        -5.0f / 16.0f, -7.0f / 16.0f,  3.0f / 16.0f,  7.0f / 16.0f,
         1.0f / 16.0f, -1.0f / 16.0f, -3.0f / 16.0f,  4.0f / 16.0f,
        -5.0f / 16.0f,  2.0f / 16.0f,  5.0f / 16.0f,  3.0f / 16.0f,
        -2.0f / 16.0f,  0.0f,         -4.0f / 16.0f, -6.0f / 16.0f,
        -8.0f / 16.0f,  7.0f / 16.0f,  6.0f / 16.0f, -7.0f / 16.0f,
    };

    llvm::Value* const sampleInfo = this->Create_SampleInfo(int32_resourceIdx);

    llvm::Value* const infoX = this->CreateExtractElement(sampleInfo, m_int0);
    llvm::Value* const infoW = this->CreateExtractElement(sampleInfo, m_int3);

    // Offset the sampler index by info[0]; fall back to entry 0 when the
    // sampler index is not below info[0] or when info[3] equals 1.
    llvm::Value* const shiftedIndex = this->CreateAdd(infoX, int32_samplerIdx);
    llvm::Value* const indexIsBelowX = this->CreateICmp(llvm::ICmpInst::ICMP_UGT, infoX, int32_samplerIdx);
    llvm::Value* const guardedIndex = this->CreateSelect(indexIsBelowX, shiftedIndex, m_int0);
    llvm::Value* const wIsOne = this->CreateICmp(llvm::ICmpInst::ICMP_EQ, infoW, m_int1);
    llvm::Value* const tableIndex = this->CreateSelect(wIsOne, m_int0, guardedIndex);

    // Materialize the Y table as a <32 x float> value and pick one element.
    llvm::Value* yTable = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
    for (unsigned slot = 0; slot < 32; ++slot)
    {
        yTable = this->CreateInsertElement(yTable, this->getFloat(yOffsets[slot]), this->getInt32(slot));
    }
    llvm::Value* const float_y = this->CreateExtractElement(yTable, tableIndex);

    // Same for the X table.
    llvm::Value* xTable = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 32));
    for (unsigned slot = 0; slot < 32; ++slot)
    {
        xTable = this->CreateInsertElement(xTable, this->getFloat(xOffsets[slot]), this->getInt32(slot));
    }
    llvm::Value* const float_x = this->CreateExtractElement(xTable, tableIndex);

    // Pack the result as { x, y, 0, 0 }.
    llvm::Value* result = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
    result = this->CreateInsertElement(result, float_x, this->getInt32(0));
    result = this->CreateInsertElement(result, float_y, this->getInt32(1));
    result = this->CreateInsertElement(result, this->getFloat(0.0f), this->getInt32(2));
    result = this->CreateInsertElement(result, this->getFloat(0.0f), this->getInt32(3));

    return result;
}

// Emits a call to the GenISA_sampleptr intrinsic.
//
// Operand order: u, v, r, ai, minlod, texture, sampler, offsetU, offsetV,
// offsetW. When `minlod` is null a 0.0 constant of coordinate_u's type is
// substituted.
//
// The intrinsic is overloaded on { return vector type, type of
// coordinate_u, type of ptr_textureIdx, type of ptr_sampler }. The return
// vector has 4 elements, or 5 when `feedback_enabled` is set; its element
// type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLE(
    llvm::Value* coordinate_u,
    llvm::Value* coordinate_v,
    llvm::Value* coordinate_r,
    llvm::Value* coordinate_ai,
    llvm::Value* ptr_textureIdx,
    llvm::Value* ptr_sampler,
    llvm::Value* offsetU,
    llvm::Value* offsetV,
    llvm::Value* offsetW,
    llvm::Value* minlod,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    if (!minlod)
    {
        minlod = llvm::ConstantFP::get(coordinate_u->getType(), 0.0);
    }

    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        coordinate_u->getType(),
        ptr_textureIdx->getType(),
        ptr_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        coordinate_u,
        coordinate_v,
        coordinate_r,
        coordinate_ai,
        minlod,
        ptr_textureIdx,
        ptr_sampler,
        offsetU,
        offsetV,
        offsetW
    };

    return this->CreateCall(pSampleFn, sampleArgs);
}

// Emits a call to the GenISA_sampleCptr intrinsic.
//
// Operand order: reference, address 0..3, minlod, texture, sampler,
// offsetU, offsetV, offsetR. When `minlod` is null a 0.0 constant of
// float_address_0's type is substituted.
//
// The intrinsic is overloaded on { return vector type, type of
// float_reference_0, type of int32_textureIdx, type of int32_sampler }.
// The return vector has 4 elements, or 5 when `feedback_enabled` is set;
// its element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC(
    llvm::Value* float_reference_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetR,
    llvm::Value* minlod,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    if (!minlod)
    {
        minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
    }

    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_reference_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleCFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleCptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        float_reference_0,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        minlod,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetR
    };

    return this->CreateCall(pSampleCFn, sampleArgs);
}

// Emits a call to the GenISA_sampleLCptr intrinsic.
//
// Operand order: reference, lod, address 0..3, texture, sampler, offsetU,
// offsetV, offsetW. Note that the lod operand is placed right after the
// reference, ahead of the addresses.
//
// The intrinsic is overloaded on { <4 x elem>, type of float_reference_0,
// type of int32_textureIdx, type of int32_sampler }, where elem is
// `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLELC(
    llvm::Value* float_reference_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* float_lod,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    llvm::Type* returnType)
{
    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, 4),
        float_reference_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleLCFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleLCptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        float_reference_0,
        float_lod,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(pSampleLCFn, sampleArgs);
}

// Emits a call to the GenISA_sampleLCptr intrinsic with the lod operand
// fixed to a 0.0 constant of float_address_0's type.
//
// Operand order: reference, 0.0 (lod), address 0..3, texture, sampler,
// offsetU, offsetV, offsetW.
//
// The intrinsic is overloaded on { return vector type, type of
// float_reference_0, type of int32_textureIdx, type of int32_sampler }.
// The return vector has 4 elements, or 5 when `feedback_enabled` is set;
// its element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEC_LZ(
    llvm::Value* float_reference_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    llvm::Value* const zeroLod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);

    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_reference_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleLCFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleLCptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        float_reference_0,
        zeroLod,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(pSampleLCFn, sampleArgs);
}

// Emits a call to the GenISA_gather4Cptr intrinsic.
//
// Operand order: reference, address 0..3, texture, sampler, offsetU,
// offsetV, m_int0, source channel. The operand between offsetV and the
// channel is always the builder's m_int0 constant.
//
// The intrinsic is overloaded on { return vector type, type of
// float_reference_0, type of int32_textureIdx, type of int32_sampler }.
// The return vector has 4 elements, or 5 when `feedback_enabled` is set;
// its element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4C(
    llvm::Value* float_reference_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_srcChannel,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_reference_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pGather4CFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_gather4Cptr, overloadTys);

    llvm::Value* gatherArgs[] =
    {
        float_reference_0,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        m_int0,
        int32_srcChannel
    };

    return this->CreateCall(pGather4CFn, gatherArgs);
}

// Emits a call to the GenISA_gather4POCptr intrinsic.
//
// Operand order: reference, address 0, address 1, source offset 0, source
// offset 1, address 2, texture, sampler, offsetU, offsetV, m_int0, source
// channel. Note that the two source offsets sit between address 1 and
// address 2, and the operand before the channel is always m_int0.
//
// The intrinsic is overloaded on { return vector type, type of
// float_src_reference_0, type of int32_textureIdx, type of int32_sampler }.
// The return vector has 4 elements, or 5 when `feedback_enabled` is set;
// its element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4POC(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* int_src_offset_0,
    llvm::Value* int_src_offset_1,
    llvm::Value* float_src_reference_0,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_srcChannel,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_src_reference_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pGather4POCFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_gather4POCptr, overloadTys);

    llvm::Value* gatherArgs[] =
    {
        float_src_reference_0,
        float_address_0,
        float_address_1,
        int_src_offset_0,
        int_src_offset_1,
        float_address_2,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        m_int0,
        int32_srcChannel
    };

    return this->CreateCall(pGather4POCFn, gatherArgs);
}

// Emits a call to the GenISA_gather4POptr intrinsic.
//
// Operand order: address 0, address 1, source offset 0, source offset 1,
// address 2, texture, sampler, offsetU, offsetV, m_int0, source channel.
// Note that the two source offsets sit between address 1 and address 2,
// and the operand before the channel is always m_int0.
//
// The intrinsic is overloaded on { return vector type, type of
// float_address_0, type of int32_textureIdx, type of int32_sampler }.
// The return vector has 4 elements, or 5 when `feedback_enabled` is set;
// its element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PO(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* int_src_offset_0,
    llvm::Value* int_src_offset_1,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_srcChannel,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_address_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pGather4POFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_gather4POptr, overloadTys);

    llvm::Value* gatherArgs[] =
    {
        float_address_0,
        float_address_1,
        int_src_offset_0,
        int_src_offset_1,
        float_address_2,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        m_int0,
        int32_srcChannel
    };

    return this->CreateCall(pGather4POFn, gatherArgs);
}

// Issues four Create_gather4PO calls, one per consecutive pair of entries
// in `int_src_offsets`, and assembles a <4 x float> from element 3 of each
// call's result: result[k] = gather(offsets[2k], offsets[2k + 1])[3].
//
// `int_src_offsets` is indexed from 0 through 7, so it must hold at least
// eight values. Each gather is issued with feedback disabled and a float
// element type.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsets(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::ArrayRef<llvm::Value *> int_src_offsets,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_srcChannel)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const floatTy = llvm::Type::getFloatTy(pModule->getContext());

    llvm::Value* gathered = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(floatTy, 4));

    for (unsigned lane = 0; lane < 4; ++lane)
    {
        // Each lane consumes one (offset 0, offset 1) pair.
        const unsigned pairBase = 2 * lane;

        llvm::Value* const gatherCall = Create_gather4PO(
            float_address_0,
            float_address_1,
            float_address_2,
            int_src_offsets[pairBase],
            int_src_offsets[pairBase + 1],
            int32_textureIdx,
            int32_sampler,
            int32_offsetU,
            int32_offsetV,
            int32_srcChannel,
            false,
            floatTy);

        // Keep only element 3 of the gathered vector for this lane.
        llvm::Value* const lastElement = this->CreateExtractElement(gatherCall, this->getInt32(3));
        gathered = this->CreateInsertElement(gathered, lastElement, this->getInt32(lane), "call_inst");
    }

    return gathered;
}

// Issues four Create_gather4POC calls, one per consecutive pair of entries
// in `int_src_offsets`, and assembles a <4 x float> from element 3 of each
// call's result: result[k] = gatherC(offsets[2k], offsets[2k + 1])[3].
//
// `int_src_offsets` is indexed from 0 through 7, so it must hold at least
// eight values. Each gather is issued with feedback disabled and a float
// element type.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4PositionOffsetsC(
    llvm::Value* float_reference_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::ArrayRef<llvm::Value *> int_src_offsets,
    llvm::Value* int32_textureIdx_356,
    llvm::Value* int32_sampler_357,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_srcChannel)
{
    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const floatTy = llvm::Type::getFloatTy(pModule->getContext());

    llvm::Value* gathered = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(floatTy, 4));

    for (unsigned lane = 0; lane < 4; ++lane)
    {
        // Each lane consumes one (offset 0, offset 1) pair.
        const unsigned pairBase = 2 * lane;

        llvm::Value* const gatherCall = Create_gather4POC(
            float_address_0,
            float_address_1,
            float_address_2,
            int_src_offsets[pairBase],
            int_src_offsets[pairBase + 1],
            float_reference_0,
            int32_textureIdx_356,
            int32_sampler_357,
            int32_offsetU,
            int32_offsetV,
            int32_srcChannel,
            false,
            floatTy);

        // Keep only element 3 of the gathered vector for this lane.
        llvm::Value* const lastElement = this->CreateExtractElement(gatherCall, this->getInt32(3));
        gathered = this->CreateInsertElement(gathered, lastElement, this->getInt32(lane), "call_inst");
    }

    return gathered;
}

// Emits a call to the GenISA_sampleBptr intrinsic.
//
// Operand order: bias, address 0..3, minlod, texture, sampler, offsetU,
// offsetV, offsetW. When `minlod` is null a 0.0 constant of
// float_address_0's type is substituted.
//
// The intrinsic is overloaded on { return vector type, type of
// float_bias_0, type of int32_textureIdx, type of int32_sampler }. The
// return vector has 4 elements, or 5 when `feedback_enabled` is set; its
// element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEB(
    llvm::Value* float_bias_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    llvm::Value* minlod,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    if (!minlod)
    {
        minlod = llvm::ConstantFP::get(float_address_0->getType(), 0.0);
    }

    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_bias_0->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleBFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleBptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        float_bias_0,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        minlod,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(pSampleBFn, sampleArgs);
}

// Emits a call to the GenISA_sampleLptr intrinsic.
//
// Operand order: lod, address 0..3, texture, sampler, offsetU, offsetV,
// offsetW.
//
// The intrinsic is overloaded on { return vector type, type of
// float_lod_0, type of ptr_textureIdx, type of ptr_sampler }. The return
// vector has 4 elements, or 5 when `feedback_enabled` is set; its element
// type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEL(
    llvm::Value* float_lod_0,
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* ptr_textureIdx,
    llvm::Value* ptr_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_lod_0->getType(),
        ptr_textureIdx->getType(),
        ptr_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleLFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleLptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        float_lod_0,
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        ptr_textureIdx,
        ptr_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(pSampleLFn, sampleArgs);
}

// Convenience overload: unpacks a SampleD_DC_FromCubeParams bundle and
// forwards to the explicit-operand Create_SAMPLED overload.
//
// The bundle's values are passed in this order: coordinates (u, v, r),
// dx (u, v, r), dy (u, v, r), ai, texture, sampler, offsets (U, V, W),
// followed by `minlod`, `feedback_enabled` and `returnType` unchanged.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
    SampleD_DC_FromCubeParams& sampleParams,
    llvm::Value* minlod,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    // Coordinates.
    llvm::Value* const srcU = sampleParams.get_float_src_u();
    llvm::Value* const srcV = sampleParams.get_float_src_v();
    llvm::Value* const srcR = sampleParams.get_float_src_r();

    // dx and dy values for each coordinate.
    llvm::Value* const dxU = sampleParams.get_dxu();
    llvm::Value* const dxV = sampleParams.get_dxv();
    llvm::Value* const dxR = sampleParams.get_dxr();
    llvm::Value* const dyU = sampleParams.get_dyu();
    llvm::Value* const dyV = sampleParams.get_dyv();
    llvm::Value* const dyR = sampleParams.get_dyr();

    // Remaining sampler operands.
    llvm::Value* const srcAi = sampleParams.get_float_src_ai();
    llvm::Value* const texture = sampleParams.get_int32_textureIdx();
    llvm::Value* const sampler = sampleParams.get_int32_sampler();
    llvm::Value* const offU = sampleParams.get_int32_offsetU();
    llvm::Value* const offV = sampleParams.get_int32_offsetV();
    llvm::Value* const offW = sampleParams.get_int32_offsetW();

    return Create_SAMPLED(
        srcU, srcV, srcR,
        dxU, dxV, dxR,
        dyU, dyV, dyR,
        srcAi,
        texture,
        sampler,
        offU, offV, offW,
        minlod,
        feedback_enabled,
        returnType);
}

// Emits a call to the GenISA_sampleDptr intrinsic.
//
// The src1/src2/src3 inputs arrive grouped by source (three channels each)
// but are passed to the intrinsic interleaved by channel:
//   src1.chan0, src2.chan0, src3.chan0,
//   src1.chan1, src2.chan1, src3.chan1,
//   src1.chan2, src2.chan2, src3.chan2,
//   src1.chan3, minlod, texture, sampler, offsetU, offsetV, offsetW.
// When `minlod` is null a 0.0 constant of float_src1_s_chan0's type is
// substituted.
//
// The intrinsic is overloaded on { return vector type, type of
// float_src1_s_chan0, type of ptr_textureIdx, type of ptr_sampler }. The
// return vector has 4 elements, or 5 when `feedback_enabled` is set; its
// element type is `returnType`, or float when `returnType` is null.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLED(
    llvm::Value* float_src1_s_chan0,
    llvm::Value* float_src1_s_chan1,
    llvm::Value* float_src1_s_chan2,
    llvm::Value* float_src2_s_chan0,
    llvm::Value* float_src2_s_chan1,
    llvm::Value* float_src2_s_chan2,
    llvm::Value* float_src3_s_chan0,
    llvm::Value* float_src3_s_chan1,
    llvm::Value* float_src3_s_chan2,
    llvm::Value* float_src1_s_chan3,
    llvm::Value* ptr_textureIdx,
    llvm::Value* ptr_sampler,
    llvm::Value* int32_offsetU_358,
    llvm::Value* int32_offsetV_359,
    llvm::Value* int32_offsetW_359,
    llvm::Value* minlod,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    if (!minlod)
    {
        minlod = llvm::ConstantFP::get(float_src1_s_chan0->getType(), 0.0);
    }

    llvm::Type* elemTy = returnType;
    if (elemTy == nullptr)
    {
        elemTy = this->getFloatTy();
    }
    const unsigned numElems = feedback_enabled ? 5 : 4;

    llvm::Type* overloadTys[] =
    {
        IGCLLVM::FixedVectorType::get(elemTy, numElems),
        float_src1_s_chan0->getType(),
        ptr_textureIdx->getType(),
        ptr_sampler->getType()
    };

    llvm::Module* const pModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const pSampleDFn = llvm::GenISAIntrinsic::getDeclaration(
        pModule, llvm::GenISAIntrinsic::GenISA_sampleDptr, overloadTys);

    llvm::Value* sampleArgs[] =
    {
        // channel 0 of each source
        float_src1_s_chan0,
        float_src2_s_chan0,
        float_src3_s_chan0,
        // channel 1 of each source
        float_src1_s_chan1,
        float_src2_s_chan1,
        float_src3_s_chan1,
        // channel 2 of each source
        float_src1_s_chan2,
        float_src2_s_chan2,
        float_src3_s_chan2,
        float_src1_s_chan3,
        minlod,
        ptr_textureIdx,
        ptr_sampler,
        int32_offsetU_358,
        int32_offsetV_359,
        int32_offsetW_359
    };

    return this->CreateCall(pSampleDFn, sampleArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEDC(
    llvm::Value* float_ref,
    llvm::Value* float_src_u,
    llvm::Value* dxu,
    llvm::Value* dyu,
    llvm::Value* float_src_v,
    llvm::Value* dxv,
    llvm::Value* dyv,
    llvm::Value* float_src_r,
    llvm::Value* dxr,
    llvm::Value* dyr,
    llvm::Value* float_src_ai,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    llvm::Type* returnType)
{
    // Emits a call to the GenISA_sampleDCptr intrinsic at the current insert point
    // and returns the created call instruction.
    //
    // The intrinsic is overloaded on:
    //   - the result type: a 4-element vector of returnType (float when returnType is null),
    //   - the type of float_ref,
    //   - the types of the texture and sampler operands.
    //
    // Call operands, in order:
    //   ref, u, dxu, dyu, v, dxv, dyv, r, dxr, dyr, ai,
    //   textureIdx, sampler, offsetU, offsetV, offsetW

    // Element type of the result vector.
    llvm::Type* const elementType = (returnType == nullptr) ? this->getFloatTy() : returnType;

    llvm::Type* overloadTypes[] = {
        IGCLLVM::FixedVectorType::get(elementType, 4),
        float_ref->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };

    // The declaration is looked up (or created) in the module that owns the insert block.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const sampleDCFunc = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_sampleDCptr, overloadTypes);

    llvm::Value* callArgs[] = {
        float_ref,
        float_src_u, dxu, dyu,
        float_src_v, dxv, dyv,
        float_src_r, dxr, dyr,
        float_src_ai,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(sampleDCFunc, callArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_lod(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx_356,
    llvm::Value* int32_sampler_357,
    llvm::Type* returnType)
{
    // Emits a call to the GenISA_lodptr intrinsic at the current insert point and
    // returns the created call instruction.
    //
    // Overload types: <4 x returnType> result (float elements when returnType is null),
    // the type of float_address_0, and the texture / sampler operand types.
    // Call operands: the four address values followed by texture and sampler.

    // Element type of the result vector.
    llvm::Type* const elementType = (returnType == nullptr) ? this->getFloatTy() : returnType;

    llvm::Type* overloadTypes[] = {
        IGCLLVM::FixedVectorType::get(elementType, 4),
        float_address_0->getType(),
        int32_textureIdx_356->getType(),
        int32_sampler_357->getType()
    };

    // The declaration is looked up (or created) in the module that owns the insert block.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const lodFunc = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_lodptr, overloadTypes);

    llvm::Value* callArgs[] = {
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        int32_textureIdx_356,
        int32_sampler_357
    };

    return this->CreateCall(lodFunc, callArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_gather4(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx_356,
    llvm::Value* int32_sampler_357,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    llvm::Value* int32_srcChannel,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    // Emits a call to the GenISA_gather4ptr intrinsic at the current insert point and
    // returns the created call instruction.
    //
    // The result is a vector of returnType (float when returnType is null) with
    // 4 elements, or 5 elements when feedback_enabled is set.
    // NOTE(review): the extra element presumably carries the feedback value - confirm
    // against the intrinsic definition.

    // Element type and width of the result vector.
    llvm::Type* const elementType = (returnType == nullptr) ? this->getFloatTy() : returnType;
    const unsigned resultWidth = feedback_enabled ? 5 : 4;

    // Overload types: result vector, address type, texture type, sampler type.
    llvm::Type* overloadTypes[] = {
        IGCLLVM::FixedVectorType::get(elementType, resultWidth),
        float_address_0->getType(),
        int32_textureIdx_356->getType(),
        int32_sampler_357->getType()
    };

    // The declaration is looked up (or created) in the module that owns the insert block.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const gather4Func = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_gather4ptr, overloadTypes);

    // Call operands: four address values, texture, sampler, three offsets, source channel.
    llvm::Value* callArgs[] = {
        float_address_0,
        float_address_1,
        float_address_2,
        float_address_3,
        int32_textureIdx_356,
        int32_sampler_357,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW,
        int32_srcChannel
    };

    return this->CreateCall(gather4Func, callArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_load(
    llvm::Value* int32_sampleIdxU,
    llvm::Value* int32_sampleIdxV,
    llvm::Value* int32_sampleIdxR,
    llvm::Value* int32_lod,
    llvm::Value* ptr_textureIdx,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetR,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    // Emits a call to the GenISA_ldptr intrinsic at the current insert point and
    // returns the created call instruction.
    //
    // The result is a vector of returnType (float when returnType is null) with
    // 4 elements, or 5 elements when feedback_enabled is set.

    // Element type and width of the result vector.
    llvm::Type* const elementType = (returnType == nullptr) ? this->getFloatTy() : returnType;
    const unsigned resultWidth = feedback_enabled ? 5 : 4;

    // Overload types: result vector and texture operand type.
    llvm::Type* overloadTypes[] = {
        IGCLLVM::FixedVectorType::get(elementType, resultWidth),
        ptr_textureIdx->getType()
    };

    // The declaration is looked up (or created) in the module that owns the insert block.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const ldFunc = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_ldptr, overloadTypes);

    // Note the operand order: the lod is passed between the V and R coordinates,
    // which differs from the order of this function's own parameters.
    llvm::Value* callArgs[] = {
        int32_sampleIdxU,
        int32_sampleIdxV,
        int32_lod,
        int32_sampleIdxR,
        ptr_textureIdx,
        int32_offsetU,
        int32_offsetV,
        int32_offsetR
    };

    return this->CreateCall(ldFunc, callArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_ldms(
    llvm::Value* int32_srcIdxU,
    llvm::Value* int32_srcIdxV,
    llvm::Value* int32_srcIdxR,
    llvm::Value* int32_sampleIdx,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetR,
    bool feedback_enabled,
    llvm::Type* returnType)
{
    // Emits two intrinsic calls at the current insert point:
    //   1. GenISA_ldmcsptr, returning <2 x i32>;
    //   2. GenISA_ldmsptr, which consumes both elements of that result together
    //      with the sample index and the source coordinates.
    // The second call is returned. Its result is a vector of returnType (float when
    // returnType is null) with 4 elements, or 5 when feedback_enabled is set.

    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // --- Step 1: ldmcs -------------------------------------------------------
    // Overload types: <2 x i32> result, i32, texture operand type.
    llvm::Type* mcsOverloadTypes[] = {
        IGCLLVM::FixedVectorType::get(this->getInt32Ty(), 2),
        this->getInt32Ty(),
        int32_textureIdx->getType()
    };
    llvm::Function* const ldmcsFunc = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_ldmcsptr, mcsOverloadTypes);

    // The operand following the R coordinate is the constant 0.
    // NOTE(review): presumably the lod slot - confirm against the intrinsic definition.
    llvm::Value* mcsArgs[] = {
        int32_srcIdxU,
        int32_srcIdxV,
        int32_srcIdxR,
        m_int0,
        int32_textureIdx,
        int32_offsetU,
        int32_offsetV,
        int32_offsetR
    };
    llvm::CallInst* const mcsCall = this->CreateCall(ldmcsFunc, mcsArgs);

    // Split the <2 x i32> result into its two channels.
    llvm::Value* const mcsChannel0 = this->CreateExtractElement(mcsCall, m_int0);
    llvm::Value* const mcsChannel1 = this->CreateExtractElement(mcsCall, m_int1);

    // --- Step 2: ldms --------------------------------------------------------
    llvm::Type* const elementType = (returnType == nullptr) ? this->getFloatTy() : returnType;
    const unsigned resultWidth = feedback_enabled ? 5 : 4;

    // Overload types: result vector and texture operand type.
    llvm::Type* ldmsOverloadTypes[] = {
        IGCLLVM::FixedVectorType::get(elementType, resultWidth),
        int32_textureIdx->getType()
    };
    llvm::Function* const ldmsFunc = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_ldmsptr, ldmsOverloadTypes);

    llvm::Value* ldmsArgs[] = {
        int32_sampleIdx,
        mcsChannel0,
        mcsChannel1,
        int32_srcIdxU,
        int32_srcIdxV,
        int32_srcIdxR,
        m_int0,
        int32_textureIdx,
        int32_offsetU,
        int32_offsetV,
        int32_offsetR
    };

    return this->CreateCall(ldmsFunc, ldmsArgs);
}

template<bool preserveNames, typename T, typename Inserter>
inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_ParamsFromUnormalizedCoords(
    llvm::Value* int32_lod,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_u,
    llvm::Value* int32_v,
    llvm::Value* int32_faceid,
    llvm::Value* int32_cube_array_index,
    llvm::Value *float_array_6_3,
    llvm::Value* int32_sampler
    )
{
    // Builds cube sampling parameters from integer (un-normalized) texel coordinates
    // (int32_u, int32_v) on the face selected by int32_faceid.
    //
    // The sampler's point of reference is the center of the face, (0,0), so the four
    // corners of the normalized face are:
    //
    //   (-1,-1)        (1,-1)
    //     -------|---------
    //     |      |        |
    //     |      |        |
    //     |---------------|
    //     |      |(0,0)   |
    //     |      |        |
    //     -------|---------
    //   (-1,1)          (1,1)
    //
    // Each un-normalized coordinate is therefore mapped into [-1,1] with:
    //   u = (u * 2 + 1) / width  - 1
    //   v = (v * 2 + 1) / height - 1
    //
    // float_array_6_3 is used as scratch storage, addressed as [face][dimension].
    // NOTE(review): presumably a pointer to a [6 x [3 x float]] allocation - confirm
    // at the call sites.

    llvm::Type* const floatTy = this->getFloatTy();

    // Width and height of the resource come from resinfo (elements 0 and 1).
    llvm::Value* const resInfo = this->Create_resinfo(int32_lod, int32_textureIdx);
    llvm::Value* const widthInt = this->CreateExtractElement(resInfo, m_int0);
    llvm::Value* const heightInt = this->CreateExtractElement(resInfo, m_int1);

    // Convert u, v, width and height to float (all treated as unsigned).
    llvm::Value* const texelU = this->CreateUIToFP(int32_u, floatTy);
    llvm::Value* const texelV = this->CreateUIToFP(int32_v, floatTy);
    llvm::Value* const widthF = this->CreateUIToFP(widthInt, floatTy);
    llvm::Value* const heightF = this->CreateUIToFP(heightInt, floatTy);

    // Constants used below.
    llvm::Value* const negOne = this->getFloat(-1.0);
    llvm::Value* const two = this->getFloat(2.0);

    // u = (2*u + 1)/width - 1
    llvm::Value* const doubledU = this->CreateFMul(texelU, two);
    llvm::Value* const centeredU = this->CreateFAdd(doubledU, m_float1);
    llvm::Value* const scaledU = this->CreateFDiv(centeredU, widthF);
    llvm::Value* const normU = this->CreateFSub(scaledU, m_float1);

    // v = (2*v + 1)/height - 1
    llvm::Value* const doubledV = this->CreateFMul(texelV, two);
    llvm::Value* const centeredV = this->CreateFAdd(doubledV, m_float1);
    llvm::Value* const scaledV = this->CreateFDiv(centeredV, heightF);
    llvm::Value* const normV = this->CreateFSub(scaledV, m_float1);

    // Negated coordinates, needed by several faces.
    llvm::Value* const negU = this->CreateFMul(normU, negOne); // -u
    llvm::Value* const negV = this->CreateFMul(normV, negOne); // -v

    llvm::Value* const arrayIndexF = this->CreateUIToFP(int32_cube_array_index, floatTy);

    constexpr unsigned kNumCubeFaces = 6;
    constexpr unsigned kNumDimensions = 3;

    // Face-id to cube-direction mapping:
    //   +x -> face 0, -x -> face 1, +y -> face 2, -y -> face 3, +z -> face 4, -z -> face 5
    // Per face, the two non-major components are taken from the normalized (u, v) as:
    //   face 0 (+X) = (-v, -u), face 1 (-X) = (-v, u), face 2 (+Y) = (u, v)
    //   face 3 (-Y) = (u, -v),  face 4 (+Z) = (u, -v), face 5 (-Z) = (-u, -v)
    // and the major component is +1 or -1.
    // Refer to https://en.wikipedia.org/wiki/Cube_mapping for details.
    llvm::Value* const faceCoords[kNumCubeFaces][kNumDimensions] = {
        { m_float1, negV,     negU     }, // +x = face 0
        { negOne,   negV,     normU    }, // -x = face 1
        { normU,    m_float1, normV    }, // +y = face 2
        { normU,    negOne,   negV     }, // -y = face 3
        { normU,    negV,     m_float1 }, // +z = face 4
        { negU,     negV,     negOne   }  // -z = face 5
    };

    // Store the whole table into the 6x3 scratch array.
    for (unsigned face = 0; face < kNumCubeFaces; ++face)
    {
        llvm::Value* rowIndices[] = { m_int0, this->getInt32(face) };
        llvm::Value* const rowPtr = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(rowIndices, 2));
        for (unsigned dim = 0; dim < kNumDimensions; ++dim)
        {
            llvm::Value* eltIndices[] = { m_int0, this->getInt32(dim) };
            llvm::Value* const eltPtr = this->CreateGEP(rowPtr, llvm::ArrayRef<llvm::Value*>(eltIndices, 2));
            this->CreateStore(faceCoords[face][dim], eltPtr);
        }
    }

    // Load back the row selected by the (possibly non-constant) int32_faceid.
    llvm::Value* selectedCoords[kNumDimensions];
    llvm::Value* selectedRowIndices[] = { m_int0, int32_faceid };
    llvm::Value* const selectedRowPtr = this->CreateGEP(float_array_6_3, llvm::ArrayRef<llvm::Value*>(selectedRowIndices, 2));
    for (unsigned dim = 0; dim < kNumDimensions; ++dim)
    {
        llvm::Value* eltIndices[] = { m_int0, this->getInt32(dim) };
        llvm::Value* const eltPtr = this->CreateGEP(selectedRowPtr, llvm::ArrayRef<llvm::Value*>(eltIndices, 2));
        selectedCoords[dim] = this->CreateLoad(eltPtr);
    }

    SampleParamsFromCube result;
    result.float_xcube = selectedCoords[0];
    result.float_ycube = selectedCoords[1];
    result.float_address_3 = selectedCoords[2];
    result.float_aicube = arrayIndexF;
    result.int32_textureIdx = int32_textureIdx;
    result.int32_sampler = int32_sampler;
    // The offsets carry the original integer texel coordinates.
    result.offsetU = int32_u;
    result.offsetV = int32_v;
    result.offsetR = m_int0; // Not used
    return result;
}

template<bool preserveNames, typename T, typename Inserter>
inline SampleParamsFromCube LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_Cube_Params(
    llvm::Value* float_address_0,
    llvm::Value* float_address_1,
    llvm::Value* float_address_2,
    llvm::Value* float_address_3,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler)
{
    // Scales the three cube coordinates (float_address_0..2) by the largest of their
    // absolute values, i.e. divides each one by max(|x|, |y|, |ai|).
    // float_address_3, the texture and the sampler are passed through unchanged and
    // all three offsets are set to zero.
    // Coordinates must be of float or half type (asserted below).

    IGC_ASSERT(nullptr != float_address_0);
    llvm::Type* const coordTy = float_address_0->getType();
    IGC_ASSERT(nullptr != coordTy);
    IGC_ASSERT(coordTy->isFloatTy() || coordTy->isHalfTy());

    llvm::Value* const fpZero = llvm::ConstantFP::get(coordTy, 0.0);

    // |x|, built as: select(x >= 0, x, 0 - x)
    llvm::Value* const negX = this->CreateFSub(fpZero, float_address_0, VALUE_NAME("xneg_s"));
    llvm::Value* const xIsNonNeg = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_0, fpZero, VALUE_NAME("cmpx_s"));
    llvm::Value* const absX = this->CreateSelect(xIsNonNeg, float_address_0, negX, VALUE_NAME("xabs_s"));

    // |y|, same pattern.
    llvm::Value* const negY = this->CreateFSub(fpZero, float_address_1, VALUE_NAME("yneg_s"));
    llvm::Value* const yIsNonNeg = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_1, fpZero, VALUE_NAME("cmpy_s"));
    llvm::Value* const absY = this->CreateSelect(yIsNonNeg, float_address_1, negY, VALUE_NAME("yabs_s"));

    // |ai|, same pattern.
    llvm::Value* const negAi = this->CreateFSub(fpZero, float_address_2, VALUE_NAME("aineg_s"));
    llvm::Value* const aiIsNonNeg = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_address_2, fpZero, VALUE_NAME("cmpai_s"));
    llvm::Value* const absAi = this->CreateSelect(aiIsNonNeg, float_address_2, negAi, VALUE_NAME("aiabs_s"));

    // max(|x|, |y|)
    llvm::Value* const xGeY = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, absX, absY, VALUE_NAME("oge0_s"));
    llvm::Value* const maxXY = this->CreateSelect(xGeY, absX, absY, VALUE_NAME("max1_s"));

    // max(max(|x|, |y|), |ai|)
    llvm::Value* const xyGeAi = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, maxXY, absAi, VALUE_NAME("oge1_s"));
    llvm::Value* const maxAll = this->CreateSelect(xyGeAi, maxXY, absAi, VALUE_NAME("max2_s"));

    // Divide every coordinate by the maximum magnitude.
    llvm::Value* const cubeX = this->CreateFDiv(float_address_0, maxAll, VALUE_NAME("xcube_s"));
    llvm::Value* const cubeY = this->CreateFDiv(float_address_1, maxAll, VALUE_NAME("ycube_s"));
    llvm::Value* const cubeAi = this->CreateFDiv(float_address_2, maxAll, VALUE_NAME("aicube_s"));

    SampleParamsFromCube result;

    result.float_xcube = cubeX;
    result.float_ycube = cubeY;
    result.float_aicube = cubeAi;
    result.float_address_3 = float_address_3;
    result.int32_textureIdx = int32_textureIdx;
    result.int32_sampler = int32_sampler;
    result.offsetU = m_int0;
    result.offsetV = m_int0;
    result.offsetR = m_int0;

    return result;
}

template<bool preserveNames, typename T, typename Inserter>
inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
    SampleD_DC_FromCubeParams& params)
{
    // Convenience overload: unpacks the parameter bundle and forwards every field to
    // the per-value overload. Coordinates go first (u, v, r, ai), then the dx*
    // values, then the dy* values, and finally texture, sampler and the three offsets.
    return this->Prepare_SAMPLE_D_DC_Cube_Params(
        // coordinates
        params.float_src_u,
        params.float_src_v,
        params.float_src_r,
        params.float_src_ai,
        // dx* values
        params.dxu,
        params.dxv,
        params.dxr,
        // dy* values
        params.dyu,
        params.dyv,
        params.dyr,
        // resources
        params.int32_textureIdx,
        params.int32_sampler,
        // offsets
        params.int32_offsetU,
        params.int32_offsetV,
        params.int32_offsetW);
}

template<bool preserveNames, typename T, typename Inserter>
inline SampleD_DC_FromCubeParams LLVM3DBuilder<preserveNames, T, Inserter>::Prepare_SAMPLE_D_DC_Cube_Params(
    llvm::Value* float_src_r,
    llvm::Value* float_src_s,
    llvm::Value* float_src_t,
    llvm::Value* float_src_ai,
    llvm::Value* float_drdx,
    llvm::Value* float_dsdx,
    llvm::Value* float_dtdx,
    llvm::Value* float_drdy,
    llvm::Value* float_dsdy,
    llvm::Value* float_dtdy,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW)
{
    //  For cube texture sampling, sampling instruction must receive proper cube face ID
    //  together with coordinates projected onto that face. Gradients also have to be transformed
    //  into the same (cube face) address space.
    //  To achieve this we first have to find a major coordinate, then normalize coordinates
    //  and select remaining ones as u/v coordinates for the face. Because of the cube texture layout
    //  in memory (as 6 2D faces) this sometimes involves changing the coordinate direction (sign).
    //  Gradients are transformed using quotient rule for derivatives:
    //        (fA/fB)' = (fA'*fB - fB'*fA)/fB^2
    //  where fA and fB are base functions, i.e. base cube coordinates in this case.
    //  Note that we first normalize coordinates and all derivatives, so calculations
    //  here use the form:
    //        (fA/fB)' = [fA'/fB] - [fB'/fB]*[fA/fB]

    IGC_ASSERT(nullptr != this->GetInsertBlock());
    llvm::Function* const parentFunc = this->GetInsertBlock()->getParent();
    IGC_ASSERT(nullptr != float_src_r);
    llvm::Type* const coordType = float_src_r->getType();
    IGC_ASSERT(nullptr != coordType);
    IGC_ASSERT(coordType->isFloatTy() || coordType->isHalfTy());

    llvm::Value* zero = llvm::ConstantFP::get(coordType, 0.0);

    // Create coordinate absolute values to look for major.
    llvm::Value* float_abs_r = this->CreateFAbs(float_src_r);
    llvm::Value* float_abs_s = this->CreateFAbs(float_src_s);
    llvm::Value* float_abs_t = this->CreateFAbs(float_src_t);

    {
        llvm::BasicBlock* currentBlock = this->GetInsertBlock();
        bool shouldSplitBB = this->GetInsertPoint() != currentBlock->end();

        // Create basic blocks.
        llvm::BasicBlock* block_final = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubefinal_block"));

        llvm::BasicBlock* block_major_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_t_block"));
        llvm::BasicBlock* block_not_t = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubenott_block"));
        llvm::BasicBlock* block_zp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zp_block"));
        llvm::BasicBlock* block_zm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_zm_block"));

        llvm::BasicBlock* block_major_s = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_s_block"));
        llvm::BasicBlock* block_yp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_yp_block"));
        llvm::BasicBlock* block_ym = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_ym_block"));

        llvm::BasicBlock* block_major_r = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cubemajor_r_block"));
        llvm::BasicBlock* block_xp = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xp_block"));
        llvm::BasicBlock* block_xm = llvm::BasicBlock::Create(this->getContext(), VALUE_NAME("cube_face_xm_block"));

        // Find the major coordinate (and thus cube face), precedence is Z,Y,X.
        llvm::Value* int1_cmp_tges = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_s, VALUE_NAME("cmp_tges"));

        llvm::Value* int1_cmp_tger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_t, float_abs_r, VALUE_NAME("cmp_tger"));

        llvm::Value* int1_tgesr = this->CreateAnd(int1_cmp_tger, int1_cmp_tges);

        // Major coordinate is T, faces could be +Z or -Z
        llvm::BasicBlock* splitBlock = nullptr;
        if (shouldSplitBB)
        {
            IGC_ASSERT(nullptr != currentBlock);
            IGC_ASSERT(currentBlock->getTerminator());
            splitBlock = currentBlock->splitBasicBlock(this->GetInsertPoint()->getNextNode());
            currentBlock->getTerminator()->eraseFromParent();
            this->SetInsertPoint(currentBlock);
        }
        this->CreateCondBr(int1_tgesr, block_major_t, block_not_t);
        this->SetInsertPoint(block_major_t);
        parentFunc->getBasicBlockList().push_back(block_major_t);

        // Normalize coordinates and gradients.
        llvm::Value* float_tnorm_r = this->CreateFDiv(float_src_r, float_abs_t, VALUE_NAME("tnorm_r"));
        llvm::Value* float_tnorm_s = this->CreateFDiv(float_src_s, float_abs_t, VALUE_NAME("tnorm_s"));
        llvm::Value* float_tnorm_drdx = this->CreateFDiv(float_drdx, float_abs_t, VALUE_NAME("tnorm_drdx"));
        llvm::Value* float_tnorm_drdy = this->CreateFDiv(float_drdy, float_abs_t, VALUE_NAME("tnorm_drdy"));
        llvm::Value* float_tnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_t, VALUE_NAME("tnorm_dsdx"));
        llvm::Value* float_tnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_t, VALUE_NAME("tnorm_dsdy"));
        llvm::Value* float_tnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_t, VALUE_NAME("tnorm_dtdx"));
        llvm::Value* float_tnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_t, VALUE_NAME("tnorm_dtdy"));

        // Select positive or negative face.
        llvm::Value* int1_cmpx_t = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_t, zero, VALUE_NAME("cmpx_t"));
        this->CreateCondBr(int1_cmpx_t, block_zp, block_zm);
        this->SetInsertPoint(block_zp);
        parentFunc->getBasicBlockList().push_back(block_zp);

        // Face +Z,
        // major = neg T
        // u     = R
        // v     = neg S

        llvm::Value* float_face_zp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 4.0));

        // Select u from s/r/t
        llvm::Value* float_face_zp_u = float_tnorm_r;

        // Select v from s/r/t
        llvm::Value* float_face_zp_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zp_v"));

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_neg_dmx4 = this->CreateFNeg(float_tnorm_dtdx, VALUE_NAME("neg_dmx"));
        llvm::Value* float_dmxu4 = this->CreateFMul(float_neg_dmx4, float_tnorm_r, VALUE_NAME("dmxu"));
        llvm::Value* float_face_zp_dudx = this->CreateFAdd(float_dmxu4, float_tnorm_drdx, VALUE_NAME("face_zp_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_neg_dmy4 = this->CreateFNeg(float_tnorm_dtdy, VALUE_NAME("neg_dmy"));
        llvm::Value* float_dmyu4 = this->CreateFMul(float_neg_dmy4, float_tnorm_r, VALUE_NAME("dmyu"));
        llvm::Value* float_face_zp_dudy = this->CreateFAdd(float_dmyu4, float_tnorm_drdy, VALUE_NAME("face_zp_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv4 = this->CreateFMul(float_tnorm_dtdx, float_tnorm_s, VALUE_NAME("dmxv"));
        llvm::Value* float_face_zp_dvdx = this->CreateFSub(float_dmxv4, float_tnorm_dsdx, VALUE_NAME("face_zp_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv4 = this->CreateFMul(float_tnorm_dtdy, float_tnorm_s, VALUE_NAME("dmyv"));
        llvm::Value* float_face_zp_dvdy = this->CreateFSub(float_dmyv4, float_tnorm_dsdy, VALUE_NAME("face_zp_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_zm);
        parentFunc->getBasicBlockList().push_back(block_zm);

        // Face -Z,
        // major = T
        // u     = neg R
        // v     = neg S

        llvm::Value* float_face_zm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 5.0));

        // Select u from s/r/t
        llvm::Value* float_face_zm_u = this->CreateFNeg(float_tnorm_r, VALUE_NAME("face_zm_u"));

        // Select v from s/r/t
        llvm::Value* float_face_zm_v = this->CreateFNeg(float_tnorm_s, VALUE_NAME("face_zm_v"));

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_dmxu5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_u, VALUE_NAME("dmxu"));
        llvm::Value* float_face_zm_dudx = this->CreateFSub(float_dmxu5, float_tnorm_drdx, VALUE_NAME("face_zm_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_dmyu5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_u, VALUE_NAME("dmyu"));
        llvm::Value* float_face_zm_dudy = this->CreateFSub(float_dmyu5, float_tnorm_drdy, VALUE_NAME("face_zm_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv5 = this->CreateFMul(float_tnorm_dtdx, float_face_zm_v, VALUE_NAME("dmxv"));
        llvm::Value* float_face_zm_dvdx = this->CreateFSub(float_dmxv5, float_tnorm_dsdx, VALUE_NAME("face_zm_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv5 = this->CreateFMul(float_tnorm_dtdy, float_face_zm_v, VALUE_NAME("dmyv"));
        llvm::Value* float_face_zm_dvdy = this->CreateFSub(float_dmyv5, float_tnorm_dsdy, VALUE_NAME("face_zm_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_not_t);
        parentFunc->getBasicBlockList().push_back(block_not_t);

        // Choose major S or R.
        llvm::Value* int1_cmp_sger = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_abs_s, float_abs_r, VALUE_NAME("cmp_sger"));

        // Major coordinate is S, faces could be +Y or -Y
        this->CreateCondBr(int1_cmp_sger, block_major_s, block_major_r);
        this->SetInsertPoint(block_major_s);
        parentFunc->getBasicBlockList().push_back(block_major_s);

        // Normalize coordinates and gradients.
        llvm::Value* float_snorm_r = this->CreateFDiv(float_src_r, float_abs_s, VALUE_NAME("snorm_r"));
        llvm::Value* float_snorm_t = this->CreateFDiv(float_src_t, float_abs_s, VALUE_NAME("snorm_t"));
        llvm::Value* float_snorm_drdx = this->CreateFDiv(float_drdx, float_abs_s, VALUE_NAME("snorm_drdx"));
        llvm::Value* float_snorm_drdy = this->CreateFDiv(float_drdy, float_abs_s, VALUE_NAME("snorm_drdy"));
        llvm::Value* float_snorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_s, VALUE_NAME("snorm_dsdx"));
        llvm::Value* float_snorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_s, VALUE_NAME("snorm_dsdy"));
        llvm::Value* float_snorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_s, VALUE_NAME("snorm_dtdx"));
        llvm::Value* float_snorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_s, VALUE_NAME("snorm_dtdy"));

        // Select positive or negative face.
        llvm::Value* int1_cmpx_s = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_s, zero, VALUE_NAME("cmpx_s"));
        this->CreateCondBr(int1_cmpx_s, block_yp, block_ym);
        this->SetInsertPoint(block_yp);
        parentFunc->getBasicBlockList().push_back(block_yp);

        // Face +Y,
        // major = neg S
        // u     = R
        // v     = T

        llvm::Value* float_face_yp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 2.0));

        // Select u from s/r/t
        llvm::Value* float_face_yp_u = float_snorm_r;

        // Select v from s/r/t
        llvm::Value* float_face_yp_v = float_snorm_t;

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_neg_dmx2 = this->CreateFNeg(float_snorm_dsdx, VALUE_NAME("neg_dmx"));
        llvm::Value* float_dmxu2 = this->CreateFMul(float_neg_dmx2, float_snorm_r, VALUE_NAME("dmxu"));
        llvm::Value* float_face_yp_dudx = this->CreateFAdd(float_dmxu2, float_snorm_drdx, VALUE_NAME("face_yp_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_neg_dmy2 = this->CreateFNeg(float_snorm_dsdy, VALUE_NAME("neg_dmy"));
        llvm::Value* float_dmyu2 = this->CreateFMul(float_neg_dmy2, float_snorm_r, VALUE_NAME("dmyu"));
        llvm::Value* float_face_yp_dudy = this->CreateFAdd(float_dmyu2, float_snorm_drdy, VALUE_NAME("face_yp_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv2 = this->CreateFMul(float_neg_dmx2, float_snorm_t, VALUE_NAME("dmxv"));
        llvm::Value* float_face_yp_dvdx = this->CreateFAdd(float_dmxv2, float_snorm_dtdx, VALUE_NAME("face_yp_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv2 = this->CreateFMul(float_neg_dmy2, float_snorm_t, VALUE_NAME("dmyv"));
        llvm::Value* float_face_yp_dvdy = this->CreateFAdd(float_dmyv2, float_snorm_dtdy, VALUE_NAME("face_yp_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_ym);
        parentFunc->getBasicBlockList().push_back(block_ym);

        // Face -Y,
        // major = S
        // u     = R
        // v     = neg T

        llvm::Value* float_face_ym_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 3.0));

        // Select u from s/r/t
        llvm::Value* float_face_ym_u = float_snorm_r;

        // Select v from s/r/t
        llvm::Value* float_face_ym_v = this->CreateFNeg(float_snorm_t, VALUE_NAME("face_ym_v"));

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_dmxu3 = this->CreateFMul(float_snorm_dsdx, float_snorm_r, VALUE_NAME("dmxu"));
        llvm::Value* float_face_ym_dudx = this->CreateFAdd(float_dmxu3, float_snorm_drdx, VALUE_NAME("face_ym_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_dmyu3 = this->CreateFMul(float_snorm_dsdy, float_snorm_r, VALUE_NAME("dmyu"));
        llvm::Value* float_face_ym_dudy = this->CreateFAdd(float_dmyu3, float_snorm_drdy, VALUE_NAME("face_ym_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmxv"));
        llvm::Value* float_face_ym_dvdx = this->CreateFSub(float_dmxv3, float_snorm_dtdx, VALUE_NAME("face_ym_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv3 = this->CreateFMul(float_snorm_dsdx, float_face_ym_v, VALUE_NAME("dmyv"));
        llvm::Value* float_face_ym_dvdy = this->CreateFSub(float_dmyv3, float_snorm_dtdy, VALUE_NAME("face_ym_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_major_r);
        parentFunc->getBasicBlockList().push_back(block_major_r);

        // Major coordinate is R, faces could be +X or -X

        // Normalize coordinates and gradients.
        llvm::Value* float_rnorm_s = this->CreateFDiv(float_src_s, float_abs_r, VALUE_NAME("rnorm_r"));
        llvm::Value* float_rnorm_t = this->CreateFDiv(float_src_t, float_abs_r, VALUE_NAME("rnorm_t"));
        llvm::Value* float_rnorm_drdx = this->CreateFDiv(float_drdx, float_abs_r, VALUE_NAME("rnorm_drdx"));
        llvm::Value* float_rnorm_drdy = this->CreateFDiv(float_drdy, float_abs_r, VALUE_NAME("rnorm_drdy"));
        llvm::Value* float_rnorm_dsdx = this->CreateFDiv(float_dsdx, float_abs_r, VALUE_NAME("rnorm_dsdx"));
        llvm::Value* float_rnorm_dsdy = this->CreateFDiv(float_dsdy, float_abs_r, VALUE_NAME("rnorm_dsdy"));
        llvm::Value* float_rnorm_dtdx = this->CreateFDiv(float_dtdx, float_abs_r, VALUE_NAME("rnorm_dtdx"));
        llvm::Value* float_rnorm_dtdy = this->CreateFDiv(float_dtdy, float_abs_r, VALUE_NAME("rnorm_dtdy"));

        // Select positive or negative face.
        llvm::Value* int1_cmpx_r = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, float_src_r, zero, VALUE_NAME("cmpx_r"));
        this->CreateCondBr(int1_cmpx_r, block_xp, block_xm);
        this->SetInsertPoint(block_xp);
        parentFunc->getBasicBlockList().push_back(block_xp);

        // Face +X,
        // major = neg R
        // u     = neg T
        // v     = neg S

        llvm::Value* float_face_xp_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 0.0));

        // Select u from s/r/t
        llvm::Value* float_face_xp_u = this->CreateFNeg(float_rnorm_t, VALUE_NAME("face_xp_u"));

        // Select v from s/r/t
        llvm::Value* float_face_xp_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xp_v"));

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_dmxu0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
        llvm::Value* float_face_xp_dudx = this->CreateFSub(float_dmxu0, float_rnorm_dtdx, VALUE_NAME("face_xp_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_dmyu0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
        llvm::Value* float_face_xp_dudy = this->CreateFSub(float_dmyu0, float_rnorm_dtdy, VALUE_NAME("face_xp_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv0 = this->CreateFMul(float_rnorm_drdx, float_rnorm_s, VALUE_NAME("dmxv"));
        llvm::Value* float_face_xp_dvdx = this->CreateFSub(float_dmxv0, float_rnorm_dsdx, VALUE_NAME("face_xp_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv0 = this->CreateFMul(float_rnorm_drdy, float_rnorm_s, VALUE_NAME("dmyv"));
        llvm::Value* float_face_xp_dvdy = this->CreateFSub(float_dmyv0, float_rnorm_dsdy, VALUE_NAME("face_xp_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_xm);
        parentFunc->getBasicBlockList().push_back(block_xm);

        // Face -X,
        // major = R
        // u     = T
        // v     = neg S

        llvm::Value* float_face_xm_id = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(coordType, 1.0));

        // Select u from s/r/t
        llvm::Value* float_face_xm_u = float_rnorm_t;

        // Select v from s/r/t
        llvm::Value* float_face_xm_v = this->CreateFNeg(float_rnorm_s, VALUE_NAME("face_xm_v"));

        // du/dx = dm * u + d{s/r/t}/dx
        llvm::Value* float_dmxu1 = this->CreateFMul(float_rnorm_drdx, float_rnorm_t, VALUE_NAME("dmxu"));
        llvm::Value* float_face_xm_dudx = this->CreateFAdd(float_dmxu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dudx"));

        // du/dy = dm * u + d{s/r/t}/dy
        llvm::Value* float_dmyu1 = this->CreateFMul(float_rnorm_drdy, float_rnorm_t, VALUE_NAME("dmyu"));
        llvm::Value* float_face_xm_dudy = this->CreateFAdd(float_dmyu1, float_rnorm_dtdx, VALUE_NAME("face_xm_dvdx"));

        // dv/dx = dm * v + d{s/r/t}/dx
        llvm::Value* float_dmxv1 = this->CreateFMul(float_rnorm_drdx, float_face_xm_v, VALUE_NAME("dmxv"));
        llvm::Value* float_face_xm_dvdx = this->CreateFSub(float_dmxv1, float_rnorm_dsdx, VALUE_NAME("face_xm_dvdx"));

        // dv/dy = dm * v + d{s/r/t}/dy
        llvm::Value* float_dmyv1 = this->CreateFMul(float_rnorm_drdy, float_face_xm_v, VALUE_NAME("dmyv"));
        llvm::Value* float_face_xm_dvdy = this->CreateFSub(float_dmyv1, float_rnorm_dsdy, VALUE_NAME("face_xm_dvdy"));

        this->CreateBr(block_final);
        this->SetInsertPoint(block_final);
        parentFunc->getBasicBlockList().push_back(block_final);

        llvm::PHINode* phi_u = this->CreatePHI(coordType, 6, VALUE_NAME("phi_u"));
        phi_u->addIncoming(float_face_xp_u, block_xp);
        phi_u->addIncoming(float_face_xm_u, block_xm);
        phi_u->addIncoming(float_face_yp_u, block_yp);
        phi_u->addIncoming(float_face_ym_u, block_ym);
        phi_u->addIncoming(float_face_zp_u, block_zp);
        phi_u->addIncoming(float_face_zm_u, block_zm);

        llvm::PHINode* phi_v = this->CreatePHI(coordType, 6, VALUE_NAME("phi_v"));
        phi_v->addIncoming(float_face_xp_v, block_xp);
        phi_v->addIncoming(float_face_xm_v, block_xm);
        phi_v->addIncoming(float_face_yp_v, block_yp);
        phi_v->addIncoming(float_face_ym_v, block_ym);
        phi_v->addIncoming(float_face_zp_v, block_zp);
        phi_v->addIncoming(float_face_zm_v, block_zm);

        llvm::PHINode* phi_dudx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudx"));
        phi_dudx->addIncoming(float_face_xp_dudx, block_xp);
        phi_dudx->addIncoming(float_face_xm_dudx, block_xm);
        phi_dudx->addIncoming(float_face_yp_dudx, block_yp);
        phi_dudx->addIncoming(float_face_ym_dudx, block_ym);
        phi_dudx->addIncoming(float_face_zp_dudx, block_zp);
        phi_dudx->addIncoming(float_face_zm_dudx, block_zm);

        llvm::PHINode* phi_dudy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dudy"));
        phi_dudy->addIncoming(float_face_xp_dudy, block_xp);
        phi_dudy->addIncoming(float_face_xm_dudy, block_xm);
        phi_dudy->addIncoming(float_face_yp_dudy, block_yp);
        phi_dudy->addIncoming(float_face_ym_dudy, block_ym);
        phi_dudy->addIncoming(float_face_zp_dudy, block_zp);
        phi_dudy->addIncoming(float_face_zm_dudy, block_zm);

        llvm::PHINode* phi_dvdx = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdx"));
        phi_dvdx->addIncoming(float_face_xp_dvdx, block_xp);
        phi_dvdx->addIncoming(float_face_xm_dvdx, block_xm);
        phi_dvdx->addIncoming(float_face_yp_dvdx, block_yp);
        phi_dvdx->addIncoming(float_face_ym_dvdx, block_ym);
        phi_dvdx->addIncoming(float_face_zp_dvdx, block_zp);
        phi_dvdx->addIncoming(float_face_zm_dvdx, block_zm);

        llvm::PHINode* phi_dvdy = this->CreatePHI(coordType, 6, VALUE_NAME("phi_dvdy"));
        phi_dvdy->addIncoming(float_face_xp_dvdy, block_xp);
        phi_dvdy->addIncoming(float_face_xm_dvdy, block_xm);
        phi_dvdy->addIncoming(float_face_yp_dvdy, block_yp);
        phi_dvdy->addIncoming(float_face_ym_dvdy, block_ym);
        phi_dvdy->addIncoming(float_face_zp_dvdy, block_zp);
        phi_dvdy->addIncoming(float_face_zm_dvdy, block_zm);

        llvm::PHINode* phi_face_id = this->CreatePHI(coordType, 6, VALUE_NAME("phi_face_id"));
        phi_face_id->addIncoming(float_face_xp_id, block_xp);
        phi_face_id->addIncoming(float_face_xm_id, block_xm);
        phi_face_id->addIncoming(float_face_yp_id, block_yp);
        phi_face_id->addIncoming(float_face_ym_id, block_ym);
        phi_face_id->addIncoming(float_face_zp_id, block_zp);
        phi_face_id->addIncoming(float_face_zm_id, block_zm);

        if (shouldSplitBB)
        {
            llvm::BranchInst* brInst = this->CreateBr(splitBlock);
            this->SetInsertPoint(brInst);
        }

        SampleD_DC_FromCubeParams D_DC_CUBE_params;

        D_DC_CUBE_params.float_src_u = phi_u;
        D_DC_CUBE_params.dxu = phi_dudx;
        D_DC_CUBE_params.dyu = phi_dudy;
        D_DC_CUBE_params.float_src_v = phi_v;
        D_DC_CUBE_params.dxv = phi_dvdx;
        D_DC_CUBE_params.dyv = phi_dvdy;
        D_DC_CUBE_params.float_src_r = phi_face_id;
        D_DC_CUBE_params.dxr = zero;
        D_DC_CUBE_params.dyr = zero;
        D_DC_CUBE_params.float_src_ai = float_src_ai;
        D_DC_CUBE_params.int32_textureIdx = int32_textureIdx;
        D_DC_CUBE_params.int32_sampler = int32_sampler;
        D_DC_CUBE_params.int32_offsetU = m_int0;
        D_DC_CUBE_params.int32_offsetV = m_int0;
        D_DC_CUBE_params.int32_offsetW = m_int0;

        return D_DC_CUBE_params;
    }
}

// Emits a call to the llvm.fabs intrinsic, overloaded on the type of V,
// through this builder and returns the resulting call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFAbs(llvm::Value* V)
{
    // The intrinsic declaration lives in the module that owns the block
    // currently being built.
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const fabsDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::fabs,
        V->getType());

    return this->CreateCall(fabsDecl, V);
}

// Emits a call to the GenISA_fsat intrinsic, overloaded on the type of V,
// and returns the resulting call.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFSat(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const fsatDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_fsat,
        V->getType());

    return this->CreateCall(fsatDecl, V);
}

// Widens f16_src to the builder's float type with an FPExt and returns the
// extended value.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateF16TOF32(
    llvm::Value* f16_src)
{
    return this->CreateFPExt(f16_src, this->getFloatTy(), VALUE_NAME("src0_s"));
}

/*****************************************************************************\
Description:
Reports whether a read of the given surface format needs an additional
conversion step. Only the three R32G32B32A32 formats (FLOAT, UINT, SINT)
are reported as not needing one; every other format yields true.

Input:
SURFACE_FORMAT format           - surface format being read

Output:
bool - true when conversion is needed, false otherwise.

\*****************************************************************************/
template<bool preserveNames, typename T, typename Inserter>
bool LLVM3DBuilder<preserveNames, T, Inserter>::NeedConversionFor128FormatRead(
    IGC::SURFACE_FORMAT format) const
{
    const bool isNative32x4 =
        (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_FLOAT) ||
        (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_UINT) ||
        (format == IGC::SURFACE_FORMAT::SURFACE_FORMAT_R32G32B32A32_SINT);

    return !isNative32x4;
}

// Emits a call to the GenISA ubfe intrinsic with the operands in the order
// (width, offset, source) and returns the call instruction.
//
//   %res = call i32 @llvm.GenISA.ubfe(i32 %int32_width, i32 %int32_offset, i32 %int32_source)
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_UBFE(
    llvm::Value* int32_width,
    llvm::Value* int32_offset,
    llvm::Value* int32_source)
{
    llvm::Value* ubfeArgs[] = { int32_width, int32_offset, int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ubfe(), ubfeArgs));
}

// Emits a call to the GenISA ibfe intrinsic with the operands in the order
// (width, offset, source) and returns the call instruction.
//
//   %res = call i32 @llvm.GenISA.ibfe(i32 %int32_width, i32 %int32_offset, i32 %int32_source)
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_IBFE(
    llvm::Value* int32_width,
    llvm::Value* int32_offset,
    llvm::Value* int32_source)
{
    llvm::Value* ibfeArgs[] = { int32_width, int32_offset, int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_ibfe(), ibfeArgs));
}

// Emits a call to the GenISA bfi intrinsic with the operands in the order
// (width, offset, source, replace) and returns the call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFI(
    llvm::Value* int32_width,
    llvm::Value* int32_offset,
    llvm::Value* int32_source,
    llvm::Value* int32_replace)
{
    llvm::Value* bfiArgs[] = { int32_width, int32_offset, int32_source, int32_replace };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfi(), bfiArgs));
}

// Emits a call to the GenISA bfrev intrinsic on int32_source and returns the
// call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_BFREV(
    llvm::Value* int32_source)
{
    llvm::Value* bfrevArgs[] = { int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_bfrev(), bfrevArgs));
}

// Emits a call to the GenISA firstbitHi intrinsic on int32_source and returns
// the call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitHi(
    llvm::Value* int32_source)
{
    llvm::Value* firstBitArgs[] = { int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitHi(), firstBitArgs));
}

// Emits a call to the GenISA firstbitLo intrinsic on int32_source and returns
// the call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitLo(
    llvm::Value* int32_source)
{
    llvm::Value* firstBitArgs[] = { int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitLo(), firstBitArgs));
}

// Emits a call to the GenISA firstbitShi intrinsic on int32_source and returns
// the call instruction.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_FirstBitShi(
    llvm::Value* int32_source)
{
    llvm::Value* firstBitArgs[] = { int32_source };

    return llvm::cast<llvm::CallInst>(this->CreateCall(llvm_GenISA_firstbitShi(), firstBitArgs));
}

// Emits a call to GenISA_ldrawvector_indexed, overloaded on
// (returnType, type of srcBuffer). The call operands are, in order:
// srcBuffer, offset, alignment and an i1 constant carrying isVolatile.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectLoad(
    llvm::Value* srcBuffer,
    llvm::Value* offset,
    llvm::Value* alignment,
    llvm::Type* returnType,
    bool isVolatile /* false */)
{
    llvm::Type* overloadTypes[] = { returnType, srcBuffer->getType() };

    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const loadDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed,
        overloadTypes);

    return this->CreateCall4(loadDecl, srcBuffer, offset, alignment, this->getInt1(isVolatile));
}

// Emits a call to GenISA_storerawvector_indexed, overloaded on
// (type of srcBuffer, type of data). The alignment operand is derived from
// the scalar element size of data, in bytes. The call operands are, in order:
// srcBuffer, offset, data, alignment and an i1 constant carrying isVolatile.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_indirectStore(
    llvm::Value* srcBuffer,
    llvm::Value* offset,
    llvm::Value* data,
    bool isVolatile /* false */ )
{
    llvm::Type* const dataType = data->getType();
    llvm::Type* overloadTypes[] = { srcBuffer->getType(), dataType };

    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const storeDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_storerawvector_indexed,
        overloadTypes);

    // Scalar size is reported in bits; the intrinsic is handed bytes.
    llvm::Value* elementBytes = this->getInt32(dataType->getScalarSizeInBits() / 8);

    return this->CreateCall5(storeDecl, srcBuffer, offset, data, elementBytes, this->getInt1(isVolatile));
}

// Emits a call to GenISA_atomiccounterinc, overloaded on the type of
// srcBuffer, with srcBuffer as its only operand.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterIncrement(
    llvm::Value* srcBuffer)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const counterIncDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_atomiccounterinc,
        srcBuffer->getType());

    return this->CreateCall(counterIncDecl, srcBuffer);
}

// Emits a call to GenISA_atomiccounterpredec, overloaded on the type of
// srcBuffer, with srcBuffer as its only operand.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_atomicCounterDecrement(
    llvm::Value* srcBuffer)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const counterPreDecDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_atomiccounterpredec,
        srcBuffer->getType());

    return this->CreateCall(counterPreDecDecl, srcBuffer);
}

// Emits a call to GenISA_DCL_SystemValue, overloaded on i32, whose operand is
// the i32 constant IGC::THREAD_ID_IN_GROUP_X + dim.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createThreadLocalId(unsigned int dim)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const sysValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
        this->getInt32Ty());

    return this->CreateCall(sysValueDecl, this->getInt32(IGC::THREAD_ID_IN_GROUP_X + dim));
}

// Emits a call to GenISA_DCL_SystemValue, overloaded on float, whose operand
// is the i32 constant IGC::THREAD_GROUP_ID_X + dim, and returns that result
// bitcast to i32.
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::createGroupId(unsigned int dim)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const sysValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
        this->getFloatTy());

    // The system value is declared as float here, so reinterpret it as i32.
    llvm::Value* const groupIdAsFloat =
        this->CreateCall(sysValueDecl, this->getInt32(IGC::THREAD_GROUP_ID_X + dim));
    return this->CreateBitCast(groupIdAsFloat, this->getInt32Ty());
}

// Emits a call to the GenISA_frc intrinsic on V. The declaration is requested
// without any overload types.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFrc(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const frcDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_frc);

    return this->CreateCall(frcDecl, V);
}

// Emits a call to the llvm.sin intrinsic, overloaded on the type of V.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSin(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const sinDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::sin,
        V->getType());

    return this->CreateCall(sinDecl, V);
}

// Emits a call to the llvm.cos intrinsic, overloaded on the type of V.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCos(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const cosDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::cos,
        V->getType());

    return this->CreateCall(cosDecl, V);
}

// Emits a call to the llvm.sqrt intrinsic, overloaded on the type of V.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSqrt(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const sqrtDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::sqrt,
        V->getType());

    return this->CreateCall(sqrtDecl, V);
}

// Emits a call to the llvm.pow intrinsic with operands (LHS, RHS). The
// overload type is taken from LHS only.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFPow(llvm::Value *LHS, llvm::Value *RHS)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const powDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::pow,
        LHS->getType());

    return this->CreateCall2(powDecl, LHS, RHS);
}

// Emits a call to the llvm.maxnum intrinsic with operands (LHS, RHS). The
// overload type is taken from LHS only.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMax(llvm::Value *LHS, llvm::Value *RHS)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const maxnumDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::maxnum,
        LHS->getType());

    return this->CreateCall2(maxnumDecl, LHS, RHS);
}

// Emits a call to the llvm.minnum intrinsic with operands (LHS, RHS). The
// overload type is taken from LHS only.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFMin(llvm::Value *LHS, llvm::Value *RHS)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const minnumDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::minnum,
        LHS->getType());

    return this->CreateCall2(minnumDecl, LHS, RHS);
}

// Emits a call to the GenISA_imulH intrinsic with operands (LHS, RHS). The
// overload type is taken from LHS only.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIMulH(llvm::Value *LHS, llvm::Value *RHS)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const imulHDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_imulH,
        LHS->getType());

    return this->CreateCall2(imulHDecl, LHS, RHS);
}

// Emits a call to the GenISA_umulH intrinsic with operands (LHS, RHS). The
// overload type is taken from LHS only.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateUMulH(llvm::Value *LHS, llvm::Value *RHS)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const umulHDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_umulH,
        LHS->getType());

    return this->CreateCall2(umulHDecl, LHS, RHS);
}

// Emits a call to the GenISA_discard intrinsic with V as its only operand.
// The declaration is requested without any overload types.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDiscard(llvm::Value* V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const discardDecl = llvm::GenISAIntrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_discard);

    return this->CreateCall(discardDecl, V);
}

// Emits a call to the llvm.log2 intrinsic, overloaded on the type of V.
// Note that this is the base-2 logarithm intrinsic.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFLog(llvm::Value *V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const log2Decl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::log2,
        V->getType());

    return this->CreateCall(log2Decl, V);
}

// Emits a call to the llvm.exp2 intrinsic, overloaded on the type of V.
// Note that this is the base-2 exponential intrinsic.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFExp(llvm::Value *V)
{
    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const exp2Decl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::exp2,
        V->getType());

    return this->CreateCall(exp2Decl, V);
}

// Emits a call to the helper function "__builtin_floor_f64" with src as its
// only operand.
//
// If the current module already contains a function with that name it is
// reused. Otherwise the textual LLVM IR below, which defines it, is parsed
// into the module first, so the definition is added at most once per module.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDFloor(llvm::Value* src)
{
    llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != mod);
    // Fast path: the helper was already added to this module.
    llvm::Function* func = mod->getFunction("__builtin_floor_f64");
    if (func != nullptr)
    {
        return this->CreateCall(func, src);
    }

    // from OCL builtin: double @__builtin_spirv_floor_f64(double %x)
    // The definition is marked alwaysinline/nounwind/readnone and works on the
    // raw bit pattern of the argument (bitcast to i64) using integer
    // operations, followed by a single fadd.
    static const char* const code =
        "define double @__builtin_floor_f64(double %x)                              \n"
        "    alwaysinline nounwind readnone {                                       \n"
        "  %1 = bitcast double %x to i64                                            \n"
        "  %2 = lshr i64 %1, 32                                                     \n"
        "  %3 = trunc i64 %2 to i32                                                 \n"
        "  %4 = lshr i64 %1, 52                                                     \n"
        "  %5 = trunc i64 %4 to i32                                                 \n"
        "  %6 = and i32 %5, 2047                                                    \n"
        "  %7 = sub nsw i32 1023, %6                                                \n"
        "  %8 = add nsw i32 %7, 52                                                  \n"
        "  %9 = add nsw i32 %7, 20                                                  \n"
        "  %10 = icmp sgt i32 %8, 32                                                \n"
        "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
        "  %12 = icmp sgt i32 %9, 20                                                \n"
        "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
        "  %14 = icmp sgt i32 %11, 0                                                \n"
        "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
        "  %16 = icmp sgt i32 %13, 0                                                \n"
        "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
        "  %18 = and i32 %15, 31                                                    \n"
        "  %19 = shl i32 -1, %18                                                    \n"
        "  %20 = and i32 %17, 31                                                    \n"
        "  %21 = shl i32 -1, %20                                                    \n"
        "  %22 = icmp ne i32 %15, 32                                                \n"
        "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
        "  %24 = icmp eq i32 %17, 32                                                \n"
        "  %25 = icmp ult i32 %6, 1023                                              \n"
        "  %or.cond.i = or i1 %25, %24                                              \n"
        "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21   \n"
        "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23              \n"
        "  %26 = trunc i64 %1 to i32                                                \n"
        "  %27 = and i32 %maskValLow32bit.0.i, %26                                  \n"
        "  %28 = and i32 %maskValHigh32bit.0.i, %3                                  \n"
        "  %29 = zext i32 %28 to i64                                                \n"
        "  %30 = shl nuw i64 %29, 32                                                \n"
        "  %31 = zext i32 %27 to i64                                                \n"
        "  %32 = or i64 %30, %31                                                    \n"
        "  %33 = bitcast i64 %32 to double                                          \n"
        "  %34 = sub i64 %1, %32                                                    \n"
        "  %35 = lshr i64 %34, 32                                                   \n"
        "  %36 = or i64 %35, %34                                                    \n"
        "  %37 = trunc i64 %36 to i32                                               \n"
        "  %38 = icmp eq i32 %37, 0                                                 \n"
        "  %39 = ashr i64 %1, 31                                                    \n"
        "  %.op = and i64 %39, -4616189618054758400                                 \n"
        "  %40 = bitcast i64 %.op to double                                         \n"
        "  %41 = select i1 %38, double 0.000000e+00, double %40                     \n"
        "  %42 = fadd double %33, %41                                               \n"
        "  ret double %42                                                           \n"
        "}";

    // Parse the IR text directly into the current module.
    llvm::MemoryBufferRef codeBuf(code, "<string>");
    llvm::SMDiagnostic diagnostic;
    const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
    // Explicitly mark 'failed' as used; presumably this avoids an
    // unused-variable warning when the assertion below is compiled out.
    (void) failed;
    IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");

    // Look the helper up again now that it has been parsed in.
    // NOTE(review): 'func' is not null-checked here; a parse failure is only
    // reported through the assertion above.
    func = mod->getFunction("__builtin_floor_f64");
    return this->CreateCall(func, src);
}

// Emits a floor of V. A value whose type is the builder's double type is
// routed to CreateDFloor; any other type gets a call to the llvm.floor
// intrinsic overloaded on that type.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateFloor(llvm::Value *V)
{
    llvm::Type* const operandType = V->getType();

    // Doubles go through the dedicated helper.
    if (operandType == this->getDoubleTy())
    {
        return this->CreateDFloor(V);
    }

    llvm::Function* const enclosingFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const floorDecl = llvm::Intrinsic::getDeclaration(
        enclosingFunc->getParent(),
        llvm::Intrinsic::floor,
        operandType);

    return this->CreateCall(floorDecl, V);
}

// Emits a call to the helper function "__builtin_ceil_f64" with src as its
// only operand.
//
// If the current module already contains a function with that name it is
// reused. Otherwise the textual LLVM IR below, which defines it, is parsed
// into the module first, so the definition is added at most once per module.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDCeil(llvm::Value *src)
{
    llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != mod);
    // Fast path: the helper was already added to this module.
    llvm::Function* func = mod->getFunction("__builtin_ceil_f64");
    if (func != nullptr)
    {
        return this->CreateCall(func, src);
    }

    // from OCL builtin: double @__builtin_spirv_ceil_f64(double %x)
    // The definition is marked alwaysinline/nounwind/readnone and works on the
    // raw bit pattern of the argument (bitcast to i64) using integer
    // operations, followed by a single fadd.
    static const char* const code =
        "define double @__builtin_ceil_f64(double %x)                               \n"
        "    alwaysinline nounwind readnone {                                       \n"
        "  %1 = bitcast double %x to i64                                            \n"
        "  %2 = lshr i64 %1, 32                                                     \n"
        "  %3 = trunc i64 %2 to i32                                                 \n"
        "  %4 = lshr i64 %1, 52                                                     \n"
        "  %5 = trunc i64 %4 to i32                                                 \n"
        "  %6 = and i32 %5, 2047                                                    \n"
        "  %7 = sub nsw i32 1023, %6                                                \n"
        "  %8 = add nsw i32 %7, 52                                                  \n"
        "  %9 = add nsw i32 %7, 20                                                  \n"
        "  %10 = icmp sgt i32 %8, 32                                                \n"
        "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
        "  %12 = icmp sgt i32 %9, 20                                                \n"
        "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
        "  %14 = icmp sgt i32 %11, 0                                                \n"
        "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
        "  %16 = icmp sgt i32 %13, 0                                                \n"
        "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
        "  %18 = and i32 %15, 31                                                    \n"
        "  %19 = shl i32 -1, %18                                                    \n"
        "  %20 = and i32 %17, 31                                                    \n"
        "  %21 = shl i32 -1, %20                                                    \n"
        "  %22 = icmp ne i32 %15, 32                                                \n"
        "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
        "  %24 = icmp eq i32 %17, 32                                                \n"
        "  %25 = icmp ult i32 %6, 1023                                              \n"
        "  %or.cond.i = or i1 %25, %24                                              \n"
        "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %21   \n"
        "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %23              \n"
        "  %26 = trunc i64 %1 to i32                                                \n"
        "  %27 = and i32 %maskValLow32bit.0.i, %26                                  \n"
        "  %28 = and i32 %maskValHigh32bit.0.i, %3                                  \n"
        "  %29 = zext i32 %28 to i64                                                \n"
        "  %30 = shl nuw i64 %29, 32                                                \n"
        "  %31 = zext i32 %27 to i64                                                \n"
        "  %32 = or i64 %30, %31                                                    \n"
        "  %33 = bitcast i64 %32 to double                                          \n"
        "  %34 = sub i64 %1, %32                                                    \n"
        "  %35 = lshr i64 %34, 32                                                   \n"
        "  %36 = or i64 %35, %34                                                    \n"
        "  %37 = trunc i64 %36 to i32                                               \n"
        "  %38 = icmp eq i32 %37, 0                                                 \n"
        "  %39 = ashr i64 %1, 31                                                    \n"
        "  %40 = and i64 %39, -4607182418800017408                                  \n"
        "  %.op = add nsw i64 %40, 4607182418800017408                              \n"
        "  %41 = bitcast i64 %.op to double                                         \n"
        "  %42 = select i1 %38, double 0.000000e+00, double %41                     \n"
        "  %43 = fadd double %33, %42                                               \n"
        "  ret double %43                                                           \n"
        "}";

    // Parse the IR text directly into the current module.
    llvm::MemoryBufferRef codeBuf(code, "<string>");
    llvm::SMDiagnostic diagnostic;
    const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
    // Explicitly mark 'failed' as used; presumably this avoids an
    // unused-variable warning when the assertion below is compiled out.
    (void) failed;
    IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");

    // Look the helper up again now that it has been parsed in.
    // NOTE(review): 'func' is not null-checked here; a parse failure is only
    // reported through the assertion above.
    func = mod->getFunction("__builtin_ceil_f64");

    return this->CreateCall(func, src);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCeil(llvm::Value *V)
{
    // Emits a ceil() of V at the current insertion point.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    // Doubles are routed through the dedicated double-precision helper.
    if (operandTy == this->getDoubleTy())
    {
        return CreateDCeil(V);
    }

    // Every other type is forwarded to llvm.ceil, overloaded on the operand type.
    llvm::Function* const ceilDecl =
        llvm::Intrinsic::getDeclaration(parentModule, llvm::Intrinsic::ceil, operandTy);
    return this->CreateCall(ceilDecl, V);
}

// Emits a call to the helper function "__builtin_trunc_f64" with src as its
// single argument and returns the call.
//
// The helper is looked up by name in the module that owns the current insert
// block. If it is not present yet, its definition is parsed from the LLVM IR
// text below into that module, so later calls find and reuse it.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDTrunc(llvm::Value *src)
{
    llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != mod);
    llvm::Function* func = mod->getFunction("__builtin_trunc_f64");
    // Fast path: the helper was already materialized in this module.
    if (func != nullptr)
    {
        return this->CreateCall(func, src);
    }

    // from OCL builtin: double @__builtin_spirv_trunc_f64(double %x)
    // The function is declared alwaysinline/nounwind/readnone. It works on the
    // raw bits of %x: it extracts the 11-bit exponent field, derives masks for
    // the high and low 32-bit halves from it, and ANDs the input with them.
    // The text is parsed at runtime, so it must stay syntactically valid IR.
    static const char* const code =
        "define double @__builtin_trunc_f64(double %x)                        \n"
        "    alwaysinline nounwind readnone {                                       \n"
        "  %1 = bitcast double %x to i64                                            \n"
        "  %2 = lshr i64 %1, 32                                                     \n"
        "  %3 = trunc i64 %2 to i32                                                 \n"
        "  %4 = lshr i64 %1, 52                                                     \n"
        "  %5 = trunc i64 %4 to i32                                                 \n"
        "  %6 = and i32 %5, 2047                                                    \n"
        "  %7 = sub nsw i32 1023, %6                                                \n"
        "  %8 = add nsw i32 %7, 52                                                  \n"
        "  %9 = add nsw i32 %7, 20                                                  \n"
        "  %10 = icmp sgt i32 %8, 32                                                \n"
        "  %11 = select i1 %10, i32 32, i32 %8                                      \n"
        "  %12 = icmp sgt i32 %9, 20                                                \n"
        "  %13 = select i1 %12, i32 20, i32 %9                                      \n"
        "  %14 = icmp sgt i32 %11, 0                                                \n"
        "  %15 = select i1 %14, i32 %11, i32 0                                      \n"
        "  %16 = icmp sgt i32 %13, 0                                                \n"
        "  %17 = select i1 %16, i32 %13, i32 0                                      \n"
        "  %18 = and i32 %15, 31                                                    \n"
        "  %19 = shl i32 -1, %18                                                    \n"
        "  %20 = and i32 %17, 31                                                    \n"
        "  %21 = shl i32 -1, %20                                                    \n"
        "  %22 = icmp ne i32 %15, 32                                                \n"
        "  %23 = select i1 %22, i32 %19, i32 0                                      \n"
        "  %24 = icmp eq i32 %17, 32                                                \n"
        "  %25 = icmp ult i32 %6, 1023                                              \n"
        "  %or.cond = or i1 %25, %24                                                \n"
        "  %maskValHigh32bit.0 = select i1 %or.cond, i32 -2147483648, i32 %21       \n"
        "  %maskValLow32bit.0 = select i1 %or.cond, i32 0, i32 %23                  \n"
        "  %26 = trunc i64 %1 to i32                                                \n"
        "  %27 = and i32 %maskValLow32bit.0, %26                                    \n"
        "  %28 = and i32 %maskValHigh32bit.0, %3                                    \n"
        "  %29 = zext i32 %28 to i64                                                \n"
        "  %30 = shl nuw i64 %29, 32                                                \n"
        "  %31 = zext i32 %27 to i64                                                \n"
        "  %32 = or i64 %30, %31                                                    \n"
        "  %33 = bitcast i64 %32 to double                                          \n"
        "  ret double %33                                                           \n"
        "}";

    // Parse the helper straight into the current module. A parse failure is
    // only reported through the assertion below.
    llvm::MemoryBufferRef codeBuf(code, "<string>");
    llvm::SMDiagnostic diagnostic;
    const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
    (void) failed;
    IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");

    // Re-query by name to pick up the definition that was just parsed.
    // NOTE(review): func is not re-checked for null before the call.
    func = mod->getFunction("__builtin_trunc_f64");

    return this->CreateCall(func, src);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundZ(llvm::Value *V)
{
    // Emits a truncation of V at the current insertion point.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    // Doubles are routed through the dedicated double-precision helper.
    if (operandTy == this->getDoubleTy())
    {
        return CreateDTrunc(V);
    }

    // Every other type is forwarded to llvm.trunc, overloaded on the operand type.
    llvm::Function* const truncDecl =
        llvm::Intrinsic::getDeclaration(parentModule, llvm::Intrinsic::trunc, operandTy);
    return this->CreateCall(truncDecl, V);
}

// Emits a call to the helper function "__builtin_roundne_f64" with src as its
// single argument and returns the call.
//
// The helper is looked up by name in the module that owns the current insert
// block. If it is not present yet, its definition is parsed from the LLVM IR
// text below into that module, so later calls find and reuse it.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDRoundNE(llvm::Value *src)
{
    llvm::Module* const mod = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != mod);
    llvm::Function* func = mod->getFunction("__builtin_roundne_f64");
    // Fast path: the helper was already materialized in this module.
    if (func != nullptr)
    {
        return this->CreateCall(func, src);
    }

    // From OCL builtin: double @__builtin_spirv_rint_f64(double %x)
    // The function is declared alwaysinline/nounwind/readnone. It clears the
    // sign bit, conditionally adds 0.5, masks the result using masks derived
    // from the exponent field, applies a correction when the difference to the
    // absolute input is exactly 0.5, and finally ORs the original sign bit
    // back in. The text is parsed at runtime, so it must stay valid IR.
    static const char* const code =
        "define double @__builtin_roundne_f64(double %x)                            \n"
        "    alwaysinline nounwind readnone {                                       \n"
        "  %1 = bitcast double %x to i64                                            \n"
        "  %2 = and i64 %1, 9223372036854775807                                     \n"
        "  %3 = bitcast i64 %2 to double                                            \n"
        "  %4 = lshr i64 %2, 52                                                     \n"
        "  %5 = trunc i64 %4 to i32                                                 \n"
        "  %6 = icmp ult i32 %5, 1075                                               \n"
        "  %7 = zext i1 %6 to i32                                                   \n"
        "  %8 = sitofp i32 %7 to double                                             \n"
        "  %9 = fmul double %8, 5.000000e-01                                        \n"
        "  %10 = fadd double %3, %9                                                 \n"
        "  %11 = bitcast double %10 to i64                                          \n"
        "  %12 = lshr i64 %11, 32                                                   \n"
        "  %13 = trunc i64 %12 to i32                                               \n"
        "  %14 = lshr i64 %11, 52                                                   \n"
        "  %15 = trunc i64 %14 to i32                                               \n"
        "  %16 = and i32 %15, 2047                                                  \n"
        "  %17 = sub nsw i32 1023, %16                                              \n"
        "  %18 = add nsw i32 %17, 52                                                \n"
        "  %19 = add nsw i32 %17, 20                                                \n"
        "  %20 = icmp sgt i32 %18, 32                                               \n"
        "  %21 = select i1 %20, i32 32, i32 %18                                     \n"
        "  %22 = icmp sgt i32 %19, 20                                               \n"
        "  %23 = select i1 %22, i32 20, i32 %19                                     \n"
        "  %24 = icmp sgt i32 %21, 0                                                \n"
        "  %25 = select i1 %24, i32 %21, i32 0                                      \n"
        "  %26 = icmp sgt i32 %23, 0                                                \n"
        "  %27 = select i1 %26, i32 %23, i32 0                                      \n"
        "  %28 = and i32 %25, 31                                                    \n"
        "  %29 = shl i32 -1, %28                                                    \n"
        "  %30 = and i32 %27, 31                                                    \n"
        "  %31 = shl i32 -1, %30                                                    \n"
        "  %32 = icmp ne i32 %25, 32                                                \n"
        "  %33 = select i1 %32, i32 %29, i32 0                                      \n"
        "  %34 = icmp eq i32 %27, 32                                                \n"
        "  %35 = icmp ult i32 %16, 1023                                             \n"
        "  %or.cond.i = or i1 %35, %34                                              \n"
        "  %maskValHigh32bit.0.i = select i1 %or.cond.i, i32 -2147483648, i32 %31   \n"
        "  %maskValLow32bit.0.i = select i1 %or.cond.i, i32 0, i32 %33              \n"
        "  %36 = trunc i64 %11 to i32                                               \n"
        "  %37 = and i32 %maskValLow32bit.0.i, %36                                  \n"
        "  %38 = and i32 %maskValHigh32bit.0.i, %13                                 \n"
        "  %39 = zext i32 %38 to i64                                                \n"
        "  %40 = shl nuw i64 %39, 32                                                \n"
        "  %41 = zext i32 %37 to i64                                                \n"
        "  %42 = or i64 %40, %41                                                    \n"
        "  %43 = bitcast i64 %42 to double                                          \n"
        "  %44 = fptoui double %43 to i64                                           \n"
        "  %.tr = trunc i64 %44 to i32                                              \n"
        "  %45 = fsub double %43, %3                                                \n"
        "  %46 = fcmp oeq double %45, 5.000000e-01                                  \n"
        "  %47 = zext i1 %46 to i32                                                 \n"
        "  %48 = and i32 %.tr, %47                                                  \n"
        "  %49 = uitofp i32 %48 to double                                           \n"
        "  %50 = fsub double %43, %49                                               \n"
        "  %51 = and i64 %1, -9223372036854775808                                   \n"
        "  %52 = bitcast double %50 to i64                                          \n"
        "  %53 = or i64 %52, %51                                                    \n"
        "  %54 = bitcast i64 %53 to double                                          \n"
        "  ret double %54                                                           \n"
        "}";

    // Parse the helper straight into the current module. A parse failure is
    // only reported through the assertion below.
    llvm::MemoryBufferRef codeBuf(code, "<string>");
    llvm::SMDiagnostic diagnostic;
    const bool failed = llvm::parseAssemblyInto(codeBuf, mod, nullptr, diagnostic);
    (void) failed;
    IGC_ASSERT_MESSAGE(false == failed, "Error parse llvm assembly");

    // Re-query by name to pick up the definition that was just parsed.
    // NOTE(review): func is not re-checked for null before the call.
    func = mod->getFunction("__builtin_roundne_f64");

    return this->CreateCall(func, src);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateRoundNE(llvm::Value *V)
{
    // Emits a round-to-nearest-even of V, choosing the lowering by operand type.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    // Doubles use the dedicated double-precision helper.
    if (operandTy == this->getDoubleTy())
    {
        return CreateDRoundNE(V);
    }

    // Halves are widened to float, rounded with GenISA_ROUNDNE, then narrowed back.
    if (operandTy == this->getHalfTy())
    {
        llvm::Value* widened = this->CreateFPExt(V, this->getFloatTy());
        llvm::Function* const roundDecl =
            llvm::GenISAIntrinsic::getDeclaration(parentModule, llvm::GenISAIntrinsic::GenISA_ROUNDNE);
        llvm::Value* rounded = this->CreateCall(roundDecl, widened);
        return this->CreateFPTrunc(rounded, this->getHalfTy());
    }

    // Anything else is handed to GenISA_ROUNDNE unchanged.
    llvm::Function* const roundDecl =
        llvm::GenISAIntrinsic::getDeclaration(parentModule, llvm::GenISAIntrinsic::GenISA_ROUNDNE);
    return this->CreateCall(roundDecl, V);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateIsNan(llvm::Value* V)
{
    // An unordered compare (fcmp uno) is true when either operand is a NaN.
    // Comparing V against itself therefore yields true exactly when V is a
    // NaN, and false for every other value.
    llvm::Value* const isUnordered = this->CreateFCmp(llvm::FCmpInst::FCMP_UNO, V, V);
    return isUnordered;
}


template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCtpop(llvm::Value *V)
{
    // Emits llvm.ctpop (population count), overloaded on the type of V.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    llvm::Function* const popCountDecl =
        llvm::Intrinsic::getDeclaration(parentModule, llvm::Intrinsic::ctpop, operandTy);
    return this->CreateCall(popCountDecl, V);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getHalf(float f)
{
    // Builds a floating-point constant of half type holding the value f.
    llvm::Type* const halfTy = this->getHalfTy();
    return llvm::ConstantFP::get(halfTy, f);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFloat(float f)
{
    // Builds a floating-point constant of float type holding the value f.
    llvm::Type* const floatTy = this->getFloatTy();
    return llvm::ConstantFP::get(floatTy, f);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getDouble(double d)
{
    // Builds a floating-point constant of double type holding the value d.
    llvm::Type* const doubleTy = this->getDoubleTy();
    return llvm::ConstantFP::get(doubleTy, d);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX(llvm::Value *V)
{
    // Emits a call to GenISA_GradientX, overloaded on the type of V.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    llvm::Function* const gradientXDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule,
        llvm::GenISAIntrinsic::GenISA_GradientX,
        operandTy);
    return this->CreateCall(gradientXDecl, V);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTX_Fine(llvm::Value *V)
{
    // Emits a call to GenISA_GradientXfine, overloaded on the type of V.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    llvm::Function* const gradientXFineDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule,
        llvm::GenISAIntrinsic::GenISA_GradientXfine,
        operandTy);
    return this->CreateCall(gradientXFineDecl, V);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY(llvm::Value *V)
{
    // Emits a call to GenISA_GradientY, overloaded on the type of V.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    llvm::Function* const gradientYDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule,
        llvm::GenISAIntrinsic::GenISA_GradientY,
        operandTy);
    return this->CreateCall(gradientYDecl, V);
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDeriveRTY_Fine(llvm::Value *V)
{
    // Emits a call to GenISA_GradientYfine, overloaded on the type of V.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = V->getType();

    llvm::Function* const gradientYFineDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule,
        llvm::GenISAIntrinsic::GenISA_GradientYfine,
        operandTy);
    return this->CreateCall(gradientYFineDecl, V);
}

// Emits a scalar multiply-add of the three operands as a call to the llvm.fma
// intrinsic, overloaded on the type of float_src0, and returns the call.
//
// All three operands must be non-null scalars of half, float or double type,
// and they must share the same type because llvm.fma is overloaded on a
// single type.
template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::Create_MAD_Scalar(llvm::Value* float_src0, llvm::Value* float_src1, llvm::Value* float_src2)
{
    llvm::Module* const module = this->GetInsertBlock()->getParent()->getParent();
    IGC_ASSERT(nullptr != module);

    // Every operand is dereferenced by the type checks below, so each one
    // must be validated, not only the first.
    IGC_ASSERT(nullptr != float_src0);
    IGC_ASSERT(nullptr != float_src1);
    IGC_ASSERT(nullptr != float_src2);

    // Builtin Signature: float (float, float, float)
    IGC_ASSERT_MESSAGE((float_src0->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src0->getType() == this->getFloatTy() || float_src0->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 0");
    IGC_ASSERT_MESSAGE((float_src1->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src1->getType() == this->getFloatTy() || float_src1->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 1");
    IGC_ASSERT_MESSAGE((float_src2->getType() == llvm::Type::getHalfTy(module->getContext()) || float_src2->getType() == this->getFloatTy() || float_src2->getType() == this->getDoubleTy()), "Type check @MAD.scalar arg: 2");

    // The declaration is overloaded on the type of the first operand only, so
    // mixed operand types would produce a call with a mismatched signature.
    IGC_ASSERT_MESSAGE((float_src1->getType() == float_src0->getType() && float_src2->getType() == float_src0->getType()), "Type mismatch between @MAD.scalar args");

    llvm::Function* madFunc = llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fma, float_src0->getType());
    llvm::Value* args[] = { float_src0, float_src1, float_src2 };
    llvm::Value* float_madres_s = this->CreateCall(madFunc, args);

    return float_madres_s;
}

template<bool preserveNames, typename T, typename Inserter>
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePow(llvm::Value* src0, llvm::Value* src1)
{
    // Emits llvm.pow(src0, src1); the intrinsic is overloaded on the type of src0.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const baseTy = src0->getType();
    llvm::Function* const powDecl =
        llvm::Intrinsic::getDeclaration(parentModule, llvm::Intrinsic::pow, baseTy);

    llvm::Value* operands[] = { src0, src1 };
    return this->CreateCall(powDecl, operands);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::Create_SAMPLEBC(
    llvm::Value* float_ref_value,
    llvm::Value* bias_value,
    llvm::Value* address_u,
    llvm::Value* address_v,
    llvm::Value* address_r,
    llvm::Value* address_ai,
    llvm::Value* int32_textureIdx,
    llvm::Value* int32_sampler,
    llvm::Value* int32_offsetU,
    llvm::Value* int32_offsetV,
    llvm::Value* int32_offsetW,
    llvm::Type* returnType)
{
    // Emits a call to GenISA_sampleBCptr and returns the call instruction.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // The result is a 4-element vector whose element type is returnType,
    // or float when no return type was supplied.
    llvm::Type* const elementTy = (returnType != nullptr) ? returnType : this->getFloatTy();

    // Overload types: result vector, reference value, texture and sampler.
    llvm::Type* overloadTys[] = {
        IGCLLVM::FixedVectorType::get(elementTy, 4),
        float_ref_value->getType(),
        int32_textureIdx->getType(),
        int32_sampler->getType()
    };
    llvm::Function* const sampleBCDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_sampleBCptr, overloadTys);

    // Call operands, passed in the same order as this function's parameters.
    llvm::Value* callOperands[] = {
        float_ref_value,
        bias_value,
        address_u,
        address_v,
        address_r,
        address_ai,
        int32_textureIdx,
        int32_sampler,
        int32_offsetU,
        int32_offsetV,
        int32_offsetW
    };

    return this->CreateCall(sampleBCDecl, callOperands);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSampleIndex(
    llvm::Value* inputIndex,
    llvm::Value* sampleIndex,
    llvm::Value* perspective)
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Step 1: pull the barycentrics for the requested sample index.
    llvm::Function* const pullBarysDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_PullSampleIndexBarys);
    llvm::Value* barycentrics = this->CreateCall2(pullBarysDecl, sampleIndex, perspective);

    // Step 2: interpolate the input with those barycentrics.
    llvm::Function* const interpolateDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_Interpolate);
    return this->CreateCall2(interpolateDecl, inputIndex, barycentrics);
}


template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEvalSnapped(
    llvm::Value* inputIndex,
    llvm::Value* xOffset,
    llvm::Value* yOffset,
    llvm::Value* perspective)
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Step 1: pull the barycentrics for the given x/y offsets.
    llvm::Function* const pullBarysDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_PullSnappedBarys);
    llvm::Value* barycentrics = this->CreateCall3(pullBarysDecl, xOffset, yOffset, perspective);

    // Step 2: interpolate the input with those barycentrics.
    llvm::Function* const interpolateDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_Interpolate);
    return this->CreateCall2(interpolateDecl, inputIndex, barycentrics);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSetStream(
    llvm::Value* StreamId, llvm::Value* emitCount)
{
    // Emits GenISA_SetStream(StreamId, emitCount).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const setStreamDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_SetStream);
    return this->CreateCall2(setStreamDecl, StreamId, emitCount);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateEndPrimitive(
    llvm::Value* emitCount)
{
    // Emits GenISA_EndPrimitive(emitCount).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const endPrimitiveDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_EndPrimitive);
    return this->CreateCall(endPrimitiveDecl, emitCount);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateControlPointId()
{
    // Emits an argument-less call to GenISA_DCL_HSControlPointID.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const controlPointIdDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_HSControlPointID);
    return this->CreateCall(controlPointIdDecl);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreatePrimitiveID()
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Read the PRIMITIVEID system value through the float overload of the intrinsic.
    llvm::Function* const systemValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_SystemValue, this->getFloatTy());
    llvm::Value* rawValue = this->CreateCall(systemValueDecl, this->getInt32(IGC::PRIMITIVEID));

    // Hand the value back reinterpreted as a 32-bit integer.
    return this->CreateBitCast(rawValue, this->getInt32Ty());
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateInstanceID()
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Read the GS_INSTANCEID system value through the float overload of the intrinsic.
    llvm::Function* const systemValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_SystemValue, this->getFloatTy());
    llvm::Value* rawValue = this->CreateCall(systemValueDecl, this->getInt32(IGC::GS_INSTANCEID));

    // Hand the value back reinterpreted as a 32-bit integer.
    return this->CreateBitCast(rawValue, this->getInt32Ty());
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateSampleIndex()
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Read the SAMPLEINDEX system value through the float overload of the intrinsic.
    llvm::Function* const systemValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_SystemValue, this->getFloatTy());
    llvm::Value* rawValue = this->CreateCall(systemValueDecl, this->getInt32(IGC::SAMPLEINDEX));

    // Hand the value back reinterpreted as a 32-bit integer.
    return this->CreateBitCast(rawValue, this->getInt32Ty());
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCoverage()
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // Read the INPUT_COVERAGE_MASK system value through the float overload of the intrinsic.
    llvm::Function* const systemValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_SystemValue, this->getFloatTy());
    llvm::Value* rawValue = this->CreateCall(systemValueDecl, this->getInt32(IGC::INPUT_COVERAGE_MASK));

    // Hand the value back reinterpreted as a 32-bit integer.
    return this->CreateBitCast(rawValue, this->getInt32Ty());
}


template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateDomainPointInput(unsigned int dim)
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();

    // The system value to read is selected by offsetting DOMAIN_POINT_ID_X by dim.
    llvm::Function* const systemValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_SystemValue, this->getFloatTy());
    return this->CreateCall(systemValueDecl, this->getInt32(IGC::DOMAIN_POINT_ID_X + dim));
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_inputVecF32(llvm::Value* inputIndex, llvm::Value* interpolationMode)
{
    // Emits GenISA_DCL_inputVec(inputIndex, interpolationMode), using the float overload.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const inputVecDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_DCL_inputVec, this->getFloatTy());
    return this->CreateCall2(inputVecDecl, inputIndex, interpolationMode);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_discard(llvm::Value* condition)
{
    // Emits GenISA_discard(condition).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const discardDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_discard);
    return this->CreateCall(discardDecl, condition);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_runtime(llvm::Value* offset)
{
    // Emits GenISA_RuntimeValue(offset).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const runtimeValueDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_RuntimeValue);
    return this->CreateCall(runtimeValueDecl, offset);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_uavSerializeAll()
{
    // Emits an argument-less call to GenISA_uavSerializeAll.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const serializeDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_uavSerializeAll);
    return this->CreateCall(serializeDecl);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::CallInst* LLVM3DBuilder<preserveNames, T, Inserter>::create_countbits(llvm::Value* src)
{
    // Emits llvm.ctpop on src. The i32 overload is always requested,
    // independent of the type of src.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const popCountDecl = llvm::Intrinsic::getDeclaration(
        parentModule, llvm::Intrinsic::ctpop, this->getInt32Ty());
    return this->CreateCall(popCountDecl, src);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value*
LLVM3DBuilder<preserveNames, T, Inserter>::create_waveInverseBallot(
    llvm::Value* src)
{
    // Emits GenISA_WaveInverseBallot(src).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const inverseBallotDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_WaveInverseBallot);
    return this->CreateCall(inverseBallotDecl, src);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveBallot(llvm::Value* src)
{
    // Emits GenISA_WaveBallot(src).
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Function* const ballotDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_WaveBallot);
    return this->CreateCall(ballotDecl, src);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveshuffleIndex(llvm::Value* src, llvm::Value* index, llvm::Value* helperLaneMode)
{
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const originalTy = src->getType();
    const bool isBoolSource = (originalTy == this->getInt1Ty());

    // i1 sources are widened to i32 for the shuffle and narrowed back afterwards.
    llvm::Value* shuffleInput = src;
    if (isBoolSource)
    {
        shuffleInput = this->CreateZExt(src, this->getInt32Ty());
    }

    llvm::Function* const shuffleDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule,
        llvm::GenISAIntrinsic::GenISA_WaveShuffleIndex,
        shuffleInput->getType());

    // A null helperLaneMode is replaced by the constant i32 0.
    llvm::Value* shuffled = this->CreateCall3(
        shuffleDecl, shuffleInput, index, (helperLaneMode ? helperLaneMode : this->getInt32(0)));

    return isBoolSource ? this->CreateTrunc(shuffled, originalTy) : shuffled;
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveAll(llvm::Value* src, llvm::Value* type)
{
    // Emits GenISA_WaveAll(src, type); the intrinsic is overloaded on the type of src.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = src->getType();
    llvm::Function* const waveAllDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_WaveAll, operandTy);
    return this->CreateCall2(waveAllDecl, src, type);
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefix(
    llvm::Value* src, llvm::Value* type, bool inclusive, llvm::Value *Mask)
{
    // A null 'Mask' means "no restriction": substitute a constant true so
    // that all lanes take part.
    if (Mask == nullptr)
    {
        Mask = this->getInt1(true);
    }

    // Emits GenISA_WavePrefix(src, type, inclusive, Mask), overloaded on the type of src.
    llvm::Module* const parentModule = this->GetInsertBlock()->getParent()->getParent();
    llvm::Type* const operandTy = src->getType();
    llvm::Function* const wavePrefixDecl = llvm::GenISAIntrinsic::getDeclaration(
        parentModule, llvm::GenISAIntrinsic::GenISA_WavePrefix, operandTy);
    return this->CreateCall4(wavePrefixDecl, src, type, this->getInt1(inclusive), Mask);
}

    // We currently use the combination of 'convergent' and
    // 'inaccessiblememonly' to prevent code motion of
    // wave intrinsics.  Removing 'readnone' from a callsite
    // is not sufficient to stop LICM from looking back up to the
    // function definition for the attribute.  We can short circuit that
    // by creating an operand bundle.  The name "nohoist" is not
    // significant; anything will do.
inline llvm::CallInst* setUnsafeToHoistAttr(llvm::CallInst *CI)
{
    // Returns a clone of CI that carries an extra "nohoist" operand bundle.
    // All uses of CI are redirected to the clone, but CI itself is NOT erased
    // here.

    // Step 1: adjust the attributes on the original call; the clone is
    // created from it afterwards.
    CI->setConvergent();
#if LLVM_VERSION_MAJOR >= 7
    CI->setOnlyAccessesInaccessibleMemory();
    CI->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::ReadNone);
#else
    CI->addAttribute(
        llvm::AttributeSet::FunctionIndex, llvm::Attribute::InaccessibleMemOnly);
    CI->removeAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::ReadNone);
#endif

    // Step 2: operand bundles cannot be attached to an existing call, so
    // collect the bundles CI already has and append the new one.
    llvm::SmallVector<llvm::OperandBundleDef, 1> bundles;
    CI->getOperandBundlesAsDefs(bundles);
    bundles.push_back(llvm::OperandBundleDef("nohoist", llvm::None));

    // Step 3: re-create the call with the full bundle list, passing CI as the
    // insertion point, and switch every user over to it.
    llvm::CallInst* const clonedCall = llvm::CallInst::Create(CI, bundles, CI);
    CI->replaceAllUsesWith(clonedCall);
    return clonedCall;
}

template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value*
LLVM3DBuilder<preserveNames, T, Inserter>::create_wavePrefixBitCount(
    llvm::Value* src, llvm::Value *Mask)
{
    // Computes:
    //   bits           = ballot(src) [& Mask, when a mask is given]
    //   laneMaskLT     = (1 << laneId) - 1
    //   prefixBitCount = countbits(bits & laneMaskLT)
    llvm::Value* activeBits = this->create_waveBallot(src);
    if (Mask != nullptr)
    {
        activeBits = this->CreateAnd(activeBits, Mask);
    }

    // Build the mask that selects the lanes below the current one.
    llvm::Value* laneId = this->get32BitLaneID();
    llvm::Value* laneBit = this->CreateShl(this->getInt32(1), laneId);
    llvm::Value* lowerLanes = this->CreateSub(laneBit, this->getInt32(1));
    llvm::Value* lowerActiveBits = this->CreateAnd(activeBits, lowerLanes);

    // Swap the plain llvm.ctpop call for a clone that carries the no-hoist
    // marking, so it won't be hoisted/sunk out of a loop; then drop the
    // original call.
    llvm::CallInst* plainCount = this->create_countbits(lowerActiveBits);
    llvm::CallInst* pinnedCount = setUnsafeToHoistAttr(plainCount);
    plainCount->eraseFromParent();
    return pinnedCount;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Expands a wave-match operation into a loop built around `inst`.
///
/// The block containing `inst` is split twice: once at `inst` to form the
/// loop body ("wavematch-body") and once right after `inst` to form the exit
/// block ("wavematch-end").  One loop iteration reads the value of `src`
/// from the first lane, compares it with `src` and ballots the comparison.
/// The loop is left when the comparison is true and repeated otherwise.
///
/// On return the builder's insert point is the first insertion point of the
/// exit block.
///
/// @param inst Instruction around which the loop is built.
/// @param src  Value to be matched across lanes.
/// @returns The ballot of the comparison result.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMatch(
    llvm::Instruction *inst,
    llvm::Value       *src)
{
    // First split: everything from `inst` onwards becomes the loop body and
    // the original block acts as its pre-header.
    auto* const loopBody =
        inst->getParent()->splitBasicBlock(inst, "wavematch-body");

    // Second split: everything after `inst` becomes the code that runs once
    // the loop is done.
    auto* const loopExit =
        loopBody->splitBasicBlock(inst->getNextNode(), "wavematch-end");

    // splitBasicBlock() invalidated the insert point; re-anchor it at `inst`.
    this->SetInsertPoint(inst);

    // Body of one iteration: compare every lane's value against the value
    // held by the first lane, using an ordered-equal compare for floating
    // point types and an integer compare otherwise.
    auto* const leaderValue = this->readFirstLane(src);
    llvm::Value* const isMatch = src->getType()->isFloatingPointTy()
        ? this->CreateFCmpOEQ(leaderValue, src)
        : this->CreateICmpEQ(leaderValue, src);

    auto* const matchMask = this->create_waveBallot(isMatch);

    // Swap the unconditional terminator left by the split for a conditional
    // one: exit on a match, otherwise go around for another iteration.
    auto* const oldTerminator = loopBody->getTerminator();
    this->SetInsertPoint(oldTerminator);
    this->CreateCondBr(isMatch, loopExit, loopBody);
    oldTerminator->eraseFromParent();

    // Anything emitted after this call goes outside of the loop.
    this->SetInsertPoint(&*loopExit->getFirstInsertionPt());

    return matchMask;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Expands a multi-prefix wave operation into a loop built around `I`.
///
/// This implementation is similar to create_waveMatch() in that it loops
/// until all subsets of lanes are processed.  The block containing `I` is
/// split into "multiprefix-body" (starting at `I`) and "multiprefix-end"
/// (starting right after `I`).
///
/// On return the builder's insert point is the first insertion point of the
/// end block.
///
/// @param I      Instruction around which the loop is built.
/// @param Val    Value forwarded to create_wavePrefix().
/// @param Mask   Mask whose first-lane value selects the participating lanes.
/// @param OpKind Wave operation, passed to create_wavePrefix() as an i8.
/// @returns The value produced by create_wavePrefix().
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value*
LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefix(
    llvm::Instruction *I,
    llvm::Value *Val,
    llvm::Value *Mask,
    IGC::WaveOps OpKind)
{
    // Split twice: the original block becomes the pre-header, `I` sits alone
    // in the loop body, and the remaining code moves to the end block.
    auto* const loopBody =
        I->getParent()->splitBasicBlock(I, "multiprefix-body");
    auto* const loopExit =
        loopBody->splitBasicBlock(I->getNextNode(), "multiprefix-end");

    // The splits above invalidated the insert point; restore it at `I`.
    this->SetInsertPoint(I);

    // One iteration: take the mask held by the first lane and turn it into a
    // per-lane predicate via the inverse ballot.
    auto* const leaderMask = this->readFirstLane(Mask);
    auto* const inSubset = this->create_waveInverseBallot(leaderMask);

    // Non-inclusive prefix operation restricted by that predicate.
    llvm::Value* const opKindImm = this->getInt8(static_cast<uint8_t>(OpKind));
    auto* const prefixResult =
        this->create_wavePrefix(Val, opKindImm, false, inSubset);

    // Replace the terminator created by the split: leave the loop when the
    // predicate is true, otherwise branch back for another iteration.
    auto* const oldTerminator = loopBody->getTerminator();
    this->SetInsertPoint(oldTerminator);
    this->CreateCondBr(inSubset, loopExit, loopBody);
    oldTerminator->eraseFromParent();

    this->SetInsertPoint(&*loopExit->getFirstInsertionPt());

    return prefixResult;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Expands a multi-prefix bit count into a loop built around `I`.
///
/// Same structure as create_waveMatch() and create_waveMultiPrefix(): the
/// block containing `I` is split into "multiprefixbitcount-body" (starting
/// at `I`) and "multiprefixbitcount-end" (starting right after `I`).
///
/// On return the builder's insert point is the first insertion point of the
/// end block.
///
/// @param I    Instruction around which the loop is built.
/// @param Val  Value forwarded to create_wavePrefixBitCount().
/// @param Mask Mask whose first-lane value is used both as the bit-count mask
///             and as the source of the loop-exit predicate.
/// @returns The value produced by create_wavePrefixBitCount().
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value*
LLVM3DBuilder<preserveNames, T, Inserter>::create_waveMultiPrefixBitCount(
    llvm::Instruction *I,
    llvm::Value *Val,
    llvm::Value *Mask)
{
    // Pre-header / body / end split, exactly as in the sibling helpers.
    auto* const loopBody =
        I->getParent()->splitBasicBlock(I, "multiprefixbitcount-body");
    auto* const loopExit =
        loopBody->splitBasicBlock(I->getNextNode(), "multiprefixbitcount-end");

    // The splits above invalidated the insert point; restore it at `I`.
    this->SetInsertPoint(I);

    // One iteration: prefix bit count restricted to the mask held by the
    // first lane.
    auto* const leaderMask = this->readFirstLane(Mask);
    auto* const bitCount = this->create_wavePrefixBitCount(Val, leaderMask);

    // Build the new terminator in place of the one created by the split.
    // The exit predicate (inverse ballot of the first lane's mask) is emitted
    // here, directly in front of the branch.
    auto* const oldTerminator = loopBody->getTerminator();
    this->SetInsertPoint(oldTerminator);
    auto* const inSubset = this->create_waveInverseBallot(leaderMask);
    this->CreateCondBr(inSubset, loopExit, loopBody);
    oldTerminator->eraseFromParent();

    this->SetInsertPoint(&*loopExit->getFirstInsertionPt());

    return bitCount;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Emits a call to the GenISA_QuadPrefix intrinsic.
///
/// @param src       First call operand; its type selects the intrinsic
///                  overload.
/// @param type      Second call operand, passed through unchanged.
/// @param inclusive Passed to the intrinsic as an i1 constant.
/// @returns The emitted call.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::create_quadPrefix(llvm::Value* src, llvm::Value* type, bool inclusive)
{
    // Look up (or declare) the intrinsic overloaded on the source type in
    // the module that owns the current insert block.
    llvm::Function* const quadPrefixFn = llvm::GenISAIntrinsic::getDeclaration(
        this->GetInsertBlock()->getParent()->getParent(),
        llvm::GenISAIntrinsic::GenISA_QuadPrefix,
        src->getType());

    llvm::Value* const inclusiveFlag = this->getInt1(inclusive);
    return this->CreateCall3(quadPrefixFn, src, type, inclusiveFlag);
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Emits a call to GenISA_simdLaneId and widens the result to i32.
///
/// @returns The lane id zero-extended to a 32-bit integer.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::get32BitLaneID()
{
    llvm::Function* const laneIdFn = llvm::GenISAIntrinsic::getDeclaration(
        this->GetInsertBlock()->getParent()->getParent(),
        llvm::GenISAIntrinsic::GenISA_simdLaneId);

    // The intrinsic result is narrower than 32 bits, hence the zero-extend.
    llvm::Value* const narrowLaneId = this->CreateCall(laneIdFn);
    return this->CreateZExt(narrowLaneId, this->getInt32Ty());
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Emits a call to the GenISA_simdSize intrinsic.
///
/// @returns The emitted call.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getSimdSize()
{
    // The intrinsic is declared in the module owning the current insert block.
    llvm::Function* const simdSizeFn = llvm::GenISAIntrinsic::getDeclaration(
        this->GetInsertBlock()->getParent()->getParent(),
        llvm::GenISAIntrinsic::GenISA_simdSize);
    return this->CreateCall(simdSizeFn);
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Emits fbl(WaveBallot(true)): a ballot of the constant `true`
///        followed by a call to GenISA_firstbitLo on the ballot result.
///
/// @returns The emitted GenISA_firstbitLo call.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::getFirstLaneID()
{
    // Ballot over an always-true predicate.
    llvm::Value* const allTrueBallot = this->create_waveBallot(this->getInt1(true));

    // The intrinsic is requested only after the ballot has been emitted.
    llvm::Function* const firstBitLoFn = llvm::GenISAIntrinsic::getDeclaration(
        this->GetInsertBlock()->getParent()->getParent(),
        llvm::GenISAIntrinsic::GenISA_firstbitLo);

    return this->CreateCall(firstBitLoFn, allTrueBallot);
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Reads `src` from the lane reported by getFirstLaneID().
///
/// @param src Value to read.
/// @returns The result of create_waveshuffleIndex() with `src` and the first
///          lane id as its arguments.
///
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::readFirstLane(llvm::Value* src)
{
    // The lane id computation is emitted first, then the shuffle using it.
    return this->create_waveshuffleIndex(src, this->getFirstLaneID());
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Creates data conversion for typed image reads.
///     Gen HW supports only a limited number of surface formats through data
/// port data cache typed read messages. The complete list of formats supported
/// for reads is available in the Programmer's Reference Manual.
/// Some of the unsupported formats are mandatory in Vulkan and OGL.
/// In order to support these formats the driver and the compiler implement the
/// following emulation:
/// Since Gen9 HW typed read messages return raw data when reading from an
/// unsupported format, it is enough to call the conversion method
/// CreateImageDataConversion() using data returned from typed read messages.
///
/// @param format Surface format of the typed image (original i.e. from shader)
/// @param data Data returned by typed read message
/// @returns llvm::Value* Vector of data converted to the input surface format.
///
template<bool preserveNames, typename T, typename Inserter>
inline
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateImageDataConversion(
    IGC::SURFACE_FORMAT format,
    llvm::Value* data)
{
    IGC_ASSERT(nullptr != m_Platform);

    switch (format)
    {
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
        if (m_Platform->hasHDCSupportForTypedReadsUnormSnormToFloatConversion())
        {
            return data;
        }
        break;
    default:
        break;
    }


    llvm::Value* pFormatConvertedLLVMLdUAVTypedResult = data;
    switch (format)
    {
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_UNORM:
    {
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
        llvm::Value* pConstFloat = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 65535.0f)));
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempInt16 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pMaskLow = this->getInt32(0x0000FFFF);
        llvm::Value* pShift16 = this->getInt32(0x00000010);

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));

        // Retrieve unsigned short value (component 0).
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
        pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);

        // Convert unsigned short to float (component 0).
        pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // Retrieve unsigned short value (component 1).
        pTempInt16 = this->CreateLShr(pTempInt32, pShift16);

        // Convert unsigned short to float (component 1).
        pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 1 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // pTempFloat = pLdUAVTypedResult[1];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(1));

        // Retrieve unsigned short value (component 2).
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
        pTempInt16 = this->CreateAnd(pTempInt32, pMaskLow);

        // Convert unsigned short to float (component 2).
        pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 2 in output vector (pTempVec4[2]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // Retrieve unsigned short value (component 3).
        pTempInt16 = this->CreateLShr(pTempInt32, pShift16);

        // Convert unsigned short to float (component 3).
        pTempFloat = this->CreateUIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 3 in output vector (pTempVec4[3]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));

        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16B16A16_SNORM:
    {
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
        llvm::Value* pScalingFactor = this->getFloat(1.0f / 32767.0f);
        llvm::Value* pTempInt32;
        llvm::Value* pTempInt16;
        llvm::Value* pTempFloat;
        llvm::Value* pNegativeOne = this->getFloat(-1.0f);
        llvm::Value* pCmp_result;
        llvm::Value* fieldWidth = this->getInt32(16);

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));

        // Retrieve unsigned short value (component 0).
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
        pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);

        // Convert signed short to float (component 0).
        pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Compare with -1.0f
        pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
        pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // Retrieve unsigned short value (component 1).
        pTempInt16 = this->CreateAShr(pTempInt32, 16);

        // Convert signed short to float (component 1).
        pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Compare with -1.0f
        pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
        pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);

        // Store component 1 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // pTempFloat = pLdUAVTypedResult[1];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(1));

        // Retrieve unsigned short value (component 2).
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
        pTempInt16 = this->Create_IBFE(fieldWidth, this->getInt32(0), pTempInt32);

        // Convert unsigned short to float (component 2).
        pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Compare with -1.0f
        pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
        pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);

        // Store component 2 in output vector (pTempVec4[2]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // Retrieve unsigned short value (component 3).
        pTempInt16 = this->CreateAShr(pTempInt32, 16);

        // Convert unsigned short to float (component 3).
        pTempFloat = this->CreateSIToFP(pTempInt16, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Compare with -1.0f
        pCmp_result = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pNegativeOne);
        pTempFloat = this->CreateSelect(pCmp_result, pTempFloat, pNegativeOne);

        // Store component 3 in output vector (pTempVec4[3]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));

        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UNORM:
    {
        llvm::Value* pImmediateXYZ = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 1023.0f)));
        llvm::Value* pImmediateW = llvm::cast<llvm::ConstantFP>(llvm::ConstantFP::get(this->getFloatTy(), (1.0f / 3.0f)));
        llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
        llvm::Value* pMaskW = this->getInt32(0x00000003);
        llvm::Value* pShiftData = this->getInt32(10);

        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempIntWithMask = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempShiftRightData = llvm::UndefValue::get(this->getInt32Ty());

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));

        // Retrieve unsigned short value (component 0).
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());
        pTempIntWithMask = this->CreateAnd(pTempInt32, pMaskXYZ);

        // Convert unsigned short to float (component 0).
        pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // Retrieve unsigned short value (component 0).
        pTempShiftRightData = this->CreateLShr(pTempInt32, pShiftData);

        pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);

        // Convert unsigned short to float.
        pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);

        // Store component 1 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // Retrieve unsigned short value.
        pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);

        pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskXYZ);

        // Convert unsigned short to float.
        pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pImmediateXYZ);

        // Store component 2 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // Retrieve unsigned short value.
        pTempShiftRightData = this->CreateLShr(pTempShiftRightData, pShiftData);

        pTempIntWithMask = this->CreateAnd(pTempShiftRightData, pMaskW);

        // Convert unsigned short to float.
        pTempFloat = this->CreateUIToFP(pTempIntWithMask, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pImmediateW);

        // Store component 3 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R11G11B10_FLOAT:
    {
        // This surface format packs 3 half-float values into 32-bit string.
        // Half-floats are always non-negative, so to save space sign bit
        // is not stored and assumed to be zero.
        // Only 11 or 10 most significant bits (not counting sign bit)
        // of the 16 bits of IEEE 754 float16 are stored.
        // The least significant bits of the mantissa are assumed to be zero.
        // First value is stored in bits 0--10.     (r)
        // Second value is stored in bits 11 - 22   (g)
        // Third value is stored in bits 22 - 31    (b)
        // Fourth value is set to 1.0f.

        llvm::Value* pMaskX = this->getInt32(0x000007ff);
        llvm::Value* pMaskY = this->getInt32(0x00007ff0);
        llvm::Value* pMaskZ = this->getInt32(0x00007fe0);
        llvm::Value* pShiftDataX = this->getInt32(4);
        llvm::Value* pShiftDataY = this->getInt32(7);
        llvm::Value* pShiftDataZ = this->getInt32(10);
        llvm::Value* pTempFloat;
        llvm::Value* pTempFloat0;
        llvm::Value* pTempInt;
        llvm::Value* pTempInt0;

        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat0 = pLdUAVTypedResult[0];
        pTempFloat0 = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt0 = this->CreateBitCast(pTempFloat0, this->getInt32Ty());

        pTempInt = this->CreateAnd(pTempInt0, pMaskX);
        pTempInt = this->CreateShl(pTempInt, pShiftDataX);
        pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
        pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
        pTempFloat = this->CreateF16TOF32(pTempFloat);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataY);
        pTempInt = this->CreateAnd(pTempInt0, pMaskY);
        pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
        pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
        pTempFloat = this->CreateF16TOF32(pTempFloat);

        // Store component 1 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        pTempInt0 = this->CreateLShr(pTempInt0, pShiftDataZ);
        pTempInt = this->CreateAnd(pTempInt0, pMaskZ);
        pTempInt = this->CreateTrunc(pTempInt, this->getInt16Ty());
        pTempFloat = this->CreateBitCast(pTempInt, llvm::Type::getHalfTy(this->getContext()));
        pTempFloat = this->CreateF16TOF32(pTempFloat);

        // Store component 2 in output vector (pTempVec4[2]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // store 1.0 into component 3
        pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R10G10B10A2_UINT:
    {
        // AND          ro.x, ri.x, { 0x000003ff };
        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.y, ri.x, { 0x000003ff };
        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.z, ri.x, { 0x000003ff };
        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.w, ri.x, { 0x00000003 };
        // copy results
        llvm::Value* pMaskXYZ = this->getInt32(0x000003ff);
        llvm::Value* pMaskW = this->getInt32(0x00000003);
        llvm::Value* pShiftDataXYZ = this->getInt32(10);

        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempIntRes = llvm::UndefValue::get(this->getInt32Ty());

        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // AND          ro.x, ri.x, { 0x000003ff };
        pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
        pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.y, ri.x, { 0x000003ff };
        pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
        pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
        pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());

        // Store component 1 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.z, ri.x, { 0x000003ff };
        pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
        pTempIntRes = this->CreateAnd(pTempInt32, pMaskXYZ);
        pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());

        // Store component 2 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // SHR          ri.x, ri.x, { 10 };
        // AND          ro.w, ri.x, { 3 };
        pTempInt32 = this->CreateLShr(pTempInt32, pShiftDataXYZ);
        pTempIntRes = this->CreateAnd(pTempInt32, pMaskW);
        pTempFloat = this->CreateBitCast(pTempIntRes, this->getFloatTy());

        // Store component 3 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_UNORM:
    {
        // immX = 0x8, immY = 0x10, immZ = 0x18
        // immMaskLow = 0x000000FF
        // AND rTemp.x, ri.x, immMaskLow
        // ubfe rTemp.y, immX, immX, ri.x
        // ubfe rTemp.z, immX, immY, ri.x
        // ubfe rTemp.w, immX, immZ, ri.x
        // ubtof rTemp, rTemp
        // Fmul  rOutput, rTemp, 1.0f/255.0f
        llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
        llvm::Value* pImmX = this->getInt32(0x8);
        llvm::Value* pImmY = this->getInt32(0x10);
        llvm::Value* pImmZ = this->getInt32(0x18);
        llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // AND rTemp.x, ri.x, immMaskLow
        pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);

        // ubtof rTemp.x, rTemp.x
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());

        // Fmul  rOutput.x, rTemp.x, 1.0f/255.0f
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // ubfe rTemp.y, immX, immX,  ri.x
        pTempInt32Res = this->Create_UBFE(pImmX, pImmX, pTempInt32);

        // ubtof rTemp.y, rTemp.y
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());

        // Fmul  rOutput.y, rTemp.y, 1.0f/255.0f
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 1 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // ubfe rTemp.z, immX, immY,  ri.x
        pTempInt32Res = this->Create_UBFE(pImmX, pImmY, pTempInt32);

        // ubtof rTemp.z, rTemp.z
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());

        // Fmul  rOutput.z, rTemp.z, 1.0f/255.0f
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 2 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(2));

        // ubfe rTemp.w, immX, immZ,  ri.x
        pTempInt32Res = this->Create_UBFE(pImmX, pImmZ, pTempInt32);

        // ubtof rTemp.w, rTemp.w
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());

        // Fmul  rOutput.w, rTemp.w, 1.0f/255.0f
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 3 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(3));

        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8B8A8_SNORM:
    {
        llvm::Value* pScalingFactor = this->getFloat(1.0f / 127.0f);
        llvm::Value* fieldWidth = this->getInt32(8);
        llvm::Value* fpNegOne = this->getFloat(-1.0f);

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        // cast to int32 since result is seen as float
        llvm::Value* pInputAsInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // create 4-component output vector
        llvm::Value* pOutputVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // for each of the four channels
        for (unsigned int ch = 0; ch < 4; ++ch)
        {
            // extract 8 bits with sign extend from position 8*ch..8*ch+7
            // for bits 24..31 we can use arithmetic shift right instead of bit extract
            llvm::Value* pTempInt32Res = (ch < 3) ?
                this->Create_IBFE(fieldWidth, this->getInt32(8 * ch), pInputAsInt32) :
                this->CreateAShr(pInputAsInt32, 8 * ch);

            // convert to float
            pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());

            // multiply bthis->y the scaling factor 1.0f/127.0f
            pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

            // Fcmp_ge rFlag, rTemp.x, -1.0f
            // Sel.rFlag rOutput.x, rTemp.x, -1.0f
            llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, fpNegOne);
            pTempFloat = this->CreateSelect(pFlag, pTempFloat, fpNegOne);

            // Store component ch in output vector (pTempVec4[0]).
            pOutputVec4 = this->CreateInsertElement(pOutputVec4, pTempFloat, this->getInt32(ch));
        }

        pFormatConvertedLLVMLdUAVTypedResult = pOutputVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_UNORM:
    {
        // immMaskHigh = 0x0000FFFF
        // rImm.zw = {0.0f, 1.0f}
        // AND rTemp.x, ri.x, immMaskHigh
        // SHR rTemp.y, ri.x, 0x10,
        // USTOF rTemp.xy, rTemp.xy
        // FMUL rOutput.xy, rTemp.xy, 1.0f/65535.0f
        // MOV rOutput.zw, rImm.zw
        llvm::Value* pMaskHigh = this->getInt32(0x0000FFFF);
        llvm::Value* pShiftVal = this->getInt32(0x10);
        llvm::Value* pImmZ = this->getFloat(0.0f);
        llvm::Value* pImmW = this->getFloat(1.0f);
        llvm::Value* pConstFloat = this->getFloat(1.0f / 65535.0f);
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // AND rTemp.x, ri.x, immMaskHigh
        pTempInt32Res = this->CreateAnd(pTempInt32, pMaskHigh);

        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 1 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // Store component 2 to value 0.0f in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));

        // Set component 3 of the output vector to 1.0f (pTempVec4[3]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16G16_SNORM:
    {
        // immMaskLow16 = 0x0000FFFF
        // rImm.zw = {0.0f, 1.0f}
        // AND rTemp.x, ri.x, immMaskLow16
        // SHR rTemp.y, ri.x, 0x10,
        // STOF rTemp.xy, rTemp.xy
        // FMUL rTemp.xy, rTemp.xy, 1.0f / 32767.0f
        // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
        // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
        // MOV rOutput.zw, rImm.zw
        llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
        llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // extract bits 0..15 and sign extend the result
        llvm::Value* pTempInt32Res = Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);

        // convert to float and apply scaling factor
        pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // clamp to range [-1.0f, 1.0f] since the value can be a little less than -1.0f
        // Fcmp_ge rFlag, rTemp.x, -1.0f
        // Sel.rFlag rOutput.x, rTemp.x, -1.0f
        llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));
        pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));

        // Store component 0 in output vector (pOutVec4[0]).
        pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));

        // extract bits 16..31 with sign extension
        pTempInt32Res = this->CreateAShr(pTempInt32, 16);
        pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Fcmp_ge rFlag, rTemp.y, -1.0f
        // Sel.rFlag rOutput.y, rTemp.y, -1.0f
        pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, this->getFloat(-1.0f));

        pTempFloat = this->CreateSelect(pFlag, pTempFloat, this->getFloat(-1.0f));

        // Store component 1 in output vector (pOutVec4[1]).
        pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));

        // Store 0.0f, 1.0f in the remaining components of the output vector
        pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
        pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_UNORM:
    {
        // immMaskLow8 = 0x000000FF
        // rImm.zw = {0.0f, 1.0f}
        // AND rTemp.x, ri.x, immMaskLow8
        // SHR rTemp.y, ri.x, 0x8,
        // USTOF rTemp.xy, rTemp.xy
        // FMUL rOutput.xy, rTemp.xy, 1.0f / 255.0f
        // MOV rOutput.zw, rImm.zw
        llvm::Value* pMaskLow8 = this->getInt32(0x000000FF);
        llvm::Value* pShiftVal = this->getInt32(0x8);
        llvm::Value* pImmZ = this->getFloat(0.0f);
        llvm::Value* pImmW = this->getFloat(1.0f);
        llvm::Value* pConstFloat = this->getFloat(1.0f / 255.0f);
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempInt32Res = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // AND rTemp.x, ri.x, immMaskLow8
        pTempInt32Res = this->CreateAnd(pTempInt32, pMaskLow8);

        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        pTempInt32Res = this->CreateLShr(pTempInt32, pShiftVal);
        pTempFloat = this->CreateUIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pConstFloat);

        // Store component 1 in output vector (pTempVec4[1]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(1));

        // Set component 2 of the output vector to 0.0f (pTempVec4[2]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pImmZ, this->getInt32(2));

        // Set component 3 of the output vector to 1.0f (pTempVec4[3]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pImmW, this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8G8_SNORM:
    {
        // immMaskLow8 = 0x000000FF
        // rImm.zw = {0.0f, 1.0f}
        // AND rTemp.x, ri.x, immMaskLow8
        // SHR rTemp.y, ri.x, 0x8,
        // STOF rTemp.xy, rTemp.xy
        // FMUL rTemp.xy, rTemp.xy, 1.0f / 127.0f
        // FCMP_GE rFlag.xy, rTemp.xy, -1.0f
        // SEL_rFlag.xy rOutput.xy, rTemp.xy, -1.0f
        // MOV rOutput.zw, rImm.zw
        llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);

        llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        llvm::Value* fieldWidth = this->getInt32(8);

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        llvm::Value* pInputInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        llvm::Value* pTempInt32Res = Create_IBFE(fieldWidth, this->getInt32(0), pInputInt32);
        pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Fcmp_ge rFlag, rTemp.x, -1.0f
        // Sel.rFlag rOutput.x, rTemp.x, -1.0f
        llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));

        pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));
        // Store component 0 in output vector (pOutVec4[0]).
        pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));

        // extract bits 8..15 and sign extend the result
        pTempInt32Res = this->Create_IBFE(fieldWidth, this->getInt32(8), pInputInt32);

        pTempFloat = this->CreateSIToFP(pTempInt32Res, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Fcmp_ge rFlag, rTemp.y, -1.0f
        // Sel.rFlag rOutput.y, rTemp.y, -1.0f
        pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
        pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));

        // store the value in component 1 of the output vector
        pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(1));

        // store 0.0f, 1.0f in the remaining components of the output vector
        pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(0.0f), this->getInt32(2));
        pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_UNORM:
    {
        // rImm.yzw = {0.0f, 0.0f, 1.0f}
        // USTOF rTemp.x, ri.x
        // FMUL rOutput.x, rTemp.x, 1.0f / 65535.0f
        // MOV rOutput.yzw, rImm.yzw
        llvm::Value* pScalingFactor = getFloat(1.0f / 65535.0f);
        llvm::Value* pTempFloat = llvm::UndefValue::get(this->getFloatTy());
        llvm::Value* pTempInt32 = llvm::UndefValue::get(this->getInt32Ty());
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));

        // Store 0.0f, 0.0f, 1.0f, in remaining components of the output
        llvm::Value* pFPZero = getFloat(0.0f);
        pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
        pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
        pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R16_SNORM:
    {
        // rImm.yzw = {0.0f, 0.0f, 1.0f}
        // STOF rTemp.x, ri.x
        // FMUL rTemp.x, rTemp.x, 1.0f / 32767.0f
        // FCMP_GE rFlag.x, rTemp.x, -1.0f
        // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
        // MOV rOutput.yzw, rImm.yzw
        llvm::Value* pFPZero = getFloat(0.0f);
        llvm::Value* pScalingFactor = getFloat(1.0f / 32767.0f);
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        pTempInt32 = this->Create_IBFE(this->getInt32(16), this->getInt32(0), pTempInt32);

        pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // compare with -1.0f and clamp to -1.0 if less than -1.0
        // Fcmp_ge rFlag, rTemp.x, -1.0f
        // Sel.rFlag rOutput.x, rTemp.x, -1.0f
        llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, getFloat(-1.0f));
        pTempFloat = this->CreateSelect(pFlag, pTempFloat, getFloat(-1.0f));

        // Store the result in component 0 of the output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
        // Store 0.0f, 0.0f, 1.0f in remaining components
        pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(1));
        pTempVec4 = this->CreateInsertElement(pTempVec4, pFPZero, this->getInt32(2));
        pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_UNORM:
    {
        // rImm.yzw = {0.0f, 0.0f, 1.0f}
        // USTOF rTemp.x, ri.x
        // FMUL rOutput.x, rTemp.x, 1.0f / 255.0f
        // MOV rOutput.yzw, rImm.yzw
        // UBTOF        ro.x, ri.x;
        llvm::Value* fpZero = this->getFloat(0.0f);
        llvm::Value* pScalingFactor = getFloat(1.0f / 255.0f);
        llvm::Value* pTempVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        pTempFloat = this->CreateUIToFP(pTempInt32, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Store component 0 in output vector (pTempVec4[0]).
        pTempVec4 = this->CreateInsertElement(pTempVec4, pTempFloat, this->getInt32(0));
        // fill the rest with 0.0f, 0.0f, 1.0f
        pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(1));
        pTempVec4 = this->CreateInsertElement(pTempVec4, fpZero, this->getInt32(2));
        pTempVec4 = this->CreateInsertElement(pTempVec4, getFloat(1.0f), this->getInt32(3));
        pFormatConvertedLLVMLdUAVTypedResult = pTempVec4;
        break;
    }
    case IGC::SURFACE_FORMAT::SURFACE_FORMAT_R8_SNORM:
    {
        // rImm.yzw = {0.0f, 0.0f, 1.0f}
        // STOF rTemp.x, ri.x
        // FMUL rTemp.x, rTemp.x, 1.0f / 127.0f
        // FCMP_GE rFlag.x, rTemp.x, -1.0f
        // SEL_rFlag.x rOutput.x, rTemp.x, -1.0f
        // MOV rOutput.yzw, rImm.yzw
        llvm::Value* pFpZero = getFloat(0.0f);
        llvm::Value* pFpNegOne = getFloat(-1.0f);
        llvm::Value* pScalingFactor = getFloat(1.0f / 127.0f);
        llvm::Value* pOutVec4 = llvm::UndefValue::get(IGCLLVM::FixedVectorType::get(this->getFloatTy(), 4));

        // pTempFloat = pLdUAVTypedResult[0];
        llvm::Value* pTempFloat = this->CreateExtractElement(data, this->getInt32(0));
        llvm::Value* pTempInt32 = this->CreateBitCast(pTempFloat, this->getInt32Ty());

        // extract bits 0..7 and sign extend the result
        pTempInt32 = this->Create_IBFE(this->getInt32(8), this->getInt32(0), pTempInt32);

        // convert to float and apply scaling factor
        pTempFloat = this->CreateSIToFP(pTempInt32, this->getFloatTy());
        pTempFloat = this->CreateFMul(pTempFloat, pScalingFactor);

        // Fcmp_ge rFlag, rTemp.x, -1.0f
        // Sel.rFlag rOutput.x, rTemp.x, -1.0f
        llvm::Value* pFlag = this->CreateFCmp(llvm::FCmpInst::FCMP_OGE, pTempFloat, pFpNegOne);
        pTempFloat = this->CreateSelect(pFlag, pTempFloat, pFpNegOne);

        // Store component 0 in output vector (pOutVec4[0]).
        pOutVec4 = this->CreateInsertElement(pOutVec4, pTempFloat, this->getInt32(0));

        // Store 0.0f, 0.0f, 1.0f in the remaining components of the output vector
        pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(1));
        pOutVec4 = this->CreateInsertElement(pOutVec4, pFpZero, this->getInt32(2));
        pOutVec4 = this->CreateInsertElement(pOutVec4, getFloat(1.0f), this->getInt32(3));

        pFormatConvertedLLVMLdUAVTypedResult = pOutVec4;
        break;
    }
    default:
        break;
    }

    return pFormatConvertedLLVMLdUAVTypedResult;
}


///////////////////////////////////////////////////////////////////////////////
/// @brief Splits a vector value into its individual scalar components
/// @param  vector Vector-typed llvm value to take apart
/// @param  outScalars Output array; receives one entry for each of its slots
/// @param  maxSize Number of slots available in outScalars
/// @param  initializer Value stored in output slots that have no matching
///         vector element
///
template<bool preserveNames, typename T, typename Inserter>
inline
void LLVM3DBuilder<preserveNames, T, Inserter>::VectorToScalars(
    llvm::Value* vector,
    llvm::Value** outScalars,
    unsigned maxSize,
    llvm::Value* initializer)
{
    IGC_ASSERT(nullptr != vector);
    IGC_ASSERT(nullptr != vector->getType());
    IGC_ASSERT(vector->getType()->isVectorTy());

    IGCLLVM::FixedVectorType* const vectorType =
        llvm::cast<IGCLLVM::FixedVectorType>(vector->getType());
    const unsigned numElements = static_cast<unsigned>(vectorType->getNumElements());
    IGC_ASSERT(1 < numElements);
    IGC_ASSERT(numElements <= 4);
    IGC_ASSERT(numElements <= maxSize);

    // Never extract more elements than the output array can hold.
    const unsigned numExtracted = (numElements < maxSize) ? numElements : maxSize;

    // Leading slots: one extractelement per vector component, in index order.
    unsigned slot = 0;
    for (; slot < numExtracted; ++slot)
    {
        outScalars[slot] = this->CreateExtractElement(vector, this->getInt32(slot));
    }

    // Trailing slots: no vector component left, use the caller-supplied filler.
    for (; slot < maxSize; ++slot)
    {
        outScalars[slot] = initializer;
    }
}


///////////////////////////////////////////////////////////////////////////////
/// @brief Aggregates scalar values to a vector
/// @param  scalars Array of four scalar slots; the first vectorElementCnt
///         entries must be non-null and must all have the type of scalars[0].
/// @param  vectorElementCnt The number of elements in the vector to create;
///         expected to be in the range [1, 4] (the size of scalars).
/// @return Vector with vectorElementCnt elements of the type of scalars[0],
///         filled with scalars[0 .. vectorElementCnt - 1].
///
template<bool preserveNames, typename T, typename Inserter>
inline
llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::ScalarsToVector(
    llvm::Value* (&scalars)[4],
    unsigned vectorElementCnt)
{
    // Number of slots in the scalars array (fixed by the parameter type).
    const unsigned maxElements = 4;

    // Validate the inputs before touching them: scalars[0] supplies the
    // element type, and reading past maxElements would run off the array.
    IGC_ASSERT(0 < vectorElementCnt);
    IGC_ASSERT(vectorElementCnt <= maxElements);
    IGC_ASSERT(nullptr != scalars[0]);

    llvm::Type* const elementType = scalars[0]->getType();
    llvm::Type* const resultType = IGCLLVM::FixedVectorType::get(elementType, vectorElementCnt);
    IGC_ASSERT(nullptr != resultType);
    llvm::Value* result = llvm::UndefValue::get(resultType);

    // The second bound keeps builds without asserts from reading outside the
    // array; elements that cannot be supplied stay undefined in the result.
    for (unsigned i = 0; (i < vectorElementCnt) && (i < maxElements); i++)
    {
        IGC_ASSERT(nullptr != scalars[i]);
        IGC_ASSERT(elementType == scalars[i]->getType());

        result = this->CreateInsertElement(
            result,
            scalars[i],
            this->getInt32(i));
    }
    return result;
}


///////////////////////////////////////////////////////////////////////////////
/// @brief Returns the normalization factor for UNORM formats
/// @param  bits Number of bits in the UNORM value; must be in the range
///         [1, 63].
/// @return llvm::Constant* float constant equal to 1.0f / (2^bits - 1)
template<bool preserveNames, typename T, typename Inserter>
inline
llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetUnormFactor(unsigned bits)
{
    // bits == 0 would yield a zero maximum (division by zero below), and a
    // shift count of 64 or more is undefined for a 64-bit operand.
    IGC_ASSERT(0 < bits);
    IGC_ASSERT(bits < 64);

    // Computed in 64-bit unsigned arithmetic so that wide channels
    // (bits >= 31) do not overflow a 32-bit int.
    const unsigned long long maxUintValue = (1ull << bits) - 1;
    const float maxUint = static_cast<float>(maxUintValue);
    return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxUint));
}


///////////////////////////////////////////////////////////////////////////////
/// @brief Returns the normalization factor for SNORM formats
/// @param  bits Number of bits in the SNORM value; must be in the range
///         [2, 63].
/// @return llvm::Constant* float constant equal to 1.0f / ((2^bits - 1) / 2),
///         i.e. the reciprocal of the largest positive SNORM value
///         (127 for 8 bits, 32767 for 16 bits)
template<bool preserveNames, typename T, typename Inserter>
inline
llvm::Constant* LLVM3DBuilder<preserveNames, T, Inserter>::GetSnormFactor(unsigned bits)
{
    // bits <= 1 would yield a zero maximum (division by zero below), and a
    // shift count of 64 or more is undefined for a 64-bit operand.
    IGC_ASSERT(1 < bits);
    IGC_ASSERT(bits < 64);

    // Computed in 64-bit unsigned arithmetic so that wide channels
    // (bits >= 31) do not overflow a 32-bit int. The integer division
    // truncates on purpose: (2^bits - 1) / 2 == 2^(bits - 1) - 1.
    const unsigned long long maxSintValue = ((1ull << bits) - 1) / 2;
    const float maxSint = static_cast<float>(maxSintValue);
    return llvm::ConstantFP::get(this->getFloatTy(), (1.0f / maxSint));
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Builds a vector holding the requested coarse size system values
/// @param  pSrcVal Value whose type is used as the type of the result vector
/// @return Vector of the type of pSrcVal with the REQUESTED_COARSE_SIZE_X
///         system value in element 0 and REQUESTED_COARSE_SIZE_Y in
///         element 1; any further elements are left undefined
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSRqstCoarseSize(
    llvm::Value* pSrcVal)
{
    // Float-returning declaration of the system value intrinsic, taken from
    // the module that owns the current insertion point.
    llvm::Function* const pCurrentFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const pSysValFunc = llvm::GenISAIntrinsic::getDeclaration(
        pCurrentFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
        this->getFloatTy());

    // Read both components first, X before Y.
    llvm::Value* const pSizeX = this->CreateCall(pSysValFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_X));
    llvm::Value* const pSizeY = this->CreateCall(pSysValFunc, this->getInt32(IGC::REQUESTED_COARSE_SIZE_Y));

    // Start from an undefined vector and place X and Y in elements 0 and 1.
    llvm::Value* pResult = llvm::UndefValue::get(pSrcVal->getType());
    pResult = this->CreateInsertElement(pResult, pSizeX, this->getInt32(0));
    pResult = this->CreateInsertElement(pResult, pSizeY, this->getInt32(1));
    return pResult;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Builds a vector holding the actual coarse size system values
/// @param  pSrcVal Value whose type is used as the type of the result vector
/// @return Vector of the type of pSrcVal with the ACTUAL_COARSE_SIZE_X
///         system value in element 0 and ACTUAL_COARSE_SIZE_Y in
///         element 1; any further elements are left undefined
template<bool preserveNames, typename T, typename Inserter>
inline llvm::Value* LLVM3DBuilder<preserveNames, T, Inserter>::CreateCPSActualCoarseSize(
    llvm::Value* pSrcVal)
{
    // Float-returning declaration of the system value intrinsic, taken from
    // the module that owns the current insertion point.
    llvm::Function* const pCurrentFunc = this->GetInsertBlock()->getParent();
    llvm::Function* const pSysValFunc = llvm::GenISAIntrinsic::getDeclaration(
        pCurrentFunc->getParent(),
        llvm::GenISAIntrinsic::GenISA_DCL_SystemValue,
        this->getFloatTy());

    // Read both components first, X before Y.
    llvm::Value* const pSizeX = this->CreateCall(pSysValFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_X));
    llvm::Value* const pSizeY = this->CreateCall(pSysValFunc, this->getInt32(IGC::ACTUAL_COARSE_SIZE_Y));

    // Start from an undefined vector and place X and Y in elements 0 and 1.
    llvm::Value* pResult = llvm::UndefValue::get(pSrcVal->getType());
    pResult = this->CreateInsertElement(pResult, pSizeX, this->getInt32(0));
    pResult = this->CreateInsertElement(pResult, pSizeY, this->getInt32(1));
    return pResult;
}


#endif // BUILTINS_FRONTEND_DEFINITIONS_HPP