1 // 2 // OpenCLRuntime.hpp 3 // MNN 4 // 5 // Created by MNN on 2019/01/31. 6 // Copyright © 2018, Alibaba Group Holding Limited 7 // 8 9 #ifndef OpenCLRuntime_hpp 10 #define OpenCLRuntime_hpp 11 12 13 #include <map> 14 #include <memory> 15 #include <mutex> 16 #include <set> 17 #include <string> 18 #include <vector> 19 20 #include <sstream> 21 #include <string> 22 #include <vector> 23 #include "core/Macro.h" 24 #include "Type_generated.h" 25 #include "backend/opencl/core/runtime/OpenCLWrapper.hpp" 26 #include "MNN/MNNForwardType.h" 27 28 namespace MNN { 29 30 #define CL_CONTEXT_PERF_HINT_QCOM 0x40C2 31 #define CL_PERF_HINT_HIGH_QCOM 0x40C3 32 #define CL_PERF_HINT_NORMAL_QCOM 0x40C4 33 #define CL_PERF_HINT_LOW_QCOM 0x40C5 34 #define CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9 35 #define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA 36 #define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB 37 #define CL_PRIORITY_HINT_LOW_QCOM 0x40CC 38 39 #define CL_KERNEL_WAVE_SIZE_QCOM 0xAA02 40 41 enum GpuType { MALI = 0, ADRENO = 1, RADEON = 2, OTHER = 3 }; 42 enum GpuMemObject { AUTO = 0, BUFFER = 1, IMAGE = 2}; 43 enum CLTuneLevel { None = 0, Heavy = 1, Wide = 2, Normal = 3, Fast = 4}; 44 45 class OpenCLRuntime { 46 public: 47 OpenCLRuntime(const BackendConfig::PrecisionMode precision, const int cl_mode); 48 ~OpenCLRuntime(); 49 OpenCLRuntime(const OpenCLRuntime &) = delete; 50 OpenCLRuntime &operator=(const OpenCLRuntime &) = delete; 51 52 bool isSupportedFP16() const; 53 bool isWeightCpuTransHalf() const; 54 bool isDeviceSupportedFP16() const; 55 bool isSupportedDotInt8() const; 56 bool isSupportedDotAccInt8() const; 57 ::cl::Context &context(); 58 ::cl::CommandQueue &commandQueue(); 59 uint64_t deviceGlobalMemeryCacheSize() const; 60 uint32_t deviceComputeUnits() const; 61 uint32_t maxFreq() const; 62 uint64_t getMaxWorkGroupSize(const ::cl::Kernel &kernel); 63 uint64_t GetKernelWaveSize(const cl::Kernel &kernel); 64 std::vector<uint32_t> getMaxWorkItemSizes(); 65 uint64_t getMaxLocalMem() const; getGpuType()66 GpuType getGpuType() { 67 return mGpuType; 68 } getGpuMemType()69 GpuMemObject getGpuMemType() { 70 return mMemType; 71 } getCLTuneLevel()72 CLTuneLevel getCLTuneLevel() { 73 return mTuneLevel; 74 } getDeviceName()75 std::string getDeviceName() { 76 return mDeviceName; 77 } 78 uint64_t maxAllocSize() const; 79 void setCommandQueueProfileEnable(); 80 void setCommandQueueProfileDisable(); 81 82 unsigned int mQueueCount = 0; 83 unsigned int getQueueNum(); 84 85 unsigned int mKernelTime = 0; 86 87 std::map<std::pair<std::string, std::vector<uint32_t>>, std::pair<std::vector<uint32_t>, uint32_t>>& tunedLwsMap(); 88 89 ::cl::Kernel buildKernel(const std::string &programName, const std::string &kernelName, 90 const std::set<std::string> &buildOptions); 91 92 std::vector<size_t> getMaxImage2DSize(); isCreateError() const93 bool isCreateError() const { 94 return mIsCreateError; 95 } 96 flops() const97 float flops() const { 98 return mFlops; 99 } 100 101 double getCostTime(const cl::Event *event); 102 double getQueuedTime(const cl::Event *event); 103 double getSubmitTime(const cl::Event *event); 104 105 std::pair<const void*, size_t> makeCache(); 106 bool setCache(std::pair<const void*, size_t> cache); 107 private: 108 bool loadProgram(const std::string &programName, cl::Program *program); 109 bool buildProgram(const std::string &buildOptionsStr, cl::Program *program); 110 bool getDeviceSupportsExtension(const cl::Device &device, const char *extensionName); 111 void setGpuMode(const int cl_mode_num); 112 113 private: 114 std::shared_ptr<::cl::Context> mContext; 115 std::shared_ptr<::cl::Device> mFirstGPUDevicePtr; 116 std::shared_ptr<::cl::CommandQueue> mCommandQueuePtr; 117 std::map<std::tuple<std::string, std::string, std::string>, ::cl::Program> mBuildProgramMap; 118 uint64_t mGPUGlobalMemeryCacheSize; 119 uint32_t mGPUComputeUnits; 120 uint32_t mMaxFreq; 121 uint32_t mMaxMemAllocSize; 122 uint64_t mMaxLocalMemSize; 123 bool mIsSupportedFP16 = false; 124 bool mIsDeviceSupportedFP16 = false; 125 bool mSupportDotInt8 = false; 126 bool mSupportDotAccInt8 = false; 127 GpuType mGpuType; 128 GpuMemObject mMemType = AUTO; 129 CLTuneLevel mTuneLevel = Wide; 130 std::string mDeviceName; 131 bool isSetWorkGroupAttribute = false; 132 std::string mDefaultBuildParams; 133 float mFlops = 4.0f; 134 bool mIsCreateError{false}; 135 136 double mStartNanos; 137 double mStopNanos; 138 139 std::map<std::pair<std::string, std::vector<uint32_t>>, std::pair<std::vector<uint32_t>, uint32_t>> mTunedLws; 140 std::vector<uint8_t> mBuffer; 141 const void* mCacheOutside = nullptr; 142 size_t mCacheOutsideSize = 0; 143 }; 144 145 } // namespace MNN 146 #endif /* OpenCLRuntime_hpp */ 147