1 //
2 //  CUDARuntime.cpp
3 //  MNN
4 //
5 //  Created by MNN on 2019/02/28.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
9 #include "backend/cuda/core/runtime/CUDARuntime.hpp"
10 #include <sys/stat.h>
11 #include <cstdlib>
12 #include <fstream>
13 #include <memory>
14 #include <string>
15 #include <utility>
16 #include <vector>
17 #include "core/Macro.h"
18 //#define MNN_OPEN_TIME_TRACE
19 #include <MNN/AutoTime.hpp>
// Two-level stringification: STR(x) expands macro x first, then STR_HELPER turns the
// expansion into a string literal.
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)
// Uncomment to enable the MNN_PRINT tracing guarded by LOG_VERBOSE in this file.
// #define LOG_VERBOSE
// "major.minor.patch" string assembled from the cuDNN version macros.
// It expands through STR, so it is only usable before the #undef STR below.
#define CUDNN_VERSION_STR STR(CUDNN_MAJOR) "." STR(CUDNN_MINOR) "." STR(CUDNN_PATCHLEVEL)

// Report the CUDA runtime and cuDNN versions in the compiler output.
#pragma message "compile with cuda " STR(CUDART_VERSION) " "
#pragma message "compile with cuDNN " CUDNN_VERSION_STR " "

// Refuse to build against the cuDNN 5.1.x series.
static_assert(!(CUDNN_MAJOR == 5 && CUDNN_MINOR == 1), "cuDNN 5.1.x series has bugs. Use 5.0.x instead.");

// The stringification helpers are only needed for the messages above.
#undef STR
#undef STR_HELPER
32 
33 namespace MNN {
34 
isCreateError() const35 bool CUDARuntime::isCreateError() const {
36     return mIsCreateError;
37 }
38 
CUDARuntime(bool permitFloat16,int device_id)39 CUDARuntime::CUDARuntime(bool permitFloat16, int device_id) {
40 #ifdef LOG_VERBOSE
41     MNN_PRINT("start CUDARuntime !\n");
42 #endif
43     int version;
44     cuda_check(cudaRuntimeGetVersion(&version));
45     int id = device_id;
46     if (id < 0) {
47         cuda_check(cudaGetDevice(&id));
48     }
49     mDeviceId = id;
50     cuda_check(cudaGetDeviceProperties(&mProp, id));
51     MNN_ASSERT(mProp.maxThreadsPerBlock > 0);
52 
53     cublas_check(cublasCreate(&mCublasHandle));
54 
55     // Set stream for cuDNN and cublas handles.
56 
57     // Note that all cublas scalars (alpha, beta) and scalar results such as dot
58     // output resides at device side.
59     cublas_check(cublasSetPointerMode(mCublasHandle, CUBLAS_POINTER_MODE_HOST));
60     cudnn_check(cudnnCreate(&mCudnnHandle));
61 }
62 
// Releases the library handles created by the constructor: the cuBLAS handle first,
// then the cuDNN handle. Each status is passed to the corresponding *_check macro.
CUDARuntime::~CUDARuntime() {
#ifdef LOG_VERBOSE
    MNN_PRINT("start ~CUDARuntime !\n");
#endif
    cublas_check(cublasDestroy(mCublasHandle));
    cudnn_check(cudnnDestroy(mCudnnHandle));

#ifdef LOG_VERBOSE
    MNN_PRINT("end ~CUDARuntime !\n");
#endif
}
74 
blocks_num(const int total_threads)75 int CUDARuntime::blocks_num(const int total_threads) {
76     int maxNum = mProp.maxThreadsPerBlock;
77     if(total_threads / 32 > maxNum) {
78         mThreadPerBlock = maxNum;
79     } else if(total_threads / 16 > maxNum) {
80         mThreadPerBlock = maxNum / 2;
81     } else if(total_threads / 8 > maxNum) {
82         mThreadPerBlock = maxNum / 4;
83     } else if(total_threads / 4 > maxNum) {
84         mThreadPerBlock = maxNum / 8;
85     } else {
86         mThreadPerBlock = 128;
87     }
88     return (total_threads + mThreadPerBlock - 1) / mThreadPerBlock;
89 }
90 
isSupportedFP16() const91 bool CUDARuntime::isSupportedFP16() const {
92     return mIsSupportedFP16;
93 }
94 
isSupportedDotInt8() const95 bool CUDARuntime::isSupportedDotInt8() const {
96     return mSupportDotInt8;
97 }
98 
isSupportedDotAccInt8() const99 bool CUDARuntime::isSupportedDotAccInt8() const {
100     return mSupportDotAccInt8;
101 }
102 
mem_alignment_in_bytes() const103 size_t CUDARuntime::mem_alignment_in_bytes() const {
104     return std::max(mProp.textureAlignment, mProp.texturePitchAlignment);
105 }
106 
device_id() const107 int CUDARuntime::device_id() const {
108     return mDeviceId;
109 }
110 
activate()111 void CUDARuntime::activate() {
112     int id = device_id();
113     if (id >= 0) {
114         cuda_check(cudaSetDevice(id));
115     }
116 }
117 
// Allocates size_in_bytes bytes with cudaMalloc and returns the resulting pointer.
// The cudaMalloc status is passed to cuda_check, and MNN_ASSERT checks that the
// returned pointer is not null.
void *CUDARuntime::alloc(size_t size_in_bytes) {
    void *ptr = nullptr;
    cuda_check(cudaMalloc(&ptr, size_in_bytes));
    MNN_ASSERT(nullptr != ptr);
    return ptr;
}
124 
// Releases ptr with cudaFree; the status is passed to cuda_check.
// NOTE(review): presumably ptr comes from CUDARuntime::alloc — confirm against callers.
void CUDARuntime::free(void *ptr) {
    cuda_check(cudaFree(ptr));
}
128 
memcpy(void * dst,const void * src,size_t size_in_bytes,MNNMemcpyKind_t kind,bool sync)129 void CUDARuntime::memcpy(void *dst, const void *src, size_t size_in_bytes, MNNMemcpyKind_t kind, bool sync) {
130     cudaMemcpyKind cuda_kind;
131     switch (kind) {
132         case MNNMemcpyDeviceToHost:
133             cuda_kind = cudaMemcpyDeviceToHost;
134             break;
135         case MNNMemcpyHostToDevice:
136             cuda_kind = cudaMemcpyHostToDevice;
137             break;
138         case MNNMemcpyDeviceToDevice:
139             cuda_kind = cudaMemcpyDeviceToDevice;
140             break;
141         default:
142             MNN_ERROR("bad cuda memcpy kind\n");
143     }
144     //TODO, support Async Afterwards
145     cuda_check(cudaMemcpy(dst, src, size_in_bytes, cuda_kind));
146 }
147 
// Fills size_in_bytes bytes starting at dst using cudaMemset; the status is passed
// to cuda_check. cudaMemset applies value per byte, not per element.
void CUDARuntime::memset(void *dst, int value, size_t size_in_bytes) {
    cuda_check(cudaMemset(dst, value, size_in_bytes));
}
151 
cublas_handle()152 cublasHandle_t CUDARuntime::cublas_handle() {
153     return mCublasHandle;
154 }
155 
cudnn_handle()156 cudnnHandle_t CUDARuntime::cudnn_handle() {
157     return mCudnnHandle;
158 }
159 
160 } // namespace MNN
161