1 /*******************************************************************************
2 * Copyright 2019-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
#include <CL/cl.h>

#include <memory>
#include <new>
#include <utility>

#include "gpu/ocl/ocl_gpu_engine.hpp"

#include "common/type_helpers.hpp"
#include "common/utils.hpp"
#include "gpu/compute/kernel_list.hpp"
#include "gpu/ocl/kernel_utils.hpp"
#include "gpu/ocl/ocl_gpu_device_info.hpp"
#include "gpu/ocl/ocl_gpu_engine.hpp"
#include "gpu/ocl/ocl_memory_storage.hpp"
#include "gpu/ocl/ocl_stream.hpp"
#include "gpu/ocl/ocl_utils.hpp"
30
31 namespace dnnl {
32 namespace impl {
33 namespace gpu {
34 namespace ocl {
35
init()36 status_t ocl_gpu_engine_t::init() {
37 cl_int err = CL_SUCCESS;
38 err = clRetainDevice(device_);
39 if (err != CL_SUCCESS) {
40 device_ = nullptr;
41 context_ = nullptr;
42 }
43
44 OCL_CHECK(err);
45
46 if (is_user_context_) {
47 err = clRetainContext(context_);
48 if (err != CL_SUCCESS) context_ = nullptr;
49 } else {
50 context_
51 = clCreateContext(nullptr, 1, &device_, nullptr, nullptr, &err);
52 }
53
54 OCL_CHECK(err);
55
56 CHECK(check_device(engine_kind::gpu, device_, context_));
57 compute::compute_engine_t::init();
58
59 return status::success;
60 }
61
create_memory_storage(memory_storage_t ** storage,unsigned flags,size_t size,void * handle)62 status_t ocl_gpu_engine_t::create_memory_storage(
63 memory_storage_t **storage, unsigned flags, size_t size, void *handle) {
64 auto _storage = new ocl_memory_storage_t(this);
65 if (_storage == nullptr) return status::out_of_memory;
66 status_t status = _storage->init(flags, size, handle);
67 if (status != status::success) {
68 delete _storage;
69 return status;
70 }
71 *storage = _storage;
72 return status::success;
73 }
74
// Creates a new stream on this engine; the flags are interpreted by
// ocl_stream_t, to which this call delegates.
status_t ocl_gpu_engine_t::create_stream(stream_t **stream, unsigned flags) {
    return ocl_stream_t::create_stream(stream, this, flags);
}
78
// Creates a stream that wraps a user-provided OpenCL command queue;
// delegates to ocl_stream_t.
status_t ocl_gpu_engine_t::create_stream(
        stream_t **stream, cl_command_queue queue) {
    return ocl_stream_t::create_stream(stream, this, queue);
}
83
count_lines(const char ** code)84 cl_uint count_lines(const char **code) {
85 cl_uint i = 0;
86 while (*code) {
87 i++;
88 code++;
89 }
90 return i;
91 }
92
create_kernel(compute::kernel_t * kernel,jit::jit_generator_base & jitter) const93 status_t ocl_gpu_engine_t::create_kernel(
94 compute::kernel_t *kernel, jit::jit_generator_base &jitter) const {
95
96 auto binary = jitter.get_binary(context(), device());
97 auto kernel_name = jitter.kernel_name();
98
99 ocl_wrapper_t<cl_kernel> ocl_kernel
100 = jitter.get_kernel(context(), device());
101 std::vector<gpu::compute::scalar_type_t> arg_types;
102 CHECK(get_kernel_arg_types(ocl_kernel, &arg_types));
103
104 auto shared_binary = std::make_shared<gpu::compute::binary_t>(binary);
105
106 *kernel = compute::kernel_t(
107 new ocl_gpu_kernel_t(shared_binary, kernel_name, arg_types));
108 dump_kernel_binary(this, *kernel);
109
110 return status::success;
111 }
112
create_kernels(std::vector<compute::kernel_t> * kernels,const std::vector<const char * > & kernel_names,const compute::kernel_ctx_t & kernel_ctx) const113 status_t ocl_gpu_engine_t::create_kernels(
114 std::vector<compute::kernel_t> *kernels,
115 const std::vector<const char *> &kernel_names,
116 const compute::kernel_ctx_t &kernel_ctx) const {
117
118 *kernels = std::vector<compute::kernel_t>(kernel_names.size());
119 compute::kernel_list_t kernel_list;
120 for (size_t i = 0; i < kernels->size(); ++i) {
121 if (kernel_names[i]) kernel_list.add(kernel_names[i], &(*kernels)[i]);
122 }
123
124 return ocl::create_kernels(this, kernel_list, kernel_ctx);
125 }
126
get_program_binaries(cl_program program,std::shared_ptr<compute::binary_t> & binary)127 static status_t get_program_binaries(
128 cl_program program, std::shared_ptr<compute::binary_t> &binary) {
129
130 // Get the size of the program binary in bytes.
131 size_t binary_size = 0;
132 cl_int err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
133 sizeof(binary_size), &binary_size, nullptr);
134 OCL_CHECK(err);
135
136 // Binary is not available for the device.
137 if (binary_size == 0) return status::runtime_error;
138
139 // Get program binary.
140 binary = std::make_shared<compute::binary_t>(binary_size);
141 unsigned char *binary_buffer = binary->data();
142 err = clGetProgramInfo(
143 program, CL_PROGRAM_BINARIES, binary_size, &binary_buffer, nullptr);
144 OCL_CHECK(err);
145
146 return status::success;
147 }
148
create_kernels_from_ocl_source(std::vector<compute::kernel_t> * kernels,const std::vector<const char * > & kernel_names,const char ** code_strings,const compute::kernel_ctx_t & kernel_ctx) const149 status_t ocl_gpu_engine_t::create_kernels_from_ocl_source(
150 std::vector<compute::kernel_t> *kernels,
151 const std::vector<const char *> &kernel_names,
152 const char **code_strings,
153 const compute::kernel_ctx_t &kernel_ctx) const {
154 std::string options = kernel_ctx.options();
155
156 // XXX: Update options by adding macros for OpenCL extensions that are not
157 // handled properly by the OpenCL runtime
158 auto *dev_info
159 = utils::downcast<const ocl_gpu_device_info_t *>(device_info());
160 options += " " + dev_info->get_cl_ext_options();
161
162 cl_int err;
163 cl_program program = clCreateProgramWithSource(
164 context(), count_lines(code_strings), code_strings, nullptr, &err);
165 OCL_CHECK(err);
166
167 cl_device_id dev = device();
168 err = clBuildProgram(program, 1, &dev, options.c_str(), nullptr, nullptr);
169 if (err != CL_SUCCESS) {
170 // Return error if verbose is not enabled.
171 if (get_verbose() == 0) OCL_CHECK(err);
172
173 size_t log_length = 0;
174 err = clGetProgramBuildInfo(
175 program, dev, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_length);
176 assert(err == CL_SUCCESS);
177
178 std::vector<char> log_buf(log_length);
179 err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
180 log_length, log_buf.data(), nullptr);
181 assert(err == CL_SUCCESS);
182 printf("Error during the build of OpenCL program.\nBuild "
183 "log:\n%s\n",
184 log_buf.data());
185 OCL_CHECK(err);
186 }
187
188 std::shared_ptr<compute::binary_t> shared_binary;
189 CHECK(get_program_binaries(program, shared_binary));
190
191 *kernels = std::vector<compute::kernel_t>(kernel_names.size());
192 for (size_t i = 0; i < kernel_names.size(); ++i) {
193 cl_int err;
194 ocl_wrapper_t<cl_kernel> ocl_kernel
195 = clCreateKernel(program, kernel_names[i], &err);
196 OCL_CHECK(err);
197 std::vector<gpu::compute::scalar_type_t> arg_types;
198 CHECK(get_kernel_arg_types(ocl_kernel, &arg_types));
199
200 (*kernels)[i] = compute::kernel_t(new ocl_gpu_kernel_t(
201 shared_binary, kernel_names[i], arg_types));
202 dump_kernel_binary(this, (*kernels)[i]);
203 }
204
205 OCL_CHECK(clReleaseProgram(program));
206 return status::success;
207 }
208
get_program_list_deleter() const209 std::function<void(void *)> ocl_gpu_engine_t::get_program_list_deleter() const {
210 return [](void *p) {
211 cl_int err = clReleaseProgram(reinterpret_cast<cl_program>(p));
212 assert(err == 0);
213 MAYBE_UNUSED(err);
214 };
215 }
216
// Instantiates the OCL-specific device-info object and initializes it
// against this engine.
status_t ocl_gpu_engine_t::init_device_info() {
    device_info_ = std::make_shared<ocl_gpu_device_info_t>();
    CHECK(device_info_->init(this));
    return status::success;
}
222
223 } // namespace ocl
224 } // namespace gpu
225 } // namespace impl
226 } // namespace dnnl
227