/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
16 
#include <new>

#include <CL/cl.h>

#include "gpu/ocl/ocl_gpu_engine.hpp"

#include "common/type_helpers.hpp"
#include "common/utils.hpp"
#include "gpu/compute/kernel_list.hpp"
#include "gpu/ocl/kernel_utils.hpp"
#include "gpu/ocl/ocl_gpu_device_info.hpp"
#include "gpu/ocl/ocl_gpu_engine.hpp"
#include "gpu/ocl/ocl_memory_storage.hpp"
#include "gpu/ocl/ocl_stream.hpp"
#include "gpu/ocl/ocl_utils.hpp"
30 
31 namespace dnnl {
32 namespace impl {
33 namespace gpu {
34 namespace ocl {
35 
init()36 status_t ocl_gpu_engine_t::init() {
37     cl_int err = CL_SUCCESS;
38     err = clRetainDevice(device_);
39     if (err != CL_SUCCESS) {
40         device_ = nullptr;
41         context_ = nullptr;
42     }
43 
44     OCL_CHECK(err);
45 
46     if (is_user_context_) {
47         err = clRetainContext(context_);
48         if (err != CL_SUCCESS) context_ = nullptr;
49     } else {
50         context_
51                 = clCreateContext(nullptr, 1, &device_, nullptr, nullptr, &err);
52     }
53 
54     OCL_CHECK(err);
55 
56     CHECK(check_device(engine_kind::gpu, device_, context_));
57     compute::compute_engine_t::init();
58 
59     return status::success;
60 }
61 
create_memory_storage(memory_storage_t ** storage,unsigned flags,size_t size,void * handle)62 status_t ocl_gpu_engine_t::create_memory_storage(
63         memory_storage_t **storage, unsigned flags, size_t size, void *handle) {
64     auto _storage = new ocl_memory_storage_t(this);
65     if (_storage == nullptr) return status::out_of_memory;
66     status_t status = _storage->init(flags, size, handle);
67     if (status != status::success) {
68         delete _storage;
69         return status;
70     }
71     *storage = _storage;
72     return status::success;
73 }
74 
// Creates a stream owned by this engine with the given stream flags;
// delegates the actual construction to ocl_stream_t::create_stream, which
// sets *stream on success.
status_t ocl_gpu_engine_t::create_stream(stream_t **stream, unsigned flags) {
    return ocl_stream_t::create_stream(stream, this, flags);
}
78 
// Creates a stream that wraps a user-provided OpenCL command queue;
// delegates construction to ocl_stream_t::create_stream, which sets *stream
// on success.
status_t ocl_gpu_engine_t::create_stream(
        stream_t **stream, cl_command_queue queue) {
    return ocl_stream_t::create_stream(stream, this, queue);
}
83 
count_lines(const char ** code)84 cl_uint count_lines(const char **code) {
85     cl_uint i = 0;
86     while (*code) {
87         i++;
88         code++;
89     }
90     return i;
91 }
92 
create_kernel(compute::kernel_t * kernel,jit::jit_generator_base & jitter) const93 status_t ocl_gpu_engine_t::create_kernel(
94         compute::kernel_t *kernel, jit::jit_generator_base &jitter) const {
95 
96     auto binary = jitter.get_binary(context(), device());
97     auto kernel_name = jitter.kernel_name();
98 
99     ocl_wrapper_t<cl_kernel> ocl_kernel
100             = jitter.get_kernel(context(), device());
101     std::vector<gpu::compute::scalar_type_t> arg_types;
102     CHECK(get_kernel_arg_types(ocl_kernel, &arg_types));
103 
104     auto shared_binary = std::make_shared<gpu::compute::binary_t>(binary);
105 
106     *kernel = compute::kernel_t(
107             new ocl_gpu_kernel_t(shared_binary, kernel_name, arg_types));
108     dump_kernel_binary(this, *kernel);
109 
110     return status::success;
111 }
112 
create_kernels(std::vector<compute::kernel_t> * kernels,const std::vector<const char * > & kernel_names,const compute::kernel_ctx_t & kernel_ctx) const113 status_t ocl_gpu_engine_t::create_kernels(
114         std::vector<compute::kernel_t> *kernels,
115         const std::vector<const char *> &kernel_names,
116         const compute::kernel_ctx_t &kernel_ctx) const {
117 
118     *kernels = std::vector<compute::kernel_t>(kernel_names.size());
119     compute::kernel_list_t kernel_list;
120     for (size_t i = 0; i < kernels->size(); ++i) {
121         if (kernel_names[i]) kernel_list.add(kernel_names[i], &(*kernels)[i]);
122     }
123 
124     return ocl::create_kernels(this, kernel_list, kernel_ctx);
125 }
126 
// Retrieves the compiled binary of an OpenCL program into a freshly
// allocated compute::binary_t, returned via the shared_ptr out-parameter.
//
// NOTE(review): CL_PROGRAM_BINARY_SIZES and CL_PROGRAM_BINARIES return one
// entry per device the program was built for; only the first entry is read
// here. As used in this file programs are built for a single device, so
// this is sufficient — confirm if multi-device programs are ever passed.
static status_t get_program_binaries(
        cl_program program, std::shared_ptr<compute::binary_t> &binary) {

    // Get the size of the program binary in bytes.
    size_t binary_size = 0;
    cl_int err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
            sizeof(binary_size), &binary_size, nullptr);
    OCL_CHECK(err);

    // Binary is not available for the device.
    if (binary_size == 0) return status::runtime_error;

    // Get program binary. CL_PROGRAM_BINARIES expects an array of output
    // buffer pointers (one per device), hence the address of the single
    // buffer pointer is passed — not the buffer itself.
    binary = std::make_shared<compute::binary_t>(binary_size);
    unsigned char *binary_buffer = binary->data();
    err = clGetProgramInfo(
            program, CL_PROGRAM_BINARIES, binary_size, &binary_buffer, nullptr);
    OCL_CHECK(err);

    return status::success;
}
148 
create_kernels_from_ocl_source(std::vector<compute::kernel_t> * kernels,const std::vector<const char * > & kernel_names,const char ** code_strings,const compute::kernel_ctx_t & kernel_ctx) const149 status_t ocl_gpu_engine_t::create_kernels_from_ocl_source(
150         std::vector<compute::kernel_t> *kernels,
151         const std::vector<const char *> &kernel_names,
152         const char **code_strings,
153         const compute::kernel_ctx_t &kernel_ctx) const {
154     std::string options = kernel_ctx.options();
155 
156     // XXX: Update options by adding macros for OpenCL extensions that are not
157     // handled properly by the OpenCL runtime
158     auto *dev_info
159             = utils::downcast<const ocl_gpu_device_info_t *>(device_info());
160     options += " " + dev_info->get_cl_ext_options();
161 
162     cl_int err;
163     cl_program program = clCreateProgramWithSource(
164             context(), count_lines(code_strings), code_strings, nullptr, &err);
165     OCL_CHECK(err);
166 
167     cl_device_id dev = device();
168     err = clBuildProgram(program, 1, &dev, options.c_str(), nullptr, nullptr);
169     if (err != CL_SUCCESS) {
170         // Return error if verbose is not enabled.
171         if (get_verbose() == 0) OCL_CHECK(err);
172 
173         size_t log_length = 0;
174         err = clGetProgramBuildInfo(
175                 program, dev, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_length);
176         assert(err == CL_SUCCESS);
177 
178         std::vector<char> log_buf(log_length);
179         err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
180                 log_length, log_buf.data(), nullptr);
181         assert(err == CL_SUCCESS);
182         printf("Error during the build of OpenCL program.\nBuild "
183                "log:\n%s\n",
184                 log_buf.data());
185         OCL_CHECK(err);
186     }
187 
188     std::shared_ptr<compute::binary_t> shared_binary;
189     CHECK(get_program_binaries(program, shared_binary));
190 
191     *kernels = std::vector<compute::kernel_t>(kernel_names.size());
192     for (size_t i = 0; i < kernel_names.size(); ++i) {
193         cl_int err;
194         ocl_wrapper_t<cl_kernel> ocl_kernel
195                 = clCreateKernel(program, kernel_names[i], &err);
196         OCL_CHECK(err);
197         std::vector<gpu::compute::scalar_type_t> arg_types;
198         CHECK(get_kernel_arg_types(ocl_kernel, &arg_types));
199 
200         (*kernels)[i] = compute::kernel_t(new ocl_gpu_kernel_t(
201                 shared_binary, kernel_names[i], arg_types));
202         dump_kernel_binary(this, (*kernels)[i]);
203     }
204 
205     OCL_CHECK(clReleaseProgram(program));
206     return status::success;
207 }
208 
get_program_list_deleter() const209 std::function<void(void *)> ocl_gpu_engine_t::get_program_list_deleter() const {
210     return [](void *p) {
211         cl_int err = clReleaseProgram(reinterpret_cast<cl_program>(p));
212         assert(err == 0);
213         MAYBE_UNUSED(err);
214     };
215 }
216 
// Allocates the OpenCL device-info object for this engine and initializes
// it. Note the member is assigned before init() runs, so on failure
// device_info_ may hold a partially initialized object.
status_t ocl_gpu_engine_t::init_device_info() {
    device_info_ = std::make_shared<ocl_gpu_device_info_t>();
    CHECK(device_info_->init(this));
    return status::success;
}
222 
223 } // namespace ocl
224 } // namespace gpu
225 } // namespace impl
226 } // namespace dnnl
227