1 /******************************************************************************* 2 * Copyright 2019-2021 Intel Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 *******************************************************************************/ 16 17 #ifndef GPU_COMPUTE_COMPUTE_ENGINE_HPP 18 #define GPU_COMPUTE_COMPUTE_ENGINE_HPP 19 20 #include <cassert> 21 #include <memory> 22 #include <vector> 23 #include <initializer_list> 24 25 #include "common/c_types_map.hpp" 26 #include "common/engine.hpp" 27 #include "common/primitive.hpp" 28 #include "common/primitive_iterator.hpp" 29 #include "common/verbose.hpp" 30 #include "gpu/compute/device_info.hpp" 31 #include "gpu/compute/dispatch.hpp" 32 #include "gpu/compute/kernel.hpp" 33 #include "gpu/compute/kernel_ctx.hpp" 34 #include "gpu/jit/jit_generator_base.hpp" 35 36 namespace dnnl { 37 namespace impl { 38 namespace gpu { 39 namespace compute { 40 41 class compute_engine_t : public engine_t { 42 public: compute_engine_t(engine_kind_t kind,runtime_kind_t runtime_kind,size_t index)43 compute_engine_t( 44 engine_kind_t kind, runtime_kind_t runtime_kind, size_t index) 45 : engine_t(kind, runtime_kind, index) {} 46 47 virtual status_t init(); 48 device_info() const49 const device_info_t *device_info() const { return device_info_.get(); } 50 create_kernel(kernel_t * kernel,const char * kernel_name,const kernel_ctx_t & kernel_ctx) const51 status_t create_kernel(kernel_t *kernel, const char *kernel_name, 52 const kernel_ctx_t &kernel_ctx) const { 53 54 std::vector<kernel_t> kernels(1); 55 auto status = create_kernels(&kernels, {kernel_name}, kernel_ctx); 56 if (status == status::success) *kernel = kernels[0]; 57 return status; 58 } 59 60 virtual status_t create_kernel(compute::kernel_t *kernel, 61 jit::jit_generator_base &jitter) const = 0; 62 63 virtual status_t create_kernels(std::vector<compute::kernel_t> *kernels, 64 const std::vector<const char *> &kernel_names, 65 const compute::kernel_ctx_t &kernel_ctx) const = 0; 66 create_kernels_from_ocl_source(std::vector<compute::kernel_t> * kernels,const std::vector<const char * > & kernel_names,const char * source_string,const compute::kernel_ctx_t & kernel_ctx) const67 virtual status_t create_kernels_from_ocl_source( 68 std::vector<compute::kernel_t> *kernels, 69 const std::vector<const char *> &kernel_names, 70 const char *source_string, 71 const compute::kernel_ctx_t &kernel_ctx) const { 72 assert(!"unexpected"); 73 return status::success; 74 }; 75 get_zero_pad_primitive(primitive_t * & result,const resource_mapper_t * & resources)76 status_t get_zero_pad_primitive( 77 primitive_t *&result, const resource_mapper_t *&resources) { 78 std::call_once(zero_pad_init_, [&]() -> void { 79 zero_pad_desc_t desc; 80 desc.primitive_kind = primitive_kind::zero_pad; 81 dnnl_primitive_desc_iterator it( 82 this, (op_desc_t *)&desc, nullptr, nullptr); 83 std::shared_ptr<primitive_desc_t> zero_pad_pd(*(++it)); 84 if (zero_pad_pd == nullptr) return; 85 86 status_t status 87 = zero_pad_pd->create_primitive(zero_pad_primitive_, this); 88 #ifndef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE 89 if (status == status::success) { 90 status = zero_pad_primitive_->create_resource( 91 this, zero_pad_resources_); 92 } 93 #endif 94 if (status != status::success) { zero_pad_primitive_.reset(); } 95 }); 96 97 result = zero_pad_primitive_.get(); 98 resources = &zero_pad_resources_; 99 return result != nullptr ? status::success : status::unimplemented; 100 }; 101 mayiuse(device_ext_t ext) const102 bool mayiuse(device_ext_t ext) const { return device_info_->has(ext); } 103 is_gen9() const104 bool is_gen9() const { 105 return device_info_->gpu_arch() == gpu_arch_t::gen9; 106 } is_xe_lp() const107 bool is_xe_lp() const { 108 return device_info_->gpu_arch() == gpu_arch_t::xe_lp; 109 } is_xe_hp() const110 bool is_xe_hp() const { 111 return device_info_->gpu_arch() == gpu_arch_t::xe_hp; 112 } is_xe_hpg() const113 bool is_xe_hpg() const { 114 return device_info_->gpu_arch() == gpu_arch_t::xe_hpg; 115 } is_xe_hpc() const116 bool is_xe_hpc() const { 117 return device_info_->gpu_arch() == gpu_arch_t::xe_hpc; 118 } mayiuse_ngen_kernels()119 bool mayiuse_ngen_kernels() { 120 return device_info_->mayiuse_ngen_kernels(this); 121 } mayiuse_non_uniform_work_groups() const122 bool mayiuse_non_uniform_work_groups() const { 123 return device_info_->mayiuse_non_uniform_work_groups(); 124 } mayiuse_sub_group(int size) const125 bool mayiuse_sub_group(int size) const { 126 return device_info_->mayiuse_sub_group(size); 127 } mayiuse_sub_group(std::initializer_list<int> sizes) const128 bool mayiuse_sub_group(std::initializer_list<int> sizes) const { 129 for (int size : sizes) 130 if (!mayiuse_sub_group(size)) return false; 131 return true; 132 } mayiuse_large_grf_mode() const133 bool mayiuse_large_grf_mode() const { 134 // XXX: XeHPG 128EU A0 causes hangs with large GRF mode. 135 if (is_xe_hpg() && device_info()->eu_count() == 128 136 && device_info()->stepping_id() == 0) 137 return false; 138 return device_info_->gpu_arch() >= compute::gpu_arch_t::xe_hp; 139 } 140 create_dispatch(const memory_desc_t * md=nullptr) const141 dispatch_t create_dispatch(const memory_desc_t *md = nullptr) const { 142 return dispatch_t(this, md); 143 } 144 get_service_stream(stream_t * & stream)145 status_t get_service_stream(stream_t *&stream) override { 146 status_t status = status::success; 147 if (service_stream_ == nullptr) { 148 const std::lock_guard<std::mutex> lock(service_stream_mutex_); 149 if (service_stream_ == nullptr) { 150 stream_t *service_stream_ptr; 151 status = create_stream( 152 &service_stream_ptr, stream_flags::default_flags); 153 if (status == status::success) 154 service_stream_.reset(service_stream_ptr); 155 } 156 } 157 stream = service_stream_.get(); 158 return status; 159 } 160 161 // non-blocking query to check if service stream is already created is_service_stream_created() const162 bool is_service_stream_created() const { return (bool)service_stream_; } 163 164 virtual std::function<void(void *)> get_program_list_deleter() const = 0; 165 166 protected: 167 virtual status_t init_device_info() = 0; 168 169 #ifdef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE 170 ~compute_engine_t() override = default; 171 #endif 172 173 std::shared_ptr<device_info_t> device_info_; 174 175 private: 176 // Implement a zero_pad_primitive shared across the engine. The purpose is 177 // to prevent extra overhead associated with creating zero_pad_primitives 178 // for different inputs as ideally the zero_pad operations fast relative to 179 // the time to create the primitive. 180 std::shared_ptr<primitive_t> zero_pad_primitive_; 181 resource_mapper_t zero_pad_resources_; 182 std::once_flag zero_pad_init_; 183 std::unique_ptr<stream_t> service_stream_; 184 std::mutex service_stream_mutex_; 185 }; 186 187 } // namespace compute 188 } // namespace gpu 189 } // namespace impl 190 } // namespace dnnl 191 192 // Exported for testing purposes only. 193 extern "C" bool DNNL_API dnnl_impl_gpu_mayiuse_ngen_kernels( 194 dnnl::impl::engine_t *engine); 195 196 #endif 197