1 /*******************************************************************************
2 * Copyright 2020-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #include <mutex>
18 #include <thread>
19
20 #include "gpu/compute/device_info.hpp"
21
22 #include "common/verbose.hpp"
23 #include "gpu/jit/binary_format.hpp"
24
25 #ifdef DNNL_WITH_SYCL
26 #include "sycl/sycl_engine_base.hpp"
27 #endif
28
29 namespace dnnl {
30 namespace impl {
31 namespace gpu {
32 namespace compute {
33
get_future_extensions(compute::gpu_arch_t gpu_arch)34 uint64_t get_future_extensions(compute::gpu_arch_t gpu_arch) {
35 using namespace compute;
36
37 uint64_t extensions = 0;
38 switch (gpu_arch) {
39 case gpu_arch_t::xe_lp:
40 extensions |= (uint64_t)device_ext_t::intel_dot_accumulate;
41 break;
42 default: break;
43 }
44 return extensions;
45 }
46
str2gpu_arch(const char * str)47 inline gpu_arch_t str2gpu_arch(const char *str) {
48 #define CASE(_case) \
49 if (!strcmp(STRINGIFY(_case), str)) return gpu_arch_t::_case
50
51 CASE(gen9);
52 CASE(xe_lp);
53 return gpu_arch_t::unknown;
54 #undef CASE
55 }
56
mayiuse_ngen_kernels(engine_t * engine)57 bool device_info_t::mayiuse_ngen_kernels(engine_t *engine) {
58 static std::mutex m;
59 std::lock_guard<std::mutex> guard(m);
60
61 if (checked_ngen_kernels_) return mayiuse_ngen_kernels_;
62
63 auto status
64 = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels_, engine);
65 if (status != status::success) mayiuse_ngen_kernels_ = false;
66
67 if (get_verbose())
68 printf("dnnl_verbose,info,gpu,binary_kernels:%s\n",
69 mayiuse_ngen_kernels_ ? "enabled" : "disabled");
70
71 checked_ngen_kernels_ = true;
72
73 return mayiuse_ngen_kernels_;
74 }
75
init_attributes_common(engine_t * engine)76 status_t device_info_t::init_attributes_common(engine_t *engine) {
77 // TODO: Fix for discrete GPUs. The code below is written for
78 // integrated GPUs assuming that last-level cache for GPU is shared
79 // with CPU.
80 // Integrated GPUs share LLC with CPU which is L3 cache on CPU.
81
82 // XXX: this is the only place where GPU runtime functionally depends on
83 // CPU runtime. The `llc_cache_size_` is used only in one kernel for gen9.
84 // The idea is to use approximate cache size.
85
86 // llc_cache_size_ = cpu::platform::get_per_core_cache_size(3)
87 // * cpu::platform::get_num_cores();
88 // Assumption is that HT is likely enabled on client systems.
89 llc_cache_size_ = std::thread::hardware_concurrency() * (1 << 20);
90
91 // Assume 7 threads by default
92 int32_t threads_per_eu = 7;
93 switch (gpu_arch_) {
94 case gpu::compute::gpu_arch_t::gen9:
95 case gpu::compute::gpu_arch_t::xe_lp: threads_per_eu = 7; break;
96 default: break;
97 }
98
99 hw_threads_ = eu_count_ * threads_per_eu;
100
101 mayiuse_non_uniform_work_groups_ = true;
102 #ifdef DNNL_WITH_SYCL
103 if (engine->runtime_kind() == runtime_kind::sycl) {
104 auto *sycl_engine
105 = utils::downcast<const sycl::sycl_engine_base_t *>(engine);
106 // Level Zero backend does not support non-uniform work-groups.
107 mayiuse_non_uniform_work_groups_
108 = (sycl_engine->backend() == sycl::backend_t::opencl);
109 }
110 #endif
111
112 return status::success;
113 }
114
115 } // namespace compute
116 } // namespace gpu
117 } // namespace impl
118 } // namespace dnnl
119