/*******************************************************************************
* Copyright 2020-2021 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <cstdio> // printf in verbose output
#include <cstring> // strcmp in str2gpu_arch
#include <mutex>
#include <thread>

#include "gpu/compute/device_info.hpp"

#include "common/verbose.hpp"
#include "gpu/jit/binary_format.hpp"

#ifdef DNNL_WITH_SYCL
#include "sycl/sycl_engine_base.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
namespace compute {

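// Returns architecture-dependent extension bits; currently only xe_lp adds
// intel_dot_accumulate.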
uint64_t get_future_extensions(compute::gpu_arch_t gpu_arch) {
    using namespace compute;

    uint64_t extensions = 0;
    switch (gpu_arch) {
        case gpu_arch_t::xe_lp:
            extensions |= (uint64_t)device_ext_t::intel_dot_accumulate;
            break;
        default: break;
    }
    return extensions;
}

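// Maps an architecture name to the corresponding gpu_arch_t value, e.g.
// str2gpu_arch("xe_lp") returns gpu_arch_t::xe_lp; unrecognized names map to
// gpu_arch_t::unknown.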
inline gpu_arch_t str2gpu_arch(const char *str) {
#define CASE(_case) \
    if (!strcmp(STRINGIFY(_case), str)) return gpu_arch_t::_case

    CASE(gen9);
    CASE(xe_lp);
    return gpu_arch_t::unknown;
#undef CASE
}

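// Checks once, under a lock, whether the engine can run nGEN binary kernels;
// the result is cached in mayiuse_ngen_kernels_ and reported via the verbose
// output when enabled.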
bool device_info_t::mayiuse_ngen_kernels(engine_t *engine) {
    static std::mutex m;
    std::lock_guard<std::mutex> guard(m);

    if (checked_ngen_kernels_) return mayiuse_ngen_kernels_;

    auto status
            = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels_, engine);
    if (status != status::success) mayiuse_ngen_kernels_ = false;

    if (get_verbose())
        printf("dnnl_verbose,info,gpu,binary_kernels:%s\n",
                mayiuse_ngen_kernels_ ? "enabled" : "disabled");

    checked_ngen_kernels_ = true;

    return mayiuse_ngen_kernels_;
}

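// Initializes device attributes common across runtimes: an approximate LLC
// size, the number of hardware threads, and whether non-uniform work-groups
// can be used.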
status_t device_info_t::init_attributes_common(engine_t *engine) {
    // TODO: Fix for discrete GPUs. The code below is written for
    // integrated GPUs, assuming that the GPU last-level cache is shared
    // with the CPU and corresponds to the CPU L3 cache.

    // XXX: this is the only place where the GPU runtime functionally depends
    // on the CPU runtime. The `llc_cache_size_` is used only in one kernel
    // for gen9. The idea is to use an approximate cache size.

    // llc_cache_size_ = cpu::platform::get_per_core_cache_size(3)
    //        * cpu::platform::get_num_cores();
    // Assumption is that HT is likely enabled on client systems, so estimate
    // roughly 1 MB of last-level cache per hardware thread.
    llc_cache_size_ = std::thread::hardware_concurrency() * (1 << 20);

    // Assume 7 hardware threads per EU by default.
    int32_t threads_per_eu = 7;
    switch (gpu_arch_) {
        case gpu::compute::gpu_arch_t::gen9:
        case gpu::compute::gpu_arch_t::xe_lp: threads_per_eu = 7; break;
        default: break;
    }

    hw_threads_ = eu_count_ * threads_per_eu;

    mayiuse_non_uniform_work_groups_ = true;
#ifdef DNNL_WITH_SYCL
    if (engine->runtime_kind() == runtime_kind::sycl) {
        auto *sycl_engine
                = utils::downcast<const sycl::sycl_engine_base_t *>(engine);
        // Level Zero backend does not support non-uniform work-groups.
        mayiuse_non_uniform_work_groups_
                = (sycl_engine->backend() == sycl::backend_t::opencl);
    }
#endif

    return status::success;
}

} // namespace compute
} // namespace gpu
} // namespace impl
} // namespace dnnl