1 /*******************************************************************************
2 * Copyright 2020-2021 Intel Corporation
3 * Copyright 2020 FUJITSU LIMITED
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *******************************************************************************/
17 
18 #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
19 #include <algorithm>
20 
21 #if defined(_WIN32)
22 #include <windows.h>
23 #elif defined(__GLIBC__)
24 #include <sched.h>
25 #endif
26 #endif
27 
28 #include "cpu/platform.hpp"
29 
30 #if DNNL_X64
31 #include "cpu/x64/cpu_isa_traits.hpp"
32 #elif DNNL_AARCH64
33 #include "cpu/aarch64/cpu_isa_traits.hpp"
34 #endif
35 
36 // For DNNL_X64 build we compute the timestamp using rdtsc. Use std::chrono for
37 // other builds.
38 #if !DNNL_X64
39 #include <chrono>
40 #endif
41 
42 namespace dnnl {
43 namespace impl {
44 namespace cpu {
45 namespace platform {
46 
// Returns the ISA information string. The query is forwarded to the
// architecture-specific implementation chosen at build time; a build that
// targets neither x64 nor AArch64 reports "Generic".
const char *get_isa_info() {
#if DNNL_X64
    const char *isa_name = x64::get_isa_info();
#elif DNNL_AARCH64
    const char *isa_name = aarch64::get_isa_info();
#else
    const char *isa_name = "Generic";
#endif
    return isa_name;
}
56 
get_effective_cpu_isa()57 dnnl_cpu_isa_t get_effective_cpu_isa() {
58 #if DNNL_X64
59     return x64::get_effective_cpu_isa();
60 #elif DNNL_AARCH64
61     return aarch64::get_effective_cpu_isa();
62 #else
63     return dnnl_cpu_isa_all;
64 #endif
65 }
66 
set_max_cpu_isa(dnnl_cpu_isa_t isa)67 status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) {
68 #if DNNL_X64
69     return x64::set_max_cpu_isa(isa);
70 #else
71     return status::unimplemented;
72 #endif
73 }
74 
set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints)75 status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) {
76 #if DNNL_X64
77     return x64::set_cpu_isa_hints(isa_hints);
78 #else
79     return status::unimplemented;
80 #endif
81 }
82 
get_cpu_isa_hints()83 dnnl_cpu_isa_hints_t get_cpu_isa_hints() {
84 #if DNNL_X64
85     return x64::get_cpu_isa_hints();
86 #else
87     return dnnl_cpu_isa_no_hints;
88 #endif
89 }
90 
// Tells whether the current ISA hints equal dnnl_cpu_isa_prefer_ymm. The
// hints are only consulted on x64; other builds always answer false.
bool prefer_ymm_requested() {
    bool requested = false;
#if DNNL_X64
    requested = (x64::get_cpu_isa_hints() == dnnl_cpu_isa_prefer_ymm);
#endif
    return requested;
}
99 
has_data_type_support(data_type_t data_type)100 bool has_data_type_support(data_type_t data_type) {
101     switch (data_type) {
102         case data_type::bf16:
103 #if DNNL_X64
104             return x64::mayiuse(x64::avx512_core);
105 #else
106             return false;
107 #endif
108         case data_type::f16: return false;
109         default: return true;
110     }
111 }
112 
// Returns the scale factor for s8s8 weights: 0.5 on x64 when
// avx512_core_vnni may not be used, 1.0 in every other case (including all
// non-x64 builds).
float s8s8_weights_scale_factor() {
    float factor = 1.0f;
#if DNNL_X64
    if (!x64::mayiuse(x64::avx512_core_vnni)) factor = 0.5f;
#endif
    return factor;
}
120 
// Returns the data cache size available to a single core at the given cache
// `level` (1-based).
//
// On x64 the value is the cache size reported by Xbyak divided by the number
// of cores sharing that cache; a level outside the reported range yields 0.
// When Xbyak reports no cache levels at all, or on non-x64 builds, a fixed
// guess is returned instead: 32 KiB for L1, 512 KiB for L2, 1 MiB for L3 and
// 0 for any other level.
unsigned get_per_core_cache_size(int level) {
    // Fallback sizes, indexed by (level - 1).
    const auto fallback_size = [](int lvl) -> unsigned {
        static const unsigned defaults[] = {32U * 1024, 512U * 1024, 1024U * 1024};
        const bool known_level = lvl >= 1 && lvl <= 3;
        return known_level ? defaults[lvl - 1] : 0U;
    };

#if DNNL_X64
    const unsigned reported_levels = x64::cpu().getDataCacheLevels();
    if (reported_levels == 0) return fallback_size(level);

    const bool level_in_range
            = level > 0 && static_cast<unsigned>(level) <= reported_levels;
    if (!level_in_range) return 0;

    // Xbyak indexes cache levels from zero.
    const unsigned idx = level - 1;
    return x64::cpu().getDataCacheSize(idx)
            / x64::cpu().getCoresSharingDataCache(idx);
#else
    return fallback_size(level);
#endif
}
144 
// Returns the number of cores reported by Xbyak at CoreLevel on x64 builds.
// Other builds have no such query and report a single core.
unsigned get_num_cores() {
    unsigned num_cores = 1;
#if DNNL_X64
    num_cores = x64::cpu().getNumCores(Xbyak::util::CoreLevel);
#endif
    return num_cores;
}
152 
153 #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
154 // The purpose of this function is to return the potential maximum number of
155 // threads in user's threadpool. It is assumed that the number of threads in an
156 // actual threadpool will not exceed the number cores in a socket reported by
157 // the OS, which may or may not be equal to the number of total physical cores
158 // in a socket depending on the OS configuration (read -- VM environment). In
159 // order to simulate the number of cores available in such environment, this
160 // function supports process affinity.
get_max_threads_to_use()161 unsigned get_max_threads_to_use() {
162     int num_cores_per_socket = (int)dnnl::impl::cpu::platform::get_num_cores();
163 #if defined(_WIN32)
164     DWORD_PTR proc_affinity_mask;
165     DWORD_PTR sys_affinity_mask;
166     if (GetProcessAffinityMask(
167                 GetCurrentProcess(), &proc_affinity_mask, &sys_affinity_mask)) {
168         int masked_nthr = 0;
169         for (int i = 0; i < CHAR_BIT * sizeof(proc_affinity_mask);
170                 i++, proc_affinity_mask >>= 1)
171             masked_nthr += proc_affinity_mask & 1;
172         return std::min(masked_nthr, num_cores_per_socket);
173     }
174 #elif defined(__GLIBC__)
175     cpu_set_t cpu_set;
176     // Check if the affinity of the process has been set using, e.g.,
177     // numactl.
178     if (::sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set) == 0)
179         return std::min(CPU_COUNT(&cpu_set), num_cores_per_socket);
180 #endif
181     return num_cores_per_socket;
182 }
183 #endif
184 
// Returns the vector length (cpu_isa_traits<isa>::vlen; presumably in bytes --
// confirm against cpu_isa_traits) of the widest vector ISA that may be used
// on the current platform, or 0 when none of the probed ISAs is available or
// the build targets neither x64 nor AArch64.
//
// ISAs are probed from widest to narrowest so that the first match is the
// widest one. On AArch64 this means SVE-512 is checked before ASIMD:
// checking ASIMD first would shadow the SVE-512 branch, since Advanced SIMD
// is available on SVE-capable hardware as well.
int get_vector_register_size() {
#if DNNL_X64
    using namespace x64;
    if (mayiuse(avx512_common)) return cpu_isa_traits<avx512_common>::vlen;
    if (mayiuse(avx)) return cpu_isa_traits<avx>::vlen;
    if (mayiuse(sse41)) return cpu_isa_traits<sse41>::vlen;
#elif DNNL_AARCH64
    using namespace aarch64;
    if (mayiuse(sve_512)) return cpu_isa_traits<sve_512>::vlen;
    if (mayiuse(asimd)) return cpu_isa_traits<asimd>::vlen;
#endif
    return 0;
}
198 
/* Provides a very cheap timestamp (used primarily by the primitive cache).
 * On DNNL_X64 builds the value comes from *rdtsc*, which yields a timestamp
 * that (i) is independent for each core, and (ii) is synchronized across
 * cores in multiple sockets.
 * TODO: Other builds currently rely on std::chrono::steady_clock; a more
 * optimized, platform-specific source may be plugged in here later.
 */
size_t get_timestamp() {
#if DNNL_X64
    const auto ticks = Xbyak::util::Clock::getRdtsc();
#else
    const auto ticks
            = std::chrono::steady_clock::now().time_since_epoch().count();
#endif
    return static_cast<size_t>(ticks);
}
215 
216 } // namespace platform
217 } // namespace cpu
218 } // namespace impl
219 } // namespace dnnl
220