1 /*
2 * Copyright 2011-2013 Blender Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "util/util_system.h"
18
19 #include "util/util_logging.h"
20 #include "util/util_string.h"
21 #include "util/util_types.h"
22
23 #include <numaapi.h>
24
25 #include <OpenImageIO/sysutil.h>
26 OIIO_NAMESPACE_USING
27
28 #ifdef _WIN32
29 # if (!defined(FREE_WINDOWS))
30 # include <intrin.h>
31 # endif
32 # include "util_windows.h"
33 #elif defined(__APPLE__)
34 # include <sys/ioctl.h>
35 # include <sys/sysctl.h>
36 # include <sys/types.h>
37 #else
38 # include <sys/ioctl.h>
39 # include <unistd.h>
40 #endif
41
42 CCL_NAMESPACE_BEGIN
43
system_cpu_ensure_initialized()44 bool system_cpu_ensure_initialized()
45 {
46 static bool is_initialized = false;
47 static bool result = false;
48 if (is_initialized) {
49 return result;
50 }
51 is_initialized = true;
52 const NUMAAPI_Result numa_result = numaAPI_Initialize();
53 result = (numa_result == NUMAAPI_SUCCESS);
54 return result;
55 }
56
/* Fallback solution, which doesn't use NUMA/CPU groups.
 *
 * Asks the operating system directly for the number of processors.
 *
 * Returns the processor count, and never less than 1: when the underlying
 * query fails the result is clamped so callers do not receive a zero, negative
 * or uninitialized value. */
static int system_cpu_thread_count_fallback()
{
  int count = 0;
#ifdef _WIN32
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  count = (int)info.dwNumberOfProcessors;
#elif defined(__APPLE__)
  size_t len = sizeof(count);
  int mib[2] = {CTL_HW, HW_NCPU};
  /* Do not trust the output buffer when the query itself failed. */
  if (sysctl(mib, 2, &count, &len, NULL, 0) != 0) {
    count = 0;
  }
#else
  /* sysconf() returns -1 when the value can not be determined. */
  count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif
  return (count > 0) ? count : 1;
}
74
system_cpu_thread_count()75 int system_cpu_thread_count()
76 {
77 const int num_nodes = system_cpu_num_numa_nodes();
78 int num_threads = 0;
79 for (int node = 0; node < num_nodes; ++node) {
80 if (!system_cpu_is_numa_node_available(node)) {
81 continue;
82 }
83 num_threads += system_cpu_num_numa_node_processors(node);
84 }
85 return num_threads;
86 }
87
system_cpu_num_numa_nodes()88 int system_cpu_num_numa_nodes()
89 {
90 if (!system_cpu_ensure_initialized()) {
91 /* Fallback to a single node with all the threads. */
92 return 1;
93 }
94 return numaAPI_GetNumNodes();
95 }
96
system_cpu_is_numa_node_available(int node)97 bool system_cpu_is_numa_node_available(int node)
98 {
99 if (!system_cpu_ensure_initialized()) {
100 return true;
101 }
102 return numaAPI_IsNodeAvailable(node);
103 }
104
system_cpu_num_numa_node_processors(int node)105 int system_cpu_num_numa_node_processors(int node)
106 {
107 if (!system_cpu_ensure_initialized()) {
108 return system_cpu_thread_count_fallback();
109 }
110 return numaAPI_GetNumNodeProcessors(node);
111 }
112
system_cpu_run_thread_on_node(int node)113 bool system_cpu_run_thread_on_node(int node)
114 {
115 if (!system_cpu_ensure_initialized()) {
116 return true;
117 }
118 return numaAPI_RunThreadOnNode(node);
119 }
120
/* Width of the console attached to standard output, in columns.
 *
 * Returns 80 when the width can not be queried or is reported as
 * non-positive. */
int system_console_width()
{
  int width = 0;

#ifdef _WIN32
  CONSOLE_SCREEN_BUFFER_INFO screen_info;
  if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &screen_info)) {
    width = screen_info.dwSize.X;
  }
#else
  struct winsize size;
  if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) == 0) {
    width = size.ws_col;
  }
#endif

  if (width <= 0) {
    return 80;
  }
  return width;
}
139
system_cpu_num_active_group_processors()140 int system_cpu_num_active_group_processors()
141 {
142 if (!system_cpu_ensure_initialized()) {
143 return system_cpu_thread_count_fallback();
144 }
145 return numaAPI_GetNumCurrentNodesProcessors();
146 }
147
#if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Replacement for the MSVC `__cpuid` intrinsic on other compilers.
 *
 * Executes the CPUID instruction for leaf `selector` and stores the resulting
 * EAX, EBX, ECX and EDX registers in data[0..3], in that order.
 *
 * ECX is cleared before the instruction is executed, so sub-leaf indexed
 * leaves (such as leaf 7) always query sub-leaf 0 instead of whatever value
 * happened to be in the register.
 *
 * On architectures other than x86 all four outputs are set to zero. */
static void __cpuid(int data[4], int selector)
{
#  if defined(__x86_64__)
  __asm__("cpuid"
          : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0));
#  elif defined(__i386__)
  /* EBX is saved and restored by hand; its value is handed out through a
   * scratch register instead. */
  __asm__("pushl %%ebx \n\t"
          "cpuid \n\t"
          "movl %%ebx, %1 \n\t"
          "popl %%ebx \n\t"
          : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0)
          : "ebx");
#  else
  data[0] = data[1] = data[2] = data[3] = 0;
#  endif
}
#endif
166
system_cpu_brand_string()167 string system_cpu_brand_string()
168 {
169 char buf[49] = {0};
170 int result[4] = {0};
171
172 __cpuid(result, 0x80000000);
173
174 if (result[0] >= (int)0x80000004) {
175 __cpuid((int *)(buf + 0), 0x80000002);
176 __cpuid((int *)(buf + 16), 0x80000003);
177 __cpuid((int *)(buf + 32), 0x80000004);
178
179 string brand = buf;
180
181 /* make it a bit more presentable */
182 brand = string_remove_trademark(brand);
183
184 return brand;
185 }
186
187 return "Unknown CPU";
188 }
189
/* Pointer width of this build in bits, computed as sizeof(void *) * 8. */
int system_cpu_bits()
{
  const size_t pointer_size_in_bytes = sizeof(void *);
  return (int)(pointer_size_in_bytes * 8);
}
194
195 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
196
/* Instruction set extensions of the host CPU, one flag per extension.
 *
 * Filled in by system_cpu_capabilities(). In the code visible here the x64,
 * sse4a, xop and fma4 flags are only ever zeroed and never set. */
struct CPUCapabilities {
  bool x64;
  bool mmx;
  /* SSE family. */
  bool sse, sse2, sse3, ssse3, sse41, sse42, sse4a;
  /* AVX family and half-float conversion. */
  bool avx, f16c, avx2;
  /* XOP and fused multiply-add. */
  bool xop, fma3, fma4;
  /* Bit manipulation instructions. */
  bool bmi1, bmi2;
};
216
system_cpu_capabilities()217 static CPUCapabilities &system_cpu_capabilities()
218 {
219 static CPUCapabilities caps;
220 static bool caps_init = false;
221
222 if (!caps_init) {
223 int result[4], num;
224
225 memset(&caps, 0, sizeof(caps));
226
227 __cpuid(result, 0);
228 num = result[0];
229
230 if (num >= 1) {
231 __cpuid(result, 0x00000001);
232 caps.mmx = (result[3] & ((int)1 << 23)) != 0;
233 caps.sse = (result[3] & ((int)1 << 25)) != 0;
234 caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
235 caps.sse3 = (result[2] & ((int)1 << 0)) != 0;
236
237 caps.ssse3 = (result[2] & ((int)1 << 9)) != 0;
238 caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
239 caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
240
241 caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
242 caps.avx = false;
243 bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
244 bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
245
246 if (os_uses_xsave_xrestore && cpu_avx_support) {
247 // Check if the OS will save the YMM registers
248 uint32_t xcr_feature_mask;
249 # if defined(__GNUC__)
250 int edx; /* not used */
251 /* actual opcode for xgetbv */
252 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0));
253 # elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
254 xcr_feature_mask = (uint32_t)_xgetbv(
255 _XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */
256 # else
257 xcr_feature_mask = 0;
258 # endif
259 caps.avx = (xcr_feature_mask & 0x6) == 0x6;
260 }
261
262 caps.f16c = (result[2] & ((int)1 << 29)) != 0;
263
264 __cpuid(result, 0x00000007);
265 caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
266 caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
267 caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
268 }
269
270 caps_init = true;
271 }
272
273 return caps;
274 }
275
system_cpu_support_sse2()276 bool system_cpu_support_sse2()
277 {
278 CPUCapabilities &caps = system_cpu_capabilities();
279 return caps.sse && caps.sse2;
280 }
281
system_cpu_support_sse3()282 bool system_cpu_support_sse3()
283 {
284 CPUCapabilities &caps = system_cpu_capabilities();
285 return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
286 }
287
system_cpu_support_sse41()288 bool system_cpu_support_sse41()
289 {
290 CPUCapabilities &caps = system_cpu_capabilities();
291 return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
292 }
293
system_cpu_support_avx()294 bool system_cpu_support_avx()
295 {
296 CPUCapabilities &caps = system_cpu_capabilities();
297 return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
298 }
299
system_cpu_support_avx2()300 bool system_cpu_support_avx2()
301 {
302 CPUCapabilities &caps = system_cpu_capabilities();
303 return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c &&
304 caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
305 }
306 #else
307
/* Stub for builds which are not targeting x86: SSE2 is never reported. */
bool system_cpu_support_sse2()
{
  return false;
}
312
/* Stub for builds which are not targeting x86: SSE3 is never reported. */
bool system_cpu_support_sse3()
{
  return false;
}
317
/* Stub for builds which are not targeting x86: SSE4.1 is never reported. */
bool system_cpu_support_sse41()
{
  return false;
}
322
/* Stub for builds which are not targeting x86: AVX is never reported. */
bool system_cpu_support_avx()
{
  return false;
}
/* Stub for builds which are not targeting x86: AVX2 is never reported. */
bool system_cpu_support_avx2()
{
  return false;
}
331
332 #endif
333
system_call_self(const vector<string> & args)334 bool system_call_self(const vector<string> &args)
335 {
336 /* Escape program and arguments in case they contain spaces. */
337 string cmd = "\"" + Sysutil::this_program_path() + "\"";
338
339 for (int i = 0; i < args.size(); i++) {
340 cmd += " \"" + args[i] + "\"";
341 }
342
343 #ifdef _WIN32
344 /* Use cmd /S to avoid issues with spaces in arguments. */
345 cmd = "cmd /S /C \"" + cmd + " > nul \"";
346 #else
347 /* Quiet output. */
348 cmd += " > /dev/null";
349 #endif
350
351 return (system(cmd.c_str()) == 0);
352 }
353
/* Amount of physical memory installed in the system, in bytes.
 *
 * Returns 0 on every platform when the operating system query fails.
 *
 * NOTE(review): the result is a size_t, so on builds where size_t is 32 bits
 * wide, amounts of 4 GiB and above can not be represented. */
size_t system_physical_ram()
{
#ifdef _WIN32
  MEMORYSTATUSEX ram;
  ram.dwLength = sizeof(ram);
  /* The structure is only filled in when the call succeeds. */
  if (!GlobalMemoryStatusEx(&ram)) {
    return 0;
  }
  return ram.ullTotalPhys;
#elif defined(__APPLE__)
  uint64_t ram = 0;
  size_t len = sizeof(ram);
  if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
    return ram;
  }
  return 0;
#else
  /* sysconf() returns -1 on failure. Check before converting to an unsigned
   * type, otherwise the product would be a huge bogus value. */
  const long page_size = sysconf(_SC_PAGESIZE);
  const long num_pages = sysconf(_SC_PHYS_PAGES);
  if (page_size <= 0 || num_pages <= 0) {
    return 0;
  }
  return (size_t)page_size * (size_t)num_pages;
#endif
}
374
375 CCL_NAMESPACE_END
376