1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "util/util_system.h"
18 
19 #include "util/util_logging.h"
20 #include "util/util_string.h"
21 #include "util/util_types.h"
22 
23 #include <numaapi.h>
24 
25 #include <OpenImageIO/sysutil.h>
26 OIIO_NAMESPACE_USING
27 
28 #ifdef _WIN32
29 #  if (!defined(FREE_WINDOWS))
30 #    include <intrin.h>
31 #  endif
32 #  include "util_windows.h"
33 #elif defined(__APPLE__)
34 #  include <sys/ioctl.h>
35 #  include <sys/sysctl.h>
36 #  include <sys/types.h>
37 #else
38 #  include <sys/ioctl.h>
39 #  include <unistd.h>
40 #endif
41 
42 CCL_NAMESPACE_BEGIN
43 
system_cpu_ensure_initialized()44 bool system_cpu_ensure_initialized()
45 {
46   static bool is_initialized = false;
47   static bool result = false;
48   if (is_initialized) {
49     return result;
50   }
51   is_initialized = true;
52   const NUMAAPI_Result numa_result = numaAPI_Initialize();
53   result = (numa_result == NUMAAPI_SUCCESS);
54   return result;
55 }
56 
/* Fallback solution, which doesn't use NUMA/CPU groups.
 *
 * Asks the operating system for the number of processors. When the query
 * fails or reports a non-positive value, 1 is returned so that callers never
 * see a zero or negative thread count. */
static int system_cpu_thread_count_fallback()
{
#ifdef _WIN32
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  const int count = (int)info.dwNumberOfProcessors;
#elif defined(__APPLE__)
  int count = 0;
  size_t len = sizeof(count);
  int mib[2] = {CTL_HW, HW_NCPU};
  if (sysctl(mib, 2, &count, &len, NULL, 0) != 0) {
    /* Do not trust the output value when the query itself failed. */
    count = 0;
  }
#else
  /* sysconf() returns -1 when the value can not be determined. */
  const int count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#endif
  return (count > 0) ? count : 1;
}
74 
system_cpu_thread_count()75 int system_cpu_thread_count()
76 {
77   const int num_nodes = system_cpu_num_numa_nodes();
78   int num_threads = 0;
79   for (int node = 0; node < num_nodes; ++node) {
80     if (!system_cpu_is_numa_node_available(node)) {
81       continue;
82     }
83     num_threads += system_cpu_num_numa_node_processors(node);
84   }
85   return num_threads;
86 }
87 
system_cpu_num_numa_nodes()88 int system_cpu_num_numa_nodes()
89 {
90   if (!system_cpu_ensure_initialized()) {
91     /* Fallback to a single node with all the threads. */
92     return 1;
93   }
94   return numaAPI_GetNumNodes();
95 }
96 
system_cpu_is_numa_node_available(int node)97 bool system_cpu_is_numa_node_available(int node)
98 {
99   if (!system_cpu_ensure_initialized()) {
100     return true;
101   }
102   return numaAPI_IsNodeAvailable(node);
103 }
104 
system_cpu_num_numa_node_processors(int node)105 int system_cpu_num_numa_node_processors(int node)
106 {
107   if (!system_cpu_ensure_initialized()) {
108     return system_cpu_thread_count_fallback();
109   }
110   return numaAPI_GetNumNodeProcessors(node);
111 }
112 
system_cpu_run_thread_on_node(int node)113 bool system_cpu_run_thread_on_node(int node)
114 {
115   if (!system_cpu_ensure_initialized()) {
116     return true;
117   }
118   return numaAPI_RunThreadOnNode(node);
119 }
120 
/* Width, in columns, of the console attached to the standard output.
 *
 * Returns 80 when the width can not be queried or is not positive. */
int system_console_width()
{
  const int default_width = 80;

#ifdef _WIN32
  CONSOLE_SCREEN_BUFFER_INFO buffer_info;
  if (!GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &buffer_info)) {
    return default_width;
  }
  const int width = buffer_info.dwSize.X;
#else
  struct winsize window_size;
  if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &window_size) != 0) {
    return default_width;
  }
  const int width = window_size.ws_col;
#endif

  if (width <= 0) {
    return default_width;
  }
  return width;
}
139 
system_cpu_num_active_group_processors()140 int system_cpu_num_active_group_processors()
141 {
142   if (!system_cpu_ensure_initialized()) {
143     return system_cpu_thread_count_fallback();
144   }
145   return numaAPI_GetNumCurrentNodesProcessors();
146 }
147 
#if !defined(_WIN32) || defined(FREE_WINDOWS)
/* Execute the CPUID instruction for the leaf given by `selector` and store
 * EAX, EBX, ECX and EDX (in this order) into `data`.
 *
 * ECX is explicitly set to zero before the instruction is executed, so leaves
 * which take a sub-leaf index in ECX (such as leaf 7) are always queried for
 * sub-leaf 0 instead of whatever happened to be in the register.
 *
 * On architectures other than x86 and x86-64 all four values are set to 0. */
static void __cpuid(int data[4], int selector)
{
#  if defined(__x86_64__)
  __asm__("cpuid"
          : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0));
#  elif defined(__i386__)
  /* EBX is saved and restored around the instruction, its value is handed
   * out via a different register. */
  __asm__("pushl %%ebx    \n\t"
          "cpuid          \n\t"
          "movl %%ebx, %1 \n\t"
          "popl %%ebx     \n\t"
          : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3])
          : "a"(selector), "c"(0)
          : "ebx");
#  else
  data[0] = data[1] = data[2] = data[3] = 0;
#  endif
}
#endif
166 
system_cpu_brand_string()167 string system_cpu_brand_string()
168 {
169   char buf[49] = {0};
170   int result[4] = {0};
171 
172   __cpuid(result, 0x80000000);
173 
174   if (result[0] >= (int)0x80000004) {
175     __cpuid((int *)(buf + 0), 0x80000002);
176     __cpuid((int *)(buf + 16), 0x80000003);
177     __cpuid((int *)(buf + 32), 0x80000004);
178 
179     string brand = buf;
180 
181     /* make it a bit more presentable */
182     brand = string_remove_trademark(brand);
183 
184     return brand;
185   }
186 
187   return "Unknown CPU";
188 }
189 
/* Size of a pointer in bits for the current build. */
int system_cpu_bits()
{
  const size_t pointer_bytes = sizeof(void *);
  return (int)(pointer_bytes * 8);
}
194 
195 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)
196 
/* Set of flags describing which instruction set extensions can be used.
 *
 * Filled in by system_cpu_capabilities(). That function zeroes the whole
 * struct first and does not assign x64, sse4a, xop and fma4 afterwards, so
 * those stay false. */
struct CPUCapabilities {
  /* Basic architecture flags. */
  bool x64, mmx;
  /* SSE family. */
  bool sse, sse2, sse3, ssse3, sse41, sse42, sse4a;
  /* AVX family and half-float conversion. */
  bool avx, f16c, avx2;
  /* XOP and fused multiply-add. */
  bool xop, fma3, fma4;
  /* Bit manipulation instructions. */
  bool bmi1, bmi2;
};
216 
system_cpu_capabilities()217 static CPUCapabilities &system_cpu_capabilities()
218 {
219   static CPUCapabilities caps;
220   static bool caps_init = false;
221 
222   if (!caps_init) {
223     int result[4], num;
224 
225     memset(&caps, 0, sizeof(caps));
226 
227     __cpuid(result, 0);
228     num = result[0];
229 
230     if (num >= 1) {
231       __cpuid(result, 0x00000001);
232       caps.mmx = (result[3] & ((int)1 << 23)) != 0;
233       caps.sse = (result[3] & ((int)1 << 25)) != 0;
234       caps.sse2 = (result[3] & ((int)1 << 26)) != 0;
235       caps.sse3 = (result[2] & ((int)1 << 0)) != 0;
236 
237       caps.ssse3 = (result[2] & ((int)1 << 9)) != 0;
238       caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
239       caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
240 
241       caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
242       caps.avx = false;
243       bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
244       bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
245 
246       if (os_uses_xsave_xrestore && cpu_avx_support) {
247         // Check if the OS will save the YMM registers
248         uint32_t xcr_feature_mask;
249 #  if defined(__GNUC__)
250         int edx; /* not used */
251         /* actual opcode for xgetbv */
252         __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0));
253 #  elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
254         xcr_feature_mask = (uint32_t)_xgetbv(
255             _XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */
256 #  else
257         xcr_feature_mask = 0;
258 #  endif
259         caps.avx = (xcr_feature_mask & 0x6) == 0x6;
260       }
261 
262       caps.f16c = (result[2] & ((int)1 << 29)) != 0;
263 
264       __cpuid(result, 0x00000007);
265       caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
266       caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
267       caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
268     }
269 
270     caps_init = true;
271   }
272 
273   return caps;
274 }
275 
system_cpu_support_sse2()276 bool system_cpu_support_sse2()
277 {
278   CPUCapabilities &caps = system_cpu_capabilities();
279   return caps.sse && caps.sse2;
280 }
281 
system_cpu_support_sse3()282 bool system_cpu_support_sse3()
283 {
284   CPUCapabilities &caps = system_cpu_capabilities();
285   return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3;
286 }
287 
system_cpu_support_sse41()288 bool system_cpu_support_sse41()
289 {
290   CPUCapabilities &caps = system_cpu_capabilities();
291   return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41;
292 }
293 
system_cpu_support_avx()294 bool system_cpu_support_avx()
295 {
296   CPUCapabilities &caps = system_cpu_capabilities();
297   return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx;
298 }
299 
system_cpu_support_avx2()300 bool system_cpu_support_avx2()
301 {
302   CPUCapabilities &caps = system_cpu_capabilities();
303   return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c &&
304          caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
305 }
306 #else
307 
/* Builds for architectures other than x86/x86-64 never report SSE2. */
bool system_cpu_support_sse2()
{
  return false;
}
312 
/* Builds for architectures other than x86/x86-64 never report SSE3. */
bool system_cpu_support_sse3()
{
  return false;
}
317 
/* Builds for architectures other than x86/x86-64 never report SSE4.1. */
bool system_cpu_support_sse41()
{
  return false;
}
322 
/* Builds for architectures other than x86/x86-64 never report AVX. */
bool system_cpu_support_avx()
{
  return false;
}
/* Builds for architectures other than x86/x86-64 never report AVX2. */
bool system_cpu_support_avx2()
{
  return false;
}
331 
332 #endif
333 
system_call_self(const vector<string> & args)334 bool system_call_self(const vector<string> &args)
335 {
336   /* Escape program and arguments in case they contain spaces. */
337   string cmd = "\"" + Sysutil::this_program_path() + "\"";
338 
339   for (int i = 0; i < args.size(); i++) {
340     cmd += " \"" + args[i] + "\"";
341   }
342 
343 #ifdef _WIN32
344   /* Use cmd /S to avoid issues with spaces in arguments. */
345   cmd = "cmd /S /C \"" + cmd + " > nul \"";
346 #else
347   /* Quiet output. */
348   cmd += " > /dev/null";
349 #endif
350 
351   return (system(cmd.c_str()) == 0);
352 }
353 
/* Amount of physical memory installed in the system, in bytes.
 *
 * Returns 0 when the operating system query fails, on every platform. */
size_t system_physical_ram()
{
#ifdef _WIN32
  MEMORYSTATUSEX ram;
  ram.dwLength = sizeof(ram);
  if (!GlobalMemoryStatusEx(&ram)) {
    /* The struct contents are not valid when the call fails. */
    return 0;
  }
  return ram.ullTotalPhys;
#elif defined(__APPLE__)
  uint64_t ram = 0;
  size_t len = sizeof(ram);
  if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
    return ram;
  }
  return 0;
#else
  /* sysconf() returns -1 on failure. Check before converting to an unsigned
   * type, otherwise the product becomes a meaningless huge number. */
  const long page_size = sysconf(_SC_PAGESIZE);
  const long num_pages = sysconf(_SC_PHYS_PAGES);
  if (page_size <= 0 || num_pages <= 0) {
    return 0;
  }
  return (size_t)page_size * (size_t)num_pages;
#endif
}
374 
375 CCL_NAMESPACE_END
376