1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #include "sysinfo.h"
5 #include "intrinsics.h"
6 #include "string.h"
7 #include "ref.h"
8 #if defined(__DragonFly__)
9 #include <pthread_np.h>
10 #endif
11 #if defined(__FREEBSD__)
12 #include <sys/cpuset.h>
13 #include <pthread_np.h>
14 typedef cpuset_t cpu_set_t;
15 #endif
16 
17 ////////////////////////////////////////////////////////////////////////////////
18 /// All Platforms
19 ////////////////////////////////////////////////////////////////////////////////
20 
21 namespace embree
22 {
  /* namespace-scope object of type NullTy; the NullTy type itself is
     declared outside this file */
  NullTy null;
24 
getPlatformName()25   std::string getPlatformName()
26   {
27 #if defined(__LINUX__) && !defined(__64BIT__)
28     return "Linux (32bit)";
29 #elif defined(__LINUX__) && defined(__64BIT__)
30     return "Linux (64bit)";
31 #elif defined(__DRAGONFLY__) && defined(__X86_64__)
32     return "DragonFly (64bit)";
33 #elif defined(__FREEBSD__) && !defined(__64BIT__)
34     return "FreeBSD (32bit)";
35 #elif defined(__FREEBSD__) && defined(__64BIT__)
36     return "FreeBSD (64bit)";
37 #elif defined(__CYGWIN__) && !defined(__64BIT__)
38     return "Cygwin (32bit)";
39 #elif defined(__CYGWIN__) && defined(__64BIT__)
40     return "Cygwin (64bit)";
41 #elif defined(__WIN32__) && !defined(__64BIT__)
42     return "Windows (32bit)";
43 #elif defined(__WIN32__) && defined(__64BIT__)
44     return "Windows (64bit)";
45 #elif defined(__MACOSX__) && !defined(__64BIT__)
46     return "Mac OS X (32bit)";
47 #elif defined(__MACOSX__) && defined(__64BIT__)
48     return "Mac OS X (64bit)";
49 #elif defined(__UNIX__) && !defined(__64BIT__)
50     return "Unix (32bit)";
51 #elif defined(__UNIX__) && defined(__64BIT__)
52     return "Unix (64bit)";
53 #else
54     return "Unknown";
55 #endif
56   }
57 
getCompilerName()58   std::string getCompilerName()
59   {
60 #if defined(__INTEL_COMPILER)
61     int icc_mayor = __INTEL_COMPILER / 100 % 100;
62     int icc_minor = __INTEL_COMPILER % 100;
63     std::string version = "Intel Compiler ";
64     version += toString(icc_mayor);
65     version += "." + toString(icc_minor);
66 #if defined(__INTEL_COMPILER_UPDATE)
67     version += "." + toString(__INTEL_COMPILER_UPDATE);
68 #endif
69     return version;
70 #elif defined(__clang__)
71     return "CLANG " __clang_version__;
72 #elif defined (__GNUC__)
73     return "GCC " __VERSION__;
74 #elif defined(_MSC_VER)
75     std::string version = toString(_MSC_FULL_VER);
76     version.insert(4,".");
77     version.insert(9,".");
78     version.insert(2,".");
79     return "Visual C++ Compiler " + version;
80 #else
81     return "Unknown Compiler";
82 #endif
83   }
84 
getCPUVendor()85   std::string getCPUVendor()
86   {
87 #if defined(__X86_ASM__)
88     int cpuinfo[4];
89     __cpuid (cpuinfo, 0);
90     int name[4];
91     name[0] = cpuinfo[1];
92     name[1] = cpuinfo[3];
93     name[2] = cpuinfo[2];
94     name[3] = 0;
95     return (char*)name;
96 #elif defined(__ARM_NEON)
97     return "ARM";
98 #else
99     return "Unknown";
100 #endif
101   }
102 
getCPUModel()103   CPU getCPUModel()
104   {
105 #if defined(__X86_ASM__)
106     if (getCPUVendor() != "GenuineIntel")
107       return CPU::UNKNOWN;
108 
109     int out[4];
110     __cpuid(out, 0);
111     if (out[0] < 1) return CPU::UNKNOWN;
112     __cpuid(out, 1);
113 
114     /* please see CPUID documentation for these formulas */
115     uint32_t family_ID          = (out[0] >>  8) & 0x0F;
116     uint32_t extended_family_ID = (out[0] >> 20) & 0xFF;
117 
118     uint32_t model_ID           = (out[0] >>  4) & 0x0F;
119     uint32_t extended_model_ID  = (out[0] >> 16) & 0x0F;
120 
121     uint32_t DisplayFamily = family_ID;
122     if (family_ID == 0x0F)
123       DisplayFamily += extended_family_ID;
124 
125     uint32_t DisplayModel = model_ID;
126     if (family_ID == 0x06 || family_ID == 0x0F)
127       DisplayModel += extended_model_ID << 4;
128 
129     uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);
130 
131     // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
132     if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
133     if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
134     if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
135     if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE;
136     if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE;
137     if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE;
138     if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE;
139     if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE;
140     if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE;
141     if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE;
142     if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE;
143     if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE;
144     if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE;
145     if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL;
146     if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL;
147     if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL;
148     if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL;
149     if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL;
150     if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL;
151     if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL;
152     if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL;
153     if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE;
154     if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE;
155     if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE;
156     if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE;
157     if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE;
158     if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM;
159     if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM;
160     if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM;
161     if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM;
162     if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM;
163     if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM;
164     if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM;
165     if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2;
166     if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2;
167     if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1;
168 
169     if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL;
170     if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING;
171 
172 #elif defined(__ARM_NEON)
173     return CPU::ARM;
174 #endif
175 
176     return CPU::UNKNOWN;
177   }
178 
stringOfCPUModel(CPU model)179   std::string stringOfCPUModel(CPU model)
180   {
181     switch (model) {
182     case CPU::XEON_ICE_LAKE           : return "Xeon Ice Lake";
183     case CPU::CORE_ICE_LAKE           : return "Core Ice Lake";
184     case CPU::CORE_TIGER_LAKE         : return "Core Tiger Lake";
185     case CPU::CORE_COMET_LAKE         : return "Core Comet Lake";
186     case CPU::CORE_CANNON_LAKE        : return "Core Cannon Lake";
187     case CPU::CORE_KABY_LAKE          : return "Core Kaby Lake";
188     case CPU::XEON_SKY_LAKE           : return "Xeon Sky Lake";
189     case CPU::CORE_SKY_LAKE           : return "Core Sky Lake";
190     case CPU::XEON_PHI_KNIGHTS_MILL   : return "Xeon Phi Knights Mill";
191     case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing";
192     case CPU::XEON_BROADWELL          : return "Xeon Broadwell";
193     case CPU::CORE_BROADWELL          : return "Core Broadwell";
194     case CPU::XEON_HASWELL            : return "Xeon Haswell";
195     case CPU::CORE_HASWELL            : return "Core Haswell";
196     case CPU::XEON_IVY_BRIDGE         : return "Xeon Ivy Bridge";
197     case CPU::CORE_IVY_BRIDGE         : return "Core Ivy Bridge";
198     case CPU::SANDY_BRIDGE            : return "Sandy Bridge";
199     case CPU::NEHALEM                 : return "Nehalem";
200     case CPU::CORE2                   : return "Core2";
201     case CPU::CORE1                   : return "Core";
202     case CPU::ARM                     : return "ARM";
203     case CPU::UNKNOWN                 : return "Unknown CPU";
204     }
205     return "Unknown CPU (error)";
206   }
207 
#if defined(__X86_ASM__)
  /* The constants below are file-local and only available on x86 builds.
     They are grouped by the CPUID leaf and output register in which
     getCPUFeatures() tests them. */

  /* constants to access destination registers of CPUID instruction
     (used as indices into the int[4] arrays filled by __cpuid) */
  static const int EAX = 0;
  static const int EBX = 1;
  static const int ECX = 2;
  static const int EDX = 3;

  /* cpuid[eax=1].ecx */
  static const int CPU_FEATURE_BIT_SSE3   = 1 << 0;
  static const int CPU_FEATURE_BIT_SSSE3  = 1 << 9;
  static const int CPU_FEATURE_BIT_FMA3   = 1 << 12;
  static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19;
  static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20;
  //static const int CPU_FEATURE_BIT_MOVBE  = 1 << 22;
  static const int CPU_FEATURE_BIT_POPCNT = 1 << 23;
  //static const int CPU_FEATURE_BIT_XSAVE  = 1 << 26;
  static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27;      // tested before XCR0 is read
  static const int CPU_FEATURE_BIT_AVX    = 1 << 28;
  static const int CPU_FEATURE_BIT_F16C   = 1 << 29;
  static const int CPU_FEATURE_BIT_RDRAND = 1 << 30;

  /* cpuid[eax=1].edx */
  static const int CPU_FEATURE_BIT_SSE  = 1 << 25;
  static const int CPU_FEATURE_BIT_SSE2 = 1 << 26;

  /* cpuid[eax=0x80000001].ecx */
  static const int CPU_FEATURE_BIT_LZCNT = 1 << 5;

  /* cpuid[eax=7,ecx=0].ebx */
  static const int CPU_FEATURE_BIT_BMI1    = 1 << 3;
  static const int CPU_FEATURE_BIT_AVX2    = 1 << 5;
  static const int CPU_FEATURE_BIT_BMI2    = 1 << 8;
  static const int CPU_FEATURE_BIT_AVX512F = 1 << 16;     // AVX512F  (foundation)
  static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17;    // AVX512DQ (doubleword and quadword instructions)
  static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26;    // AVX512PF (prefetch gather/scatter instructions)
  static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27;    // AVX512ER (exponential and reciprocal instructions)
  static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28;    // AVX512CD (conflict detection instructions)
  static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30;    // AVX512BW (byte and word instructions)
  static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31;    // AVX512VL (vector length extensions); occupies the sign bit of the int
  static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21;  // AVX512IFMA (integer fused multiply-add instructions)

  /* cpuid[eax=7,ecx=0].ecx */
  static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1;   // AVX512VBMI (vector bit manipulation instructions)
#endif
252 
#if defined(__X86_ASM__)
  /*! Reads extended control register 0 (XCR0) with the XGETBV
   *  instruction. getCPUFeatures() uses the result to test which
   *  register states are enabled, and only calls this function after
   *  the OXSAVE bit of CPUID leaf 1 was found set. */
  __noinline int64_t get_xcr0()
  {
#if defined (__WIN32__)
    int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
    xcr0 = _xgetbv(0);
    return xcr0;
#else
    /* only the EAX half of the result is captured ("=a"); ECX selects
       register 0 and EDX is merely declared as clobbered, so the value
       returned here is the int stored in xcr0 widened to int64_t */
    int xcr0 = 0;
    __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" );
    return xcr0;
#endif
  }
#endif
267 
getCPUFeatures()268   int getCPUFeatures()
269   {
270 #if defined(__X86_ASM__)
271     /* cache CPU features access */
272     static int cpu_features = 0;
273     if (cpu_features)
274       return cpu_features;
275 
276     /* get number of CPUID leaves */
277     int cpuid_leaf0[4];
278     __cpuid(cpuid_leaf0, 0x00000000);
279     unsigned nIds = cpuid_leaf0[EAX];
280 
281     /* get number of extended CPUID leaves */
282     int cpuid_leafe[4];
283     __cpuid(cpuid_leafe, 0x80000000);
284     unsigned nExIds = cpuid_leafe[EAX];
285 
286     /* get CPUID leaves for EAX = 1,7, and 0x80000001 */
287     int cpuid_leaf_1[4] = { 0,0,0,0 };
288     int cpuid_leaf_7[4] = { 0,0,0,0 };
289     int cpuid_leaf_e1[4] = { 0,0,0,0 };
290     if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
291 #if _WIN32
292 #if _MSC_VER && (_MSC_FULL_VER < 160040219)
293 #else
294     if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
295 #endif
296 #else
297     if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
298 #endif
299     if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);
300 
301     /* detect if OS saves XMM, YMM, and ZMM states */
302     bool xmm_enabled = true;
303     bool ymm_enabled = false;
304     bool zmm_enabled = false;
305     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
306       int64_t xcr0 = get_xcr0();
307       xmm_enabled = ((xcr0 & 0x02) == 0x02);                /* checks if xmm are enabled in XCR0 */
308       ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
309       zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
310     }
311     if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED;
312     if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED;
313     if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED;
314 
315     if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE   ) cpu_features |= CPU_FEATURE_SSE;
316     if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2  ) cpu_features |= CPU_FEATURE_SSE2;
317     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3  ) cpu_features |= CPU_FEATURE_SSE3;
318     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3;
319     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41;
320     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42;
321     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT;
322 
323     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX   ) cpu_features |= CPU_FEATURE_AVX;
324     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C  ) cpu_features |= CPU_FEATURE_F16C;
325     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND;
326     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2  ) cpu_features |= CPU_FEATURE_AVX2;
327     if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3  ) cpu_features |= CPU_FEATURE_FMA3;
328     if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT;
329     if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1;
330     if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2;
331 
332     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F   ) cpu_features |= CPU_FEATURE_AVX512F;
333     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ  ) cpu_features |= CPU_FEATURE_AVX512DQ;
334     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF  ) cpu_features |= CPU_FEATURE_AVX512PF;
335     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER  ) cpu_features |= CPU_FEATURE_AVX512ER;
336     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD  ) cpu_features |= CPU_FEATURE_AVX512CD;
337     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW  ) cpu_features |= CPU_FEATURE_AVX512BW;
338     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA;
339     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL  ) cpu_features |= CPU_FEATURE_AVX512VL;
340     if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
341 
342     return cpu_features;
343 #elif defined(__ARM_NEON)
344     /* emulated features with sse2neon */
345     return CPU_FEATURE_SSE|CPU_FEATURE_SSE2|CPU_FEATURE_XMM_ENABLED;
346 #else
347     /* Unknown CPU. */
348     return 0;
349 #endif
350   }
351 
stringOfCPUFeatures(int features)352   std::string stringOfCPUFeatures(int features)
353   {
354     std::string str;
355     if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
356     if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM ";
357     if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM ";
358     if (features & CPU_FEATURE_SSE   ) str += "SSE ";
359     if (features & CPU_FEATURE_SSE2  ) str += "SSE2 ";
360     if (features & CPU_FEATURE_SSE3  ) str += "SSE3 ";
361     if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 ";
362     if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 ";
363     if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 ";
364     if (features & CPU_FEATURE_POPCNT) str += "POPCNT ";
365     if (features & CPU_FEATURE_AVX   ) str += "AVX ";
366     if (features & CPU_FEATURE_F16C  ) str += "F16C ";
367     if (features & CPU_FEATURE_RDRAND) str += "RDRAND ";
368     if (features & CPU_FEATURE_AVX2  ) str += "AVX2 ";
369     if (features & CPU_FEATURE_FMA3  ) str += "FMA3 ";
370     if (features & CPU_FEATURE_LZCNT ) str += "LZCNT ";
371     if (features & CPU_FEATURE_BMI1  ) str += "BMI1 ";
372     if (features & CPU_FEATURE_BMI2  ) str += "BMI2 ";
373     if (features & CPU_FEATURE_AVX512F) str += "AVX512F ";
374     if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ ";
375     if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF ";
376     if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER ";
377     if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD ";
378     if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW ";
379     if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
380     if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
381     if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
382     return str;
383   }
384 
stringOfISA(int isa)385   std::string stringOfISA (int isa)
386   {
387     if (isa == SSE) return "SSE";
388     if (isa == SSE2) return "SSE2";
389     if (isa == SSE3) return "SSE3";
390     if (isa == SSSE3) return "SSSE3";
391     if (isa == SSE41) return "SSE4.1";
392     if (isa == SSE42) return "SSE4.2";
393     if (isa == AVX) return "AVX";
394     if (isa == AVX2) return "AVX2";
395     if (isa == AVX512) return "AVX512";
396     return "UNKNOWN";
397   }
398 
hasISA(int features,int isa)399   bool hasISA(int features, int isa) {
400     return (features & isa) == isa;
401   }
402 
supportedTargetList(int features)403   std::string supportedTargetList (int features)
404   {
405     std::string v;
406     if (hasISA(features,SSE)) v += "SSE ";
407     if (hasISA(features,SSE2)) v += "SSE2 ";
408     if (hasISA(features,SSE3)) v += "SSE3 ";
409     if (hasISA(features,SSSE3)) v += "SSSE3 ";
410     if (hasISA(features,SSE41)) v += "SSE4.1 ";
411     if (hasISA(features,SSE42)) v += "SSE4.2 ";
412     if (hasISA(features,AVX)) v += "AVX ";
413     if (hasISA(features,AVXI)) v += "AVXI ";
414     if (hasISA(features,AVX2)) v += "AVX2 ";
415     if (hasISA(features,AVX512)) v += "AVX512 ";
416     return v;
417   }
418 }
419 
420 ////////////////////////////////////////////////////////////////////////////////
421 /// Windows Platform
422 ////////////////////////////////////////////////////////////////////////////////
423 
424 #if defined(__WIN32__)
425 
426 #define WIN32_LEAN_AND_MEAN
427 #include <windows.h>
428 #include <psapi.h>
429 
430 namespace embree
431 {
getExecutableFileName()432   std::string getExecutableFileName() {
433     char filename[1024];
434     if (!GetModuleFileName(nullptr, filename, sizeof(filename)))
435       return std::string();
436     return std::string(filename);
437   }
438 
getNumberOfLogicalThreads()439   unsigned int getNumberOfLogicalThreads()
440   {
441     static int nThreads = -1;
442     if (nThreads != -1) return nThreads;
443 
444     typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
445     typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
446     HMODULE hlib = LoadLibrary("Kernel32");
447     GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
448     GetActiveProcessorCountFunc      pGetActiveProcessorCount      = (GetActiveProcessorCountFunc)     GetProcAddress(hlib, "GetActiveProcessorCount");
449 
450     if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount)
451     {
452       int groups = pGetActiveProcessorGroupCount();
453       int totalProcessors = 0;
454       for (int i = 0; i < groups; i++)
455         totalProcessors += pGetActiveProcessorCount(i);
456       nThreads = totalProcessors;
457     }
458     else
459     {
460       SYSTEM_INFO sysinfo;
461       GetSystemInfo(&sysinfo);
462       nThreads = sysinfo.dwNumberOfProcessors;
463     }
464     assert(nThreads);
465     return nThreads;
466   }
467 
getTerminalWidth()468   int getTerminalWidth()
469   {
470     HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);
471     if (handle == INVALID_HANDLE_VALUE) return 80;
472     CONSOLE_SCREEN_BUFFER_INFO info;
473     memset(&info,0,sizeof(info));
474     GetConsoleScreenBufferInfo(handle, &info);
475     return info.dwSize.X;
476   }
477 
getSeconds()478   double getSeconds()
479   {
480     LARGE_INTEGER freq, val;
481     QueryPerformanceFrequency(&freq);
482     QueryPerformanceCounter(&val);
483     return (double)val.QuadPart / (double)freq.QuadPart;
484   }
485 
sleepSeconds(double t)486   void sleepSeconds(double t) {
487     Sleep(DWORD(1000.0*t));
488   }
489 
getVirtualMemoryBytes()490   size_t getVirtualMemoryBytes()
491   {
492     PROCESS_MEMORY_COUNTERS info;
493     GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
494     return (size_t)info.QuotaPeakPagedPoolUsage;
495   }
496 
getResidentMemoryBytes()497   size_t getResidentMemoryBytes()
498   {
499     PROCESS_MEMORY_COUNTERS info;
500     GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) );
501     return (size_t)info.WorkingSetSize;
502   }
503 }
504 #endif
505 
506 ////////////////////////////////////////////////////////////////////////////////
507 /// Linux Platform
508 ////////////////////////////////////////////////////////////////////////////////
509 
510 #if defined(__LINUX__)
511 
512 #include <stdio.h>
513 #include <unistd.h>
514 
515 namespace embree
516 {
getExecutableFileName()517   std::string getExecutableFileName()
518   {
519     std::string pid = "/proc/" + toString(getpid()) + "/exe";
520     char buf[4096];
521     memset(buf,0,sizeof(buf));
522     if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1)
523       return std::string();
524     return std::string(buf);
525   }
526 
getVirtualMemoryBytes()527   size_t getVirtualMemoryBytes()
528   {
529     size_t virt, resident, shared;
530     std::ifstream buffer("/proc/self/statm");
531     buffer >> virt >> resident >> shared;
532     return virt*sysconf(_SC_PAGE_SIZE);
533   }
534 
getResidentMemoryBytes()535   size_t getResidentMemoryBytes()
536   {
537     size_t virt, resident, shared;
538     std::ifstream buffer("/proc/self/statm");
539     buffer >> virt >> resident >> shared;
540     return resident*sysconf(_SC_PAGE_SIZE);
541   }
542 }
543 
544 #endif
545 
546 ////////////////////////////////////////////////////////////////////////////////
547 /// DragonFly Platform
548 ////////////////////////////////////////////////////////////////////////////////
549 
550 #ifdef __DragonFly__
551 
552 #include <sys/sysctl.h>
553 
554 namespace embree
555 {
getExecutableFileName()556   std::string getExecutableFileName()
557   {
558     const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
559     char buf[1024];
560     size_t len = sizeof(buf);
561     if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
562       return std::string();
563     return std::string(buf);
564   }
565 
getVirtualMemoryBytes()566   size_t getVirtualMemoryBytes() {
567     return 0;
568   }
569 
getResidentMemoryBytes()570   size_t getResidentMemoryBytes() {
571     return 0;
572   }
573 }
574 
575 #endif
576 
577 ////////////////////////////////////////////////////////////////////////////////
578 /// FreeBSD Platform
579 ////////////////////////////////////////////////////////////////////////////////
580 
581 #if defined (__FreeBSD__)
582 
583 #include <sys/sysctl.h>
584 
585 namespace embree
586 {
getExecutableFileName()587   std::string getExecutableFileName()
588   {
589     const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
590     char buf[4096];
591     memset(buf,0,sizeof(buf));
592     size_t len = sizeof(buf)-1;
593     if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1)
594       return std::string();
595     return std::string(buf);
596   }
597 
getVirtualMemoryBytes()598   size_t getVirtualMemoryBytes() {
599     return 0;
600   }
601 
getResidentMemoryBytes()602   size_t getResidentMemoryBytes() {
603     return 0;
604   }
605 }
606 
607 #endif
608 
609 ////////////////////////////////////////////////////////////////////////////////
610 /// Mac OS X Platform
611 ////////////////////////////////////////////////////////////////////////////////
612 
613 #if defined(__MACOSX__)
614 
615 #include <mach-o/dyld.h>
616 
617 namespace embree
618 {
getExecutableFileName()619   std::string getExecutableFileName()
620   {
621     char buf[4096];
622     uint32_t size = sizeof(buf);
623     if (_NSGetExecutablePath(buf, &size) != 0)
624       return std::string();
625     return std::string(buf);
626   }
627 
getVirtualMemoryBytes()628   size_t getVirtualMemoryBytes() {
629     return 0;
630   }
631 
getResidentMemoryBytes()632   size_t getResidentMemoryBytes() {
633     return 0;
634   }
635 }
636 
637 #endif
638 
639 ////////////////////////////////////////////////////////////////////////////////
640 /// Unix Platform
641 ////////////////////////////////////////////////////////////////////////////////
642 
643 #if defined(__UNIX__)
644 
645 #include <unistd.h>
646 #include <sys/ioctl.h>
647 #include <sys/time.h>
648 #include <pthread.h>
649 
650 namespace embree
651 {
getNumberOfLogicalThreads()652   unsigned int getNumberOfLogicalThreads()
653   {
654     static int nThreads = -1;
655     if (nThreads != -1) return nThreads;
656 
657 #if defined(__MACOSX__)
658     nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
659     assert(nThreads);
660 #else
661     cpu_set_t set;
662     if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
663       nThreads = CPU_COUNT(&set);
664 #endif
665 
666     assert(nThreads);
667     return nThreads;
668   }
669 
getTerminalWidth()670   int getTerminalWidth()
671   {
672     struct winsize info;
673     if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80;
674     return info.ws_col;
675   }
676 
getSeconds()677   double getSeconds() {
678     struct timeval tp; gettimeofday(&tp,nullptr);
679     return double(tp.tv_sec) + double(tp.tv_usec)/1E6;
680   }
681 
sleepSeconds(double t)682   void sleepSeconds(double t) {
683     usleep(1000000.0*t);
684   }
685 }
686 #endif
687 
688