1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #include "sysinfo.h" 5 #include "intrinsics.h" 6 #include "string.h" 7 #include "ref.h" 8 #if defined(__DragonFly__) 9 #include <pthread_np.h> 10 #endif 11 #if defined(__FREEBSD__) 12 #include <sys/cpuset.h> 13 #include <pthread_np.h> 14 typedef cpuset_t cpu_set_t; 15 #endif 16 17 //////////////////////////////////////////////////////////////////////////////// 18 /// All Platforms 19 //////////////////////////////////////////////////////////////////////////////// 20 21 namespace embree 22 { 23 NullTy null; 24 getPlatformName()25 std::string getPlatformName() 26 { 27 #if defined(__LINUX__) && !defined(__64BIT__) 28 return "Linux (32bit)"; 29 #elif defined(__LINUX__) && defined(__64BIT__) 30 return "Linux (64bit)"; 31 #elif defined(__DRAGONFLY__) && defined(__X86_64__) 32 return "DragonFly (64bit)"; 33 #elif defined(__FREEBSD__) && !defined(__64BIT__) 34 return "FreeBSD (32bit)"; 35 #elif defined(__FREEBSD__) && defined(__64BIT__) 36 return "FreeBSD (64bit)"; 37 #elif defined(__CYGWIN__) && !defined(__64BIT__) 38 return "Cygwin (32bit)"; 39 #elif defined(__CYGWIN__) && defined(__64BIT__) 40 return "Cygwin (64bit)"; 41 #elif defined(__WIN32__) && !defined(__64BIT__) 42 return "Windows (32bit)"; 43 #elif defined(__WIN32__) && defined(__64BIT__) 44 return "Windows (64bit)"; 45 #elif defined(__MACOSX__) && !defined(__64BIT__) 46 return "Mac OS X (32bit)"; 47 #elif defined(__MACOSX__) && defined(__64BIT__) 48 return "Mac OS X (64bit)"; 49 #elif defined(__UNIX__) && !defined(__64BIT__) 50 return "Unix (32bit)"; 51 #elif defined(__UNIX__) && defined(__64BIT__) 52 return "Unix (64bit)"; 53 #else 54 return "Unknown"; 55 #endif 56 } 57 getCompilerName()58 std::string getCompilerName() 59 { 60 #if defined(__INTEL_COMPILER) 61 int icc_mayor = __INTEL_COMPILER / 100 % 100; 62 int icc_minor = __INTEL_COMPILER % 100; 63 std::string version = "Intel Compiler "; 64 version += toString(icc_mayor); 
65 version += "." + toString(icc_minor); 66 #if defined(__INTEL_COMPILER_UPDATE) 67 version += "." + toString(__INTEL_COMPILER_UPDATE); 68 #endif 69 return version; 70 #elif defined(__clang__) 71 return "CLANG " __clang_version__; 72 #elif defined (__GNUC__) 73 return "GCC " __VERSION__; 74 #elif defined(_MSC_VER) 75 std::string version = toString(_MSC_FULL_VER); 76 version.insert(4,"."); 77 version.insert(9,"."); 78 version.insert(2,"."); 79 return "Visual C++ Compiler " + version; 80 #else 81 return "Unknown Compiler"; 82 #endif 83 } 84 getCPUVendor()85 std::string getCPUVendor() 86 { 87 #if defined(__X86_ASM__) 88 int cpuinfo[4]; 89 __cpuid (cpuinfo, 0); 90 int name[4]; 91 name[0] = cpuinfo[1]; 92 name[1] = cpuinfo[3]; 93 name[2] = cpuinfo[2]; 94 name[3] = 0; 95 return (char*)name; 96 #elif defined(__ARM_NEON) 97 return "ARM"; 98 #else 99 return "Unknown"; 100 #endif 101 } 102 getCPUModel()103 CPU getCPUModel() 104 { 105 #if defined(__X86_ASM__) 106 if (getCPUVendor() != "GenuineIntel") 107 return CPU::UNKNOWN; 108 109 int out[4]; 110 __cpuid(out, 0); 111 if (out[0] < 1) return CPU::UNKNOWN; 112 __cpuid(out, 1); 113 114 /* please see CPUID documentation for these formulas */ 115 uint32_t family_ID = (out[0] >> 8) & 0x0F; 116 uint32_t extended_family_ID = (out[0] >> 20) & 0xFF; 117 118 uint32_t model_ID = (out[0] >> 4) & 0x0F; 119 uint32_t extended_model_ID = (out[0] >> 16) & 0x0F; 120 121 uint32_t DisplayFamily = family_ID; 122 if (family_ID == 0x0F) 123 DisplayFamily += extended_family_ID; 124 125 uint32_t DisplayModel = model_ID; 126 if (family_ID == 0x06 || family_ID == 0x0F) 127 DisplayModel += extended_model_ID << 4; 128 129 uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0); 130 131 // Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel) 132 if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE; 133 if (DisplayFamily_DisplayModel == 
0x067E) return CPU::CORE_ICE_LAKE; 134 if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE; 135 if (DisplayFamily_DisplayModel == 0x06A5) return CPU::CORE_COMET_LAKE; 136 if (DisplayFamily_DisplayModel == 0x06A6) return CPU::CORE_COMET_LAKE; 137 if (DisplayFamily_DisplayModel == 0x0666) return CPU::CORE_CANNON_LAKE; 138 if (DisplayFamily_DisplayModel == 0x068E) return CPU::CORE_KABY_LAKE; 139 if (DisplayFamily_DisplayModel == 0x069E) return CPU::CORE_KABY_LAKE; 140 if (DisplayFamily_DisplayModel == 0x066A) return CPU::XEON_ICE_LAKE; 141 if (DisplayFamily_DisplayModel == 0x066C) return CPU::XEON_ICE_LAKE; 142 if (DisplayFamily_DisplayModel == 0x0655) return CPU::XEON_SKY_LAKE; 143 if (DisplayFamily_DisplayModel == 0x064E) return CPU::CORE_SKY_LAKE; 144 if (DisplayFamily_DisplayModel == 0x065E) return CPU::CORE_SKY_LAKE; 145 if (DisplayFamily_DisplayModel == 0x0656) return CPU::XEON_BROADWELL; 146 if (DisplayFamily_DisplayModel == 0x064F) return CPU::XEON_BROADWELL; 147 if (DisplayFamily_DisplayModel == 0x0647) return CPU::CORE_BROADWELL; 148 if (DisplayFamily_DisplayModel == 0x063D) return CPU::CORE_BROADWELL; 149 if (DisplayFamily_DisplayModel == 0x063F) return CPU::XEON_HASWELL; 150 if (DisplayFamily_DisplayModel == 0x063C) return CPU::CORE_HASWELL; 151 if (DisplayFamily_DisplayModel == 0x0645) return CPU::CORE_HASWELL; 152 if (DisplayFamily_DisplayModel == 0x0646) return CPU::CORE_HASWELL; 153 if (DisplayFamily_DisplayModel == 0x063E) return CPU::XEON_IVY_BRIDGE; 154 if (DisplayFamily_DisplayModel == 0x063A) return CPU::CORE_IVY_BRIDGE; 155 if (DisplayFamily_DisplayModel == 0x062D) return CPU::SANDY_BRIDGE; 156 if (DisplayFamily_DisplayModel == 0x062F) return CPU::SANDY_BRIDGE; 157 if (DisplayFamily_DisplayModel == 0x062A) return CPU::SANDY_BRIDGE; 158 if (DisplayFamily_DisplayModel == 0x062E) return CPU::NEHALEM; 159 if (DisplayFamily_DisplayModel == 0x0625) return CPU::NEHALEM; 160 if (DisplayFamily_DisplayModel == 0x062C) return CPU::NEHALEM; 
161 if (DisplayFamily_DisplayModel == 0x061E) return CPU::NEHALEM; 162 if (DisplayFamily_DisplayModel == 0x061F) return CPU::NEHALEM; 163 if (DisplayFamily_DisplayModel == 0x061A) return CPU::NEHALEM; 164 if (DisplayFamily_DisplayModel == 0x061D) return CPU::NEHALEM; 165 if (DisplayFamily_DisplayModel == 0x0617) return CPU::CORE2; 166 if (DisplayFamily_DisplayModel == 0x060F) return CPU::CORE2; 167 if (DisplayFamily_DisplayModel == 0x060E) return CPU::CORE1; 168 169 if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL; 170 if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING; 171 172 #elif defined(__ARM_NEON) 173 return CPU::ARM; 174 #endif 175 176 return CPU::UNKNOWN; 177 } 178 stringOfCPUModel(CPU model)179 std::string stringOfCPUModel(CPU model) 180 { 181 switch (model) { 182 case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake"; 183 case CPU::CORE_ICE_LAKE : return "Core Ice Lake"; 184 case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake"; 185 case CPU::CORE_COMET_LAKE : return "Core Comet Lake"; 186 case CPU::CORE_CANNON_LAKE : return "Core Cannon Lake"; 187 case CPU::CORE_KABY_LAKE : return "Core Kaby Lake"; 188 case CPU::XEON_SKY_LAKE : return "Xeon Sky Lake"; 189 case CPU::CORE_SKY_LAKE : return "Core Sky Lake"; 190 case CPU::XEON_PHI_KNIGHTS_MILL : return "Xeon Phi Knights Mill"; 191 case CPU::XEON_PHI_KNIGHTS_LANDING: return "Xeon Phi Knights Landing"; 192 case CPU::XEON_BROADWELL : return "Xeon Broadwell"; 193 case CPU::CORE_BROADWELL : return "Core Broadwell"; 194 case CPU::XEON_HASWELL : return "Xeon Haswell"; 195 case CPU::CORE_HASWELL : return "Core Haswell"; 196 case CPU::XEON_IVY_BRIDGE : return "Xeon Ivy Bridge"; 197 case CPU::CORE_IVY_BRIDGE : return "Core Ivy Bridge"; 198 case CPU::SANDY_BRIDGE : return "Sandy Bridge"; 199 case CPU::NEHALEM : return "Nehalem"; 200 case CPU::CORE2 : return "Core2"; 201 case CPU::CORE1 : return "Core"; 202 case CPU::ARM : return "ARM"; 203 case CPU::UNKNOWN : return 
"Unknown CPU"; 204 } 205 return "Unknown CPU (error)"; 206 } 207 208 #if defined(__X86_ASM__) 209 /* constants to access destination registers of CPUID instruction */ 210 static const int EAX = 0; 211 static const int EBX = 1; 212 static const int ECX = 2; 213 static const int EDX = 3; 214 215 /* cpuid[eax=1].ecx */ 216 static const int CPU_FEATURE_BIT_SSE3 = 1 << 0; 217 static const int CPU_FEATURE_BIT_SSSE3 = 1 << 9; 218 static const int CPU_FEATURE_BIT_FMA3 = 1 << 12; 219 static const int CPU_FEATURE_BIT_SSE4_1 = 1 << 19; 220 static const int CPU_FEATURE_BIT_SSE4_2 = 1 << 20; 221 //static const int CPU_FEATURE_BIT_MOVBE = 1 << 22; 222 static const int CPU_FEATURE_BIT_POPCNT = 1 << 23; 223 //static const int CPU_FEATURE_BIT_XSAVE = 1 << 26; 224 static const int CPU_FEATURE_BIT_OXSAVE = 1 << 27; 225 static const int CPU_FEATURE_BIT_AVX = 1 << 28; 226 static const int CPU_FEATURE_BIT_F16C = 1 << 29; 227 static const int CPU_FEATURE_BIT_RDRAND = 1 << 30; 228 229 /* cpuid[eax=1].edx */ 230 static const int CPU_FEATURE_BIT_SSE = 1 << 25; 231 static const int CPU_FEATURE_BIT_SSE2 = 1 << 26; 232 233 /* cpuid[eax=0x80000001].ecx */ 234 static const int CPU_FEATURE_BIT_LZCNT = 1 << 5; 235 236 /* cpuid[eax=7,ecx=0].ebx */ 237 static const int CPU_FEATURE_BIT_BMI1 = 1 << 3; 238 static const int CPU_FEATURE_BIT_AVX2 = 1 << 5; 239 static const int CPU_FEATURE_BIT_BMI2 = 1 << 8; 240 static const int CPU_FEATURE_BIT_AVX512F = 1 << 16; // AVX512F (foundation) 241 static const int CPU_FEATURE_BIT_AVX512DQ = 1 << 17; // AVX512DQ (doubleword and quadword instructions) 242 static const int CPU_FEATURE_BIT_AVX512PF = 1 << 26; // AVX512PF (prefetch gather/scatter instructions) 243 static const int CPU_FEATURE_BIT_AVX512ER = 1 << 27; // AVX512ER (exponential and reciprocal instructions) 244 static const int CPU_FEATURE_BIT_AVX512CD = 1 << 28; // AVX512CD (conflict detection instructions) 245 static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions) 
246 static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions) 247 static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions) 248 249 /* cpuid[eax=7,ecx=0].ecx */ 250 static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions) 251 #endif 252 253 #if defined(__X86_ASM__) get_xcr0()254 __noinline int64_t get_xcr0() 255 { 256 #if defined (__WIN32__) 257 int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32 258 xcr0 = _xgetbv(0); 259 return xcr0; 260 #else 261 int xcr0 = 0; 262 __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); 263 return xcr0; 264 #endif 265 } 266 #endif 267 getCPUFeatures()268 int getCPUFeatures() 269 { 270 #if defined(__X86_ASM__) 271 /* cache CPU features access */ 272 static int cpu_features = 0; 273 if (cpu_features) 274 return cpu_features; 275 276 /* get number of CPUID leaves */ 277 int cpuid_leaf0[4]; 278 __cpuid(cpuid_leaf0, 0x00000000); 279 unsigned nIds = cpuid_leaf0[EAX]; 280 281 /* get number of extended CPUID leaves */ 282 int cpuid_leafe[4]; 283 __cpuid(cpuid_leafe, 0x80000000); 284 unsigned nExIds = cpuid_leafe[EAX]; 285 286 /* get CPUID leaves for EAX = 1,7, and 0x80000001 */ 287 int cpuid_leaf_1[4] = { 0,0,0,0 }; 288 int cpuid_leaf_7[4] = { 0,0,0,0 }; 289 int cpuid_leaf_e1[4] = { 0,0,0,0 }; 290 if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001); 291 #if _WIN32 292 #if _MSC_VER && (_MSC_FULL_VER < 160040219) 293 #else 294 if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0); 295 #endif 296 #else 297 if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0); 298 #endif 299 if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001); 300 301 /* detect if OS saves XMM, YMM, and ZMM states */ 302 bool xmm_enabled = true; 303 bool ymm_enabled = false; 304 bool zmm_enabled = false; 305 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) { 306 int64_t xcr0 = get_xcr0(); 307 xmm_enabled = 
((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */ 308 ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */ 309 zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */ 310 } 311 if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED; 312 if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED; 313 if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED; 314 315 if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE; 316 if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2; 317 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3; 318 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSSE3 ) cpu_features |= CPU_FEATURE_SSSE3; 319 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41; 320 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42; 321 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT; 322 323 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX; 324 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C; 325 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND; 326 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2; 327 if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_FMA3 ) cpu_features |= CPU_FEATURE_FMA3; 328 if (cpuid_leaf_e1[ECX] & CPU_FEATURE_BIT_LZCNT) cpu_features |= CPU_FEATURE_LZCNT; 329 if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI1 ) cpu_features |= CPU_FEATURE_BMI1; 330 if (cpuid_leaf_7 [EBX] & CPU_FEATURE_BIT_BMI2 ) cpu_features |= CPU_FEATURE_BMI2; 331 332 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F; 333 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= 
CPU_FEATURE_AVX512DQ; 334 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF; 335 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER; 336 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD; 337 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW; 338 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA; 339 if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL; 340 if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI; 341 342 return cpu_features; 343 #elif defined(__ARM_NEON) 344 /* emulated features with sse2neon */ 345 return CPU_FEATURE_SSE|CPU_FEATURE_SSE2|CPU_FEATURE_XMM_ENABLED; 346 #else 347 /* Unknown CPU. */ 348 return 0; 349 #endif 350 } 351 stringOfCPUFeatures(int features)352 std::string stringOfCPUFeatures(int features) 353 { 354 std::string str; 355 if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM "; 356 if (features & CPU_FEATURE_YMM_ENABLED) str += "YMM "; 357 if (features & CPU_FEATURE_ZMM_ENABLED) str += "ZMM "; 358 if (features & CPU_FEATURE_SSE ) str += "SSE "; 359 if (features & CPU_FEATURE_SSE2 ) str += "SSE2 "; 360 if (features & CPU_FEATURE_SSE3 ) str += "SSE3 "; 361 if (features & CPU_FEATURE_SSSE3 ) str += "SSSE3 "; 362 if (features & CPU_FEATURE_SSE41 ) str += "SSE4.1 "; 363 if (features & CPU_FEATURE_SSE42 ) str += "SSE4.2 "; 364 if (features & CPU_FEATURE_POPCNT) str += "POPCNT "; 365 if (features & CPU_FEATURE_AVX ) str += "AVX "; 366 if (features & CPU_FEATURE_F16C ) str += "F16C "; 367 if (features & CPU_FEATURE_RDRAND) str += "RDRAND "; 368 if (features & CPU_FEATURE_AVX2 ) str += "AVX2 "; 369 if (features & CPU_FEATURE_FMA3 ) str += "FMA3 "; 370 if (features & CPU_FEATURE_LZCNT ) str += "LZCNT "; 371 if (features & CPU_FEATURE_BMI1 ) str += "BMI1 "; 372 if (features & 
CPU_FEATURE_BMI2 ) str += "BMI2 "; 373 if (features & CPU_FEATURE_AVX512F) str += "AVX512F "; 374 if (features & CPU_FEATURE_AVX512DQ) str += "AVX512DQ "; 375 if (features & CPU_FEATURE_AVX512PF) str += "AVX512PF "; 376 if (features & CPU_FEATURE_AVX512ER) str += "AVX512ER "; 377 if (features & CPU_FEATURE_AVX512CD) str += "AVX512CD "; 378 if (features & CPU_FEATURE_AVX512BW) str += "AVX512BW "; 379 if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL "; 380 if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA "; 381 if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI "; 382 return str; 383 } 384 stringOfISA(int isa)385 std::string stringOfISA (int isa) 386 { 387 if (isa == SSE) return "SSE"; 388 if (isa == SSE2) return "SSE2"; 389 if (isa == SSE3) return "SSE3"; 390 if (isa == SSSE3) return "SSSE3"; 391 if (isa == SSE41) return "SSE4.1"; 392 if (isa == SSE42) return "SSE4.2"; 393 if (isa == AVX) return "AVX"; 394 if (isa == AVX2) return "AVX2"; 395 if (isa == AVX512) return "AVX512"; 396 return "UNKNOWN"; 397 } 398 hasISA(int features,int isa)399 bool hasISA(int features, int isa) { 400 return (features & isa) == isa; 401 } 402 supportedTargetList(int features)403 std::string supportedTargetList (int features) 404 { 405 std::string v; 406 if (hasISA(features,SSE)) v += "SSE "; 407 if (hasISA(features,SSE2)) v += "SSE2 "; 408 if (hasISA(features,SSE3)) v += "SSE3 "; 409 if (hasISA(features,SSSE3)) v += "SSSE3 "; 410 if (hasISA(features,SSE41)) v += "SSE4.1 "; 411 if (hasISA(features,SSE42)) v += "SSE4.2 "; 412 if (hasISA(features,AVX)) v += "AVX "; 413 if (hasISA(features,AVXI)) v += "AVXI "; 414 if (hasISA(features,AVX2)) v += "AVX2 "; 415 if (hasISA(features,AVX512)) v += "AVX512 "; 416 return v; 417 } 418 } 419 420 //////////////////////////////////////////////////////////////////////////////// 421 /// Windows Platform 422 //////////////////////////////////////////////////////////////////////////////// 423 424 #if defined(__WIN32__) 425 426 #define 
WIN32_LEAN_AND_MEAN 427 #include <windows.h> 428 #include <psapi.h> 429 430 namespace embree 431 { getExecutableFileName()432 std::string getExecutableFileName() { 433 char filename[1024]; 434 if (!GetModuleFileName(nullptr, filename, sizeof(filename))) 435 return std::string(); 436 return std::string(filename); 437 } 438 getNumberOfLogicalThreads()439 unsigned int getNumberOfLogicalThreads() 440 { 441 static int nThreads = -1; 442 if (nThreads != -1) return nThreads; 443 444 typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); 445 typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); 446 HMODULE hlib = LoadLibrary("Kernel32"); 447 GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount"); 448 GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount"); 449 450 if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount) 451 { 452 int groups = pGetActiveProcessorGroupCount(); 453 int totalProcessors = 0; 454 for (int i = 0; i < groups; i++) 455 totalProcessors += pGetActiveProcessorCount(i); 456 nThreads = totalProcessors; 457 } 458 else 459 { 460 SYSTEM_INFO sysinfo; 461 GetSystemInfo(&sysinfo); 462 nThreads = sysinfo.dwNumberOfProcessors; 463 } 464 assert(nThreads); 465 return nThreads; 466 } 467 getTerminalWidth()468 int getTerminalWidth() 469 { 470 HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE); 471 if (handle == INVALID_HANDLE_VALUE) return 80; 472 CONSOLE_SCREEN_BUFFER_INFO info; 473 memset(&info,0,sizeof(info)); 474 GetConsoleScreenBufferInfo(handle, &info); 475 return info.dwSize.X; 476 } 477 getSeconds()478 double getSeconds() 479 { 480 LARGE_INTEGER freq, val; 481 QueryPerformanceFrequency(&freq); 482 QueryPerformanceCounter(&val); 483 return (double)val.QuadPart / (double)freq.QuadPart; 484 } 485 sleepSeconds(double t)486 void sleepSeconds(double t) { 487 
Sleep(DWORD(1000.0*t)); 488 } 489 getVirtualMemoryBytes()490 size_t getVirtualMemoryBytes() 491 { 492 PROCESS_MEMORY_COUNTERS info; 493 GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); 494 return (size_t)info.QuotaPeakPagedPoolUsage; 495 } 496 getResidentMemoryBytes()497 size_t getResidentMemoryBytes() 498 { 499 PROCESS_MEMORY_COUNTERS info; 500 GetProcessMemoryInfo( GetCurrentProcess( ), &info, sizeof(info) ); 501 return (size_t)info.WorkingSetSize; 502 } 503 } 504 #endif 505 506 //////////////////////////////////////////////////////////////////////////////// 507 /// Linux Platform 508 //////////////////////////////////////////////////////////////////////////////// 509 510 #if defined(__LINUX__) 511 512 #include <stdio.h> 513 #include <unistd.h> 514 515 namespace embree 516 { getExecutableFileName()517 std::string getExecutableFileName() 518 { 519 std::string pid = "/proc/" + toString(getpid()) + "/exe"; 520 char buf[4096]; 521 memset(buf,0,sizeof(buf)); 522 if (readlink(pid.c_str(), buf, sizeof(buf)-1) == -1) 523 return std::string(); 524 return std::string(buf); 525 } 526 getVirtualMemoryBytes()527 size_t getVirtualMemoryBytes() 528 { 529 size_t virt, resident, shared; 530 std::ifstream buffer("/proc/self/statm"); 531 buffer >> virt >> resident >> shared; 532 return virt*sysconf(_SC_PAGE_SIZE); 533 } 534 getResidentMemoryBytes()535 size_t getResidentMemoryBytes() 536 { 537 size_t virt, resident, shared; 538 std::ifstream buffer("/proc/self/statm"); 539 buffer >> virt >> resident >> shared; 540 return resident*sysconf(_SC_PAGE_SIZE); 541 } 542 } 543 544 #endif 545 546 //////////////////////////////////////////////////////////////////////////////// 547 /// DragonFly Platform 548 //////////////////////////////////////////////////////////////////////////////// 549 550 #ifdef __DragonFly__ 551 552 #include <sys/sysctl.h> 553 554 namespace embree 555 { getExecutableFileName()556 std::string getExecutableFileName() 557 { 558 const int mib[4] = { 
CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; 559 char buf[1024]; 560 size_t len = sizeof(buf); 561 if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1) 562 return std::string(); 563 return std::string(buf); 564 } 565 getVirtualMemoryBytes()566 size_t getVirtualMemoryBytes() { 567 return 0; 568 } 569 getResidentMemoryBytes()570 size_t getResidentMemoryBytes() { 571 return 0; 572 } 573 } 574 575 #endif 576 577 //////////////////////////////////////////////////////////////////////////////// 578 /// FreeBSD Platform 579 //////////////////////////////////////////////////////////////////////////////// 580 581 #if defined (__FreeBSD__) 582 583 #include <sys/sysctl.h> 584 585 namespace embree 586 { getExecutableFileName()587 std::string getExecutableFileName() 588 { 589 const int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; 590 char buf[4096]; 591 memset(buf,0,sizeof(buf)); 592 size_t len = sizeof(buf)-1; 593 if (sysctl(mib, 4, buf, &len, 0x0, 0) == -1) 594 return std::string(); 595 return std::string(buf); 596 } 597 getVirtualMemoryBytes()598 size_t getVirtualMemoryBytes() { 599 return 0; 600 } 601 getResidentMemoryBytes()602 size_t getResidentMemoryBytes() { 603 return 0; 604 } 605 } 606 607 #endif 608 609 //////////////////////////////////////////////////////////////////////////////// 610 /// Mac OS X Platform 611 //////////////////////////////////////////////////////////////////////////////// 612 613 #if defined(__MACOSX__) 614 615 #include <mach-o/dyld.h> 616 617 namespace embree 618 { getExecutableFileName()619 std::string getExecutableFileName() 620 { 621 char buf[4096]; 622 uint32_t size = sizeof(buf); 623 if (_NSGetExecutablePath(buf, &size) != 0) 624 return std::string(); 625 return std::string(buf); 626 } 627 getVirtualMemoryBytes()628 size_t getVirtualMemoryBytes() { 629 return 0; 630 } 631 getResidentMemoryBytes()632 size_t getResidentMemoryBytes() { 633 return 0; 634 } 635 } 636 637 #endif 638 639 
////////////////////////////////////////////////////////////////////////////////
/// Unix Platform
////////////////////////////////////////////////////////////////////////////////

#if defined(__UNIX__)

#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <pthread.h>

namespace embree
{
  /*! Returns the number of logical threads available to the process.
   *  Except on Mac OS X the affinity mask of the calling thread is counted;
   *  if it cannot be queried, the number of online processors reported by
   *  sysconf is used instead. The result is at least 1 and is computed
   *  once and then cached. */
  unsigned int getNumberOfLogicalThreads()
  {
    static int nThreads = -1;
    if (nThreads != -1) return nThreads;

    int count = -1;
#if defined(__MACOSX__)
    count = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
#else
    cpu_set_t set;
    if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
      count = CPU_COUNT(&set);
    else
      count = sysconf(_SC_NPROCESSORS_ONLN); // affinity query failed, do not leave the -1 sentinel in place
#endif

    /* both queries may report an error (-1); never hand that out, since it
     * would turn into a huge thread count in the unsigned return value */
    if (count < 1) count = 1;

    nThreads = count;
    return nThreads;
  }

  /*! Returns the number of columns of the terminal attached to standard
   *  output, or 80 if the window size cannot be queried. */
  int getTerminalWidth()
  {
    struct winsize info;
    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80;
    return info.ws_col;
  }

  /*! Returns the current time of day in seconds (microsecond resolution). */
  double getSeconds() {
    struct timeval tp; gettimeofday(&tp,nullptr);
    return double(tp.tv_sec) + double(tp.tv_usec)/1E6;
  }

  /*! Suspends the calling thread for \p t seconds using usleep. */
  void sleepSeconds(double t) {
    usleep(1000000.0*t);
  }
}
#endif
