1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "internal_macros.h"
16 
17 #ifdef BENCHMARK_OS_WINDOWS
18 #include <shlwapi.h>
19 #undef StrCat  // Don't let StrCat in string_util.h be renamed to lstrcatA
20 #include <versionhelpers.h>
21 #include <windows.h>
22 #else
23 #include <fcntl.h>
24 #ifndef BENCHMARK_OS_FUCHSIA
25 #include <sys/resource.h>
26 #endif
27 #include <sys/time.h>
28 #include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
29 #include <unistd.h>
30 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
31     defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD
32 #define BENCHMARK_HAS_SYSCTL
33 #include <sys/sysctl.h>
34 #endif
35 #endif
36 #if defined(BENCHMARK_OS_SOLARIS)
37 #include <kstat.h>
38 #endif
39 
40 #include <algorithm>
41 #include <array>
42 #include <bitset>
43 #include <cerrno>
44 #include <climits>
45 #include <cstdint>
46 #include <cstdio>
47 #include <cstdlib>
48 #include <cstring>
49 #include <fstream>
50 #include <iostream>
51 #include <iterator>
52 #include <limits>
53 #include <memory>
54 #include <sstream>
55 
56 #include "check.h"
57 #include "cycleclock.h"
58 #include "internal_macros.h"
59 #include "log.h"
60 #include "sleep.h"
61 #include "string_util.h"
62 
63 namespace benchmark {
64 namespace {
65 
// Terminal overload of the variadic printer: there is nothing left to print,
// so end the line and flush the stream.
void PrintImp(std::ostream& out) {
  out.put(out.widen('\n'));
  out.flush();
}

// Recursive overload: streams the leading argument `f` into `out`, then
// forwards the remaining arguments to the next PrintImp overload. The
// recursion ends in the single-argument overload above.
template <class First, class... Rest>
void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
  std::ostream& sink = out;
  sink << std::forward<First>(f);
  PrintImp(sink, std::forward<Rest>(rest)...);
}
73 
74 template <class... Args>
PrintErrorAndDie(Args &&...args)75 BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
76   PrintImp(std::cerr, std::forward<Args>(args)...);
77   std::exit(EXIT_FAILURE);
78 }
79 
80 #ifdef BENCHMARK_HAS_SYSCTL
81 
82 /// ValueUnion - A type used to correctly alias the byte-for-byte output of
83 /// `sysctl` with the result type it's to be interpreted as.
84 struct ValueUnion {
85   union DataT {
86     uint32_t uint32_value;
87     uint64_t uint64_value;
88     // For correct aliasing of union members from bytes.
89     char bytes[8];
90   };
91   using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
92 
93   // The size of the data union member + its trailing array size.
94   size_t Size;
95   DataPtr Buff;
96 
97  public:
ValueUnionbenchmark::__anon14e299510111::ValueUnion98   ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
99 
ValueUnionbenchmark::__anon14e299510111::ValueUnion100   explicit ValueUnion(size_t BuffSize)
101       : Size(sizeof(DataT) + BuffSize),
102         Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
103 
104   ValueUnion(ValueUnion&& other) = default;
105 
operator boolbenchmark::__anon14e299510111::ValueUnion106   explicit operator bool() const { return bool(Buff); }
107 
databenchmark::__anon14e299510111::ValueUnion108   char* data() const { return Buff->bytes; }
109 
GetAsStringbenchmark::__anon14e299510111::ValueUnion110   std::string GetAsString() const { return std::string(data()); }
111 
GetAsIntegerbenchmark::__anon14e299510111::ValueUnion112   int64_t GetAsInteger() const {
113     if (Size == sizeof(Buff->uint32_value))
114       return static_cast<int32_t>(Buff->uint32_value);
115     else if (Size == sizeof(Buff->uint64_value))
116       return static_cast<int64_t>(Buff->uint64_value);
117     BENCHMARK_UNREACHABLE();
118   }
119 
GetAsUnsignedbenchmark::__anon14e299510111::ValueUnion120   uint64_t GetAsUnsigned() const {
121     if (Size == sizeof(Buff->uint32_value))
122       return Buff->uint32_value;
123     else if (Size == sizeof(Buff->uint64_value))
124       return Buff->uint64_value;
125     BENCHMARK_UNREACHABLE();
126   }
127 
128   template <class T, int N>
GetAsArraybenchmark::__anon14e299510111::ValueUnion129   std::array<T, N> GetAsArray() {
130     const int ArrSize = sizeof(T) * N;
131     CHECK_LE(ArrSize, Size);
132     std::array<T, N> Arr;
133     std::memcpy(Arr.data(), data(), ArrSize);
134     return Arr;
135   }
136 };
137 
GetSysctlImp(std::string const & Name)138 ValueUnion GetSysctlImp(std::string const& Name) {
139 #if defined BENCHMARK_OS_OPENBSD
140   int mib[2];
141 
142   mib[0] = CTL_HW;
143   if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){
144     ValueUnion buff(sizeof(int));
145 
146     if (Name == "hw.ncpu") {
147       mib[1] = HW_NCPU;
148     } else {
149       mib[1] = HW_CPUSPEED;
150     }
151 
152     if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) {
153       return ValueUnion();
154     }
155     return buff;
156   }
157   return ValueUnion();
158 #else
159   size_t CurBuffSize = 0;
160   if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
161     return ValueUnion();
162 
163   ValueUnion buff(CurBuffSize);
164   if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
165     return buff;
166   return ValueUnion();
167 #endif
168 }
169 
170 BENCHMARK_MAYBE_UNUSED
GetSysctl(std::string const & Name,std::string * Out)171 bool GetSysctl(std::string const& Name, std::string* Out) {
172   Out->clear();
173   auto Buff = GetSysctlImp(Name);
174   if (!Buff) return false;
175   Out->assign(Buff.data());
176   return true;
177 }
178 
179 template <class Tp, class = std::enable_if_t<std::is_integral<Tp>::value>>
GetSysctl(std::string const & Name,Tp * Out)180 bool GetSysctl(std::string const &Name, Tp *Out) {
181   *Out = 0;
182   auto Buff = GetSysctlImp(Name);
183   if (!Buff) return false;
184   *Out = static_cast<Tp>(Buff.GetAsUnsigned());
185   return true;
186 }
187 
188 template <class Tp, size_t N>
GetSysctl(std::string const & Name,std::array<Tp,N> * Out)189 bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
190   auto Buff = GetSysctlImp(Name);
191   if (!Buff) return false;
192   *Out = Buff.GetAsArray<Tp, N>();
193   return true;
194 }
195 #endif
196 
// Reads one formatted value from the file `fname` into `*arg`.
//
// `*arg` is reset to a value-initialized ArgT before anything else happens.
// Returns false when the file cannot be opened; otherwise returns whether the
// stream is still in the good() state after the extraction (so a value that
// runs right up to end-of-file also yields false).
template <class ArgT>
bool ReadFromFile(std::string const& fname, ArgT* arg) {
  *arg = ArgT();
  std::ifstream input(fname.c_str());
  if (input.is_open()) {
    input >> *arg;
    return input.good();
  }
  return false;
}
205 
CpuScalingEnabled(int num_cpus)206 bool CpuScalingEnabled(int num_cpus) {
207   // We don't have a valid CPU count, so don't even bother.
208   if (num_cpus <= 0) return false;
209 #ifndef BENCHMARK_OS_WINDOWS
210   // On Linux, the CPUfreq subsystem exposes CPU information as files on the
211   // local file system. If reading the exported files fails, then we may not be
212   // running on Linux, so we silently ignore all the read errors.
213   std::string res;
214   for (int cpu = 0; cpu < num_cpus; ++cpu) {
215     std::string governor_file =
216         StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
217     if (ReadFromFile(governor_file, &res) && res != "performance") return true;
218   }
219 #endif
220   return false;
221 }
222 
// Counts the set bits in a CPU map given as comma-separated hexadecimal
// words (e.g. "00000000,0000000f"). Every comma-terminated word is parsed;
// the text after the last comma is parsed only when it is non-empty.
int CountSetBitsInCPUMap(std::string Val) {
  using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
  const auto PopCount = [](const std::string& HexWord) {
    const CPUMask Bits(std::stoul("0x" + HexWord, nullptr, 16));
    return static_cast<int>(Bits.count());
  };

  int SetBits = 0;
  std::string::size_type Begin = 0;
  for (std::string::size_type Comma = Val.find(',', Begin);
       Comma != std::string::npos; Comma = Val.find(',', Begin)) {
    SetBits += PopCount(Val.substr(Begin, Comma - Begin));
    Begin = Comma + 1;
  }
  if (Begin < Val.size()) SetBits += PopCount(Val.substr(Begin));
  return SetBits;
}
241 
242 BENCHMARK_MAYBE_UNUSED
GetCacheSizesFromKVFS()243 std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
244   std::vector<CPUInfo::CacheInfo> res;
245   std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
246   int Idx = 0;
247   while (true) {
248     CPUInfo::CacheInfo info;
249     std::string FPath = StrCat(dir, "index", Idx++, "/");
250     std::ifstream f(StrCat(FPath, "size").c_str());
251     if (!f.is_open()) break;
252     std::string suffix;
253     f >> info.size;
254     if (f.fail())
255       PrintErrorAndDie("Failed while reading file '", FPath, "size'");
256     if (f.good()) {
257       f >> suffix;
258       if (f.bad())
259         PrintErrorAndDie(
260             "Invalid cache size format: failed to read size suffix");
261       else if (f && suffix != "K")
262         PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
263       else if (suffix == "K")
264         info.size *= 1000;
265     }
266     if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
267       PrintErrorAndDie("Failed to read from file ", FPath, "type");
268     if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
269       PrintErrorAndDie("Failed to read from file ", FPath, "level");
270     std::string map_str;
271     if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
272       PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
273     info.num_sharing = CountSetBitsInCPUMap(map_str);
274     res.push_back(info);
275   }
276 
277   return res;
278 }
279 
280 #ifdef BENCHMARK_OS_MACOSX
GetCacheSizesMacOSX()281 std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
282   std::vector<CPUInfo::CacheInfo> res;
283   std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
284   GetSysctl("hw.cacheconfig", &CacheCounts);
285 
286   struct {
287     std::string name;
288     std::string type;
289     int level;
290     uint64_t num_sharing;
291   } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
292                {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
293                {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
294                {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
295   for (auto& C : Cases) {
296     int val;
297     if (!GetSysctl(C.name, &val)) continue;
298     CPUInfo::CacheInfo info;
299     info.type = C.type;
300     info.level = C.level;
301     info.size = val;
302     info.num_sharing = static_cast<int>(C.num_sharing);
303     res.push_back(std::move(info));
304   }
305   return res;
306 }
307 #elif defined(BENCHMARK_OS_WINDOWS)
GetCacheSizesWindows()308 std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
309   std::vector<CPUInfo::CacheInfo> res;
310   DWORD buffer_size = 0;
311   using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
312   using CInfo = CACHE_DESCRIPTOR;
313 
314   using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
315   GetLogicalProcessorInformation(nullptr, &buffer_size);
316   UPtr buff((PInfo*)malloc(buffer_size), &std::free);
317   if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
318     PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
319                      GetLastError());
320 
321   PInfo* it = buff.get();
322   PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
323 
324   for (; it != end; ++it) {
325     if (it->Relationship != RelationCache) continue;
326     using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
327     BitSet B(it->ProcessorMask);
328     // To prevent duplicates, only consider caches where CPU 0 is specified
329     if (!B.test(0)) continue;
330     CInfo* Cache = &it->Cache;
331     CPUInfo::CacheInfo C;
332     C.num_sharing = static_cast<int>(B.count());
333     C.level = Cache->Level;
334     C.size = Cache->Size;
335     C.type = "Unknown";
336     switch (Cache->Type) {
337       case CacheUnified:
338         C.type = "Unified";
339         break;
340       case CacheInstruction:
341         C.type = "Instruction";
342         break;
343       case CacheData:
344         C.type = "Data";
345         break;
346       case CacheTrace:
347         C.type = "Trace";
348         break;
349     }
350     res.push_back(C);
351   }
352   return res;
353 }
354 #endif
355 
GetCacheSizes()356 std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
357 #ifdef BENCHMARK_OS_MACOSX
358   return GetCacheSizesMacOSX();
359 #elif defined(BENCHMARK_OS_WINDOWS)
360   return GetCacheSizesWindows();
361 #else
362   return GetCacheSizesFromKVFS();
363 #endif
364 }
365 
GetNumCPUs()366 int GetNumCPUs() {
367 #ifdef BENCHMARK_HAS_SYSCTL
368   int NumCPU = -1;
369   if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
370   fprintf(stderr, "Err: %s\n", strerror(errno));
371   std::exit(EXIT_FAILURE);
372 #elif defined(BENCHMARK_OS_WINDOWS)
373   SYSTEM_INFO sysinfo;
374   // Use memset as opposed to = {} to avoid GCC missing initializer false
375   // positives.
376   std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
377   GetSystemInfo(&sysinfo);
378   return sysinfo.dwNumberOfProcessors;  // number of logical
379                                         // processors in the current
380                                         // group
381 #elif defined(BENCHMARK_OS_SOLARIS)
382   // Returns -1 in case of a failure.
383   int NumCPU = sysconf(_SC_NPROCESSORS_ONLN);
384   if (NumCPU < 0) {
385     fprintf(stderr,
386             "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
387             strerror(errno));
388   }
389   return NumCPU;
390 #else
391   int NumCPUs = 0;
392   int MaxID = -1;
393   std::ifstream f("/proc/cpuinfo");
394   if (!f.is_open()) {
395     std::cerr << "failed to open /proc/cpuinfo\n";
396     return -1;
397   }
398   const std::string Key = "processor";
399   std::string ln;
400   while (std::getline(f, ln)) {
401     if (ln.empty()) continue;
402     size_t SplitIdx = ln.find(':');
403     std::string value;
404     if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
405     if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
406       NumCPUs++;
407       if (!value.empty()) {
408         int CurID = std::stoi(value);
409         MaxID = std::max(CurID, MaxID);
410       }
411     }
412   }
413   if (f.bad()) {
414     std::cerr << "Failure reading /proc/cpuinfo\n";
415     return -1;
416   }
417   if (!f.eof()) {
418     std::cerr << "Failed to read to end of /proc/cpuinfo\n";
419     return -1;
420   }
421   f.close();
422 
423   if ((MaxID + 1) != NumCPUs) {
424     fprintf(stderr,
425             "CPU ID assignments in /proc/cpuinfo seem messed up."
426             " This is usually caused by a bad BIOS.\n");
427   }
428   return NumCPUs;
429 #endif
430   BENCHMARK_UNREACHABLE();
431 }
432 
GetCPUCyclesPerSecond()433 double GetCPUCyclesPerSecond() {
434 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
435   long freq;
436 
437   // If the kernel is exporting the tsc frequency use that. There are issues
438   // where cpuinfo_max_freq cannot be relied on because the BIOS may be
439   // exporintg an invalid p-state (on x86) or p-states may be used to put the
440   // processor in a new mode (turbo mode). Essentially, those frequencies
441   // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
442   // well.
443   if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
444       // If CPU scaling is in effect, we want to use the *maximum* frequency,
445       // not whatever CPU speed some random processor happens to be using now.
446       || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
447                       &freq)) {
448     // The value is in kHz (as the file name suggests).  For example, on a
449     // 2GHz warpstation, the file contains the value "2000000".
450     return freq * 1000.0;
451   }
452 
453   const double error_value = -1;
454   double bogo_clock = error_value;
455 
456   std::ifstream f("/proc/cpuinfo");
457   if (!f.is_open()) {
458     std::cerr << "failed to open /proc/cpuinfo\n";
459     return error_value;
460   }
461 
462   auto startsWithKey = [](std::string const& Value, std::string const& Key) {
463     if (Key.size() > Value.size()) return false;
464     auto Cmp = [&](char X, char Y) {
465       return std::tolower(X) == std::tolower(Y);
466     };
467     return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
468   };
469 
470   std::string ln;
471   while (std::getline(f, ln)) {
472     if (ln.empty()) continue;
473     size_t SplitIdx = ln.find(':');
474     std::string value;
475     if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
476     // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
477     // accept positive values. Some environments (virtual machines) report zero,
478     // which would cause infinite looping in WallTime_Init.
479     if (startsWithKey(ln, "cpu MHz")) {
480       if (!value.empty()) {
481         double cycles_per_second = std::stod(value) * 1000000.0;
482         if (cycles_per_second > 0) return cycles_per_second;
483       }
484     } else if (startsWithKey(ln, "bogomips")) {
485       if (!value.empty()) {
486         bogo_clock = std::stod(value) * 1000000.0;
487         if (bogo_clock < 0.0) bogo_clock = error_value;
488       }
489     }
490   }
491   if (f.bad()) {
492     std::cerr << "Failure reading /proc/cpuinfo\n";
493     return error_value;
494   }
495   if (!f.eof()) {
496     std::cerr << "Failed to read to end of /proc/cpuinfo\n";
497     return error_value;
498   }
499   f.close();
500   // If we found the bogomips clock, but nothing better, we'll use it (but
501   // we're not happy about it); otherwise, fallback to the rough estimation
502   // below.
503   if (bogo_clock >= 0.0) return bogo_clock;
504 
505 #elif defined BENCHMARK_HAS_SYSCTL
506   constexpr auto* FreqStr =
507 #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
508       "machdep.tsc_freq";
509 #elif defined BENCHMARK_OS_OPENBSD
510       "hw.cpuspeed";
511 #else
512       "hw.cpufrequency";
513 #endif
514   unsigned long long hz = 0;
515 #if defined BENCHMARK_OS_OPENBSD
516   if (GetSysctl(FreqStr, &hz)) return hz * 1000000;
517 #else
518   if (GetSysctl(FreqStr, &hz)) return hz;
519 #endif
520   fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
521           FreqStr, strerror(errno));
522 
523 #elif defined BENCHMARK_OS_WINDOWS
524   // In NT, read MHz from the registry. If we fail to do so or we're in win9x
525   // then make a crude estimate.
526   DWORD data, data_size = sizeof(data);
527   if (IsWindowsXPOrGreater() &&
528       SUCCEEDED(
529           SHGetValueA(HKEY_LOCAL_MACHINE,
530                       "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
531                       "~MHz", nullptr, &data, &data_size)))
532     return static_cast<double>((int64_t)data *
533                                (int64_t)(1000 * 1000));  // was mhz
534 #elif defined (BENCHMARK_OS_SOLARIS)
535   kstat_ctl_t *kc = kstat_open();
536   if (!kc) {
537     std::cerr << "failed to open /dev/kstat\n";
538     return -1;
539   }
540   kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0");
541   if (!ksp) {
542     std::cerr << "failed to lookup in /dev/kstat\n";
543     return -1;
544   }
545   if (kstat_read(kc, ksp, NULL) < 0) {
546     std::cerr << "failed to read from /dev/kstat\n";
547     return -1;
548   }
549   kstat_named_t *knp =
550       (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz");
551   if (!knp) {
552     std::cerr << "failed to lookup data in /dev/kstat\n";
553     return -1;
554   }
555   if (knp->data_type != KSTAT_DATA_UINT64) {
556     std::cerr << "current_clock_Hz is of unexpected data type: "
557               << knp->data_type << "\n";
558     return -1;
559   }
560   double clock_hz = knp->value.ui64;
561   kstat_close(kc);
562   return clock_hz;
563 #endif
564   // If we've fallen through, attempt to roughly estimate the CPU clock rate.
565   const int estimate_time_ms = 1000;
566   const auto start_ticks = cycleclock::Now();
567   SleepForMilliseconds(estimate_time_ms);
568   return static_cast<double>(cycleclock::Now() - start_ticks);
569 }
570 
571 }  // end namespace
572 
Get()573 const CPUInfo& CPUInfo::Get() {
574   static const CPUInfo* info = new CPUInfo();
575   return *info;
576 }
577 
// Fills in every field by calling the corresponding query helper.
// NOTE(review): the scaling_enabled initializer reads num_cpus, so this
// relies on num_cpus being declared before scaling_enabled in the class
// definition — confirm against the header.
CPUInfo::CPUInfo()
    : num_cpus(GetNumCPUs()),
      cycles_per_second(GetCPUCyclesPerSecond()),
      caches(GetCacheSizes()),
      scaling_enabled(CpuScalingEnabled(num_cpus)) {}
583 
584 }  // end namespace benchmark
585