1 /** @file kmp_stats.cpp
2  * Statistics gathering and processing.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_lock.h"
15 #include "kmp_stats.h"
16 #include "kmp_str.h"
17 
18 #include <algorithm>
19 #include <ctime>
20 #include <iomanip>
21 #include <sstream>
22 #include <stdlib.h> // for atexit
23 #include <cmath>
24 
25 #define STRINGIZE2(x) #x
26 #define STRINGIZE(x) STRINGIZE2(x)
27 
28 #define expandName(name, flags, ignore) {STRINGIZE(name), flags},
29 statInfo timeStat::timerInfo[] = {
30     KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}};
31 const statInfo counter::counterInfo[] = {
32     KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}};
33 #undef expandName
34 
35 #define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
36 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
37     KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
38 #undef expandName
39 
40 const kmp_stats_output_module::rgb_color
41     kmp_stats_output_module::globalColorArray[] = {
42         {1.0, 0.0, 0.0}, // red
43         {1.0, 0.6, 0.0}, // orange
44         {1.0, 1.0, 0.0}, // yellow
45         {0.0, 1.0, 0.0}, // green
46         {0.0, 0.0, 1.0}, // blue
47         {0.6, 0.2, 0.8}, // purple
48         {1.0, 0.0, 1.0}, // magenta
49         {0.0, 0.4, 0.2}, // dark green
50         {1.0, 1.0, 0.6}, // light yellow
51         {0.6, 0.4, 0.6}, // dirty purple
52         {0.0, 1.0, 1.0}, // cyan
53         {1.0, 0.4, 0.8}, // pink
54         {0.5, 0.5, 0.5}, // grey
55         {0.8, 0.7, 0.5}, // brown
56         {0.6, 0.6, 1.0}, // light blue
57         {1.0, 0.7, 0.5}, // peach
58         {0.8, 0.5, 1.0}, // lavender
59         {0.6, 0.0, 0.0}, // dark red
60         {0.7, 0.6, 0.0}, // gold
61         {0.0, 0.0, 0.0} // black
62 };
63 
64 // Ensure that the atexit handler only runs once.
65 static uint32_t statsPrinted = 0;
66 
67 // output interface
68 static kmp_stats_output_module *__kmp_stats_global_output = NULL;
69 
70 double logHistogram::binMax[] = {1.e1l, 1.e2l, 1.e3l, 1.e4l, 1.e5l, 1.e6l,
71                                  1.e7l, 1.e8l, 1.e9l, 1.e10l, 1.e11l, 1.e12l,
72                                  1.e13l, 1.e14l, 1.e15l, 1.e16l, 1.e17l, 1.e18l,
73                                  1.e19l, 1.e20l, 1.e21l, 1.e22l, 1.e23l, 1.e24l,
74                                  1.e25l, 1.e26l, 1.e27l, 1.e28l, 1.e29l, 1.e30l,
75                                  // Always have infinity be the last value
76                                  std::numeric_limits<double>::infinity()};
77 
78 /* ************* statistic member functions ************* */
79 
80 void statistic::addSample(double sample) {
81   sample -= offset;
82   KMP_DEBUG_ASSERT(std::isfinite(sample));
83 
84   double delta = sample - meanVal;
85 
86   sampleCount = sampleCount + 1;
87   meanVal = meanVal + delta / sampleCount;
88   m2 = m2 + delta * (sample - meanVal);
89 
90   minVal = std::min(minVal, sample);
91   maxVal = std::max(maxVal, sample);
92   if (collectingHist)
93     hist.addSample(sample);
94 }
95 
96 statistic &statistic::operator+=(const statistic &other) {
97   if (other.sampleCount == 0)
98     return *this;
99 
100   if (sampleCount == 0) {
101     *this = other;
102     return *this;
103   }
104 
105   uint64_t newSampleCount = sampleCount + other.sampleCount;
106   double dnsc = double(newSampleCount);
107   double dsc = double(sampleCount);
108   double dscBydnsc = dsc / dnsc;
109   double dosc = double(other.sampleCount);
110   double delta = other.meanVal - meanVal;
111 
112   // Try to order these calculations to avoid overflows. If this were Fortran,
113   // then the compiler would not be able to re-order over brackets. In C++ it
114   // may be legal to do that (we certainly hope it doesn't, and CC+ Programming
115   // Language 2nd edition suggests it shouldn't, since it says that exploitation
116   // of associativity can only be made if the operation really is associative
117   // (which floating addition isn't...)).
118   meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
119   m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
120   minVal = std::min(minVal, other.minVal);
121   maxVal = std::max(maxVal, other.maxVal);
122   sampleCount = newSampleCount;
123   if (collectingHist)
124     hist += other.hist;
125 
126   return *this;
127 }
128 
129 void statistic::scale(double factor) {
130   minVal = minVal * factor;
131   maxVal = maxVal * factor;
132   meanVal = meanVal * factor;
133   m2 = m2 * factor * factor;
134   return;
135 }
136 
137 std::string statistic::format(char unit, bool total) const {
138   std::string result = formatSI((double)sampleCount, 9, ' ');
139 
140   if (sampleCount == 0) {
141     result = result + std::string(", ") + formatSI(0.0, 9, unit);
142     result = result + std::string(", ") + formatSI(0.0, 9, unit);
143     result = result + std::string(", ") + formatSI(0.0, 9, unit);
144     if (total)
145       result = result + std::string(", ") + formatSI(0.0, 9, unit);
146     result = result + std::string(", ") + formatSI(0.0, 9, unit);
147   } else {
148     result = result + std::string(", ") + formatSI(minVal, 9, unit);
149     result = result + std::string(", ") + formatSI(meanVal, 9, unit);
150     result = result + std::string(", ") + formatSI(maxVal, 9, unit);
151     if (total)
152       result =
153           result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit);
154     result = result + std::string(", ") + formatSI(getSD(), 9, unit);
155   }
156   return result;
157 }
158 
159 /* ************* histogram member functions ************* */
160 
161 // Lowest bin that has anything in it
162 int logHistogram::minBin() const {
163   for (int i = 0; i < numBins; i++) {
164     if (bins[i].count != 0)
165       return i - logOffset;
166   }
167   return -logOffset;
168 }
169 
170 // Highest bin that has anything in it
171 int logHistogram::maxBin() const {
172   for (int i = numBins - 1; i >= 0; i--) {
173     if (bins[i].count != 0)
174       return i - logOffset;
175   }
176   return -logOffset;
177 }
178 
179 // Which bin does this sample belong in ?
180 uint32_t logHistogram::findBin(double sample) {
181   double v = std::fabs(sample);
182   // Simply loop up looking which bin to put it in.
183   // According to a micro-architect this is likely to be faster than a binary
184   // search, since
185   // it will only have one branch mis-predict
186   for (int b = 0; b < numBins - 1; b++)
187     if (binMax[b] > v)
188       return b;
189   return numBins - 1;
190 }
191 
192 void logHistogram::addSample(double sample) {
193   if (sample == 0.0) {
194     zeroCount += 1;
195 #ifdef KMP_DEBUG
196     _total++;
197     check();
198 #endif
199     return;
200   }
201   KMP_DEBUG_ASSERT(std::isfinite(sample));
202   uint32_t bin = findBin(sample);
203   KMP_DEBUG_ASSERT(0 <= bin && bin < numBins);
204 
205   bins[bin].count += 1;
206   bins[bin].total += sample;
207 #ifdef KMP_DEBUG
208   _total++;
209   check();
210 #endif
211 }
212 
213 // This may not be the format we want, but it'll do for now
214 std::string logHistogram::format(char unit) const {
215   std::stringstream result;
216 
217   result << "Bin,                Count,     Total\n";
218   if (zeroCount) {
219     result << "0,              " << formatSI(zeroCount, 9, ' ') << ", ",
220         formatSI(0.0, 9, unit);
221     if (count(minBin()) == 0)
222       return result.str();
223     result << "\n";
224   }
225   for (int i = minBin(); i <= maxBin(); i++) {
226     result << "10**" << i << "<=v<";
227     if (i + 1 == numBins - 1)
228       result << "infinity, ";
229     else
230       result << "10**" << (i + 1) << ", ";
231     result << formatSI(count(i), 9, ' ') << ", " << formatSI(total(i), 9, unit);
232     if (i != maxBin())
233       result << "\n";
234   }
235 
236   return result.str();
237 }
238 
239 /* ************* explicitTimer member functions ************* */
240 
241 void explicitTimer::start(tsc_tick_count tick) {
242   startTime = tick;
243   totalPauseTime = 0;
244   if (timeStat::logEvent(timerEnumValue)) {
245     __kmp_stats_thread_ptr->incrementNestValue();
246   }
247   return;
248 }
249 
250 void explicitTimer::stop(tsc_tick_count tick,
251                          kmp_stats_list *stats_ptr /* = nullptr */) {
252   if (startTime.getValue() == 0)
253     return;
254 
255   stat->addSample(((tick - startTime) - totalPauseTime).ticks());
256 
257   if (timeStat::logEvent(timerEnumValue)) {
258     if (!stats_ptr)
259       stats_ptr = __kmp_stats_thread_ptr;
260     stats_ptr->push_event(
261         startTime.getValue() - __kmp_stats_start_time.getValue(),
262         tick.getValue() - __kmp_stats_start_time.getValue(),
263         __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
264     stats_ptr->decrementNestValue();
265   }
266 
267   /* We accept the risk that we drop a sample because it really did start at
268      t==0. */
269   startTime = 0;
270   return;
271 }
272 
273 /* ************* partitionedTimers member functions ************* */
274 partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
275 
276 // initialize the partitioned timers to an initial timer
277 void partitionedTimers::init(explicitTimer timer) {
278   KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
279   timer_stack.push_back(timer);
280   timer_stack.back().start(tsc_tick_count::now());
281 }
282 
283 // stop/save the current timer, and start the new timer (timer_pair)
284 // There is a special condition where if the current timer is equal to
285 // the one you are trying to push, then it only manipulates the stack,
286 // and it won't stop/start the currently running timer.
287 void partitionedTimers::push(explicitTimer timer) {
288   // get the current timer
289   // pause current timer
290   // push new timer
291   // start the new timer
292   explicitTimer *current_timer, *new_timer;
293   size_t stack_size;
294   KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
295   timer_stack.push_back(timer);
296   stack_size = timer_stack.size();
297   current_timer = &(timer_stack[stack_size - 2]);
298   new_timer = &(timer_stack[stack_size - 1]);
299   tsc_tick_count tick = tsc_tick_count::now();
300   current_timer->pause(tick);
301   new_timer->start(tick);
302 }
303 
304 // stop/discard the current timer, and start the previously saved timer
305 void partitionedTimers::pop() {
306   // get the current timer
307   // stop current timer (record event/sample)
308   // pop current timer
309   // get the new current timer and resume
310   explicitTimer *old_timer, *new_timer;
311   size_t stack_size = timer_stack.size();
312   KMP_DEBUG_ASSERT(stack_size > 1);
313   old_timer = &(timer_stack[stack_size - 1]);
314   new_timer = &(timer_stack[stack_size - 2]);
315   tsc_tick_count tick = tsc_tick_count::now();
316   old_timer->stop(tick);
317   new_timer->resume(tick);
318   timer_stack.pop_back();
319 }
320 
321 void partitionedTimers::exchange(explicitTimer timer) {
322   // get the current timer
323   // stop current timer (record event/sample)
324   // push new timer
325   // start the new timer
326   explicitTimer *current_timer, *new_timer;
327   size_t stack_size;
328   KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
329   tsc_tick_count tick = tsc_tick_count::now();
330   stack_size = timer_stack.size();
331   current_timer = &(timer_stack[stack_size - 1]);
332   current_timer->stop(tick);
333   timer_stack.pop_back();
334   timer_stack.push_back(timer);
335   new_timer = &(timer_stack[stack_size - 1]);
336   new_timer->start(tick);
337 }
338 
339 // Wind up all the currently running timers.
340 // This pops off all the timers from the stack and clears the stack
341 // After this is called, init() must be run again to initialize the
342 // stack of timers
343 void partitionedTimers::windup() {
344   while (timer_stack.size() > 1) {
345     this->pop();
346   }
347   // Pop the timer from the init() call
348   if (timer_stack.size() > 0) {
349     timer_stack.back().stop(tsc_tick_count::now());
350     timer_stack.pop_back();
351   }
352 }
353 
354 /* ************* kmp_stats_event_vector member functions ************* */
355 
356 void kmp_stats_event_vector::deallocate() {
357   __kmp_free(events);
358   internal_size = 0;
359   allocated_size = 0;
360   events = NULL;
361 }
362 
363 // This function is for qsort() which requires the compare function to return
364 // either a negative number if event1 < event2, a positive number if event1 >
365 // event2 or zero if event1 == event2. This sorts by start time (lowest to
366 // highest).
367 int compare_two_events(const void *event1, const void *event2) {
368   const kmp_stats_event *ev1 = RCAST(const kmp_stats_event *, event1);
369   const kmp_stats_event *ev2 = RCAST(const kmp_stats_event *, event2);
370 
371   if (ev1->getStart() < ev2->getStart())
372     return -1;
373   else if (ev1->getStart() > ev2->getStart())
374     return 1;
375   else
376     return 0;
377 }
378 
379 void kmp_stats_event_vector::sort() {
380   qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
381 }
382 
383 /* ************* kmp_stats_list member functions ************* */
384 
385 // returns a pointer to newly created stats node
386 kmp_stats_list *kmp_stats_list::push_back(int gtid) {
387   kmp_stats_list *newnode =
388       (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list));
389   // placement new, only requires space and pointer and initializes (so
390   // __kmp_allocate instead of C++ new[] is used)
391   new (newnode) kmp_stats_list();
392   newnode->setGtid(gtid);
393   newnode->prev = this->prev;
394   newnode->next = this;
395   newnode->prev->next = newnode;
396   newnode->next->prev = newnode;
397   return newnode;
398 }
399 void kmp_stats_list::deallocate() {
400   kmp_stats_list *ptr = this->next;
401   kmp_stats_list *delptr = this->next;
402   while (ptr != this) {
403     delptr = ptr;
404     ptr = ptr->next;
405     // placement new means we have to explicitly call destructor.
406     delptr->_event_vector.deallocate();
407     delptr->~kmp_stats_list();
408     __kmp_free(delptr);
409   }
410 }
411 kmp_stats_list::iterator kmp_stats_list::begin() {
412   kmp_stats_list::iterator it;
413   it.ptr = this->next;
414   return it;
415 }
416 kmp_stats_list::iterator kmp_stats_list::end() {
417   kmp_stats_list::iterator it;
418   it.ptr = this;
419   return it;
420 }
421 int kmp_stats_list::size() {
422   int retval;
423   kmp_stats_list::iterator it;
424   for (retval = 0, it = begin(); it != end(); it++, retval++) {
425   }
426   return retval;
427 }
428 
429 /* ************* kmp_stats_list::iterator member functions ************* */
430 
431 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
432 kmp_stats_list::iterator::~iterator() {}
433 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
434   this->ptr = this->ptr->next;
435   return *this;
436 }
437 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
438   this->ptr = this->ptr->next;
439   return *this;
440 }
441 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
442   this->ptr = this->ptr->prev;
443   return *this;
444 }
445 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
446   this->ptr = this->ptr->prev;
447   return *this;
448 }
449 bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) {
450   return this->ptr != rhs.ptr;
451 }
452 bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) {
453   return this->ptr == rhs.ptr;
454 }
455 kmp_stats_list *kmp_stats_list::iterator::operator*() const {
456   return this->ptr;
457 }
458 
459 /* *************  kmp_stats_output_module functions ************** */
460 
461 const char *kmp_stats_output_module::eventsFileName = NULL;
462 const char *kmp_stats_output_module::plotFileName = NULL;
463 int kmp_stats_output_module::printPerThreadFlag = 0;
464 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
465 
466 static char const *lastName(char *name) {
467   int l = (int)strlen(name);
468   for (int i = l - 1; i >= 0; --i) {
469     if (name[i] == '.')
470       name[i] = '_';
471     if (name[i] == '/')
472       return name + i + 1;
473   }
474   return name;
475 }
476 
477 /* Read the name of the executable from /proc/self/cmdline */
478 static char const *getImageName(char *buffer, size_t buflen) {
479   FILE *f = fopen("/proc/self/cmdline", "r");
480   buffer[0] = char(0);
481   if (!f)
482     return buffer;
483 
484   // The file contains char(0) delimited words from the commandline.
485   // This just returns the last filename component of the first word on the
486   // line.
487   size_t n = fread(buffer, 1, buflen, f);
488   if (n == 0) {
489     fclose(f);
490     KMP_CHECK_SYSFAIL("fread", 1)
491   }
492   fclose(f);
493   buffer[buflen - 1] = char(0);
494   return lastName(buffer);
495 }
496 
497 static void getTime(char *buffer, size_t buflen, bool underscores = false) {
498   time_t timer;
499 
500   time(&timer);
501 
502   struct tm *tm_info = localtime(&timer);
503   if (underscores)
504     strftime(buffer, buflen, "%Y-%m-%d_%H%M%S", tm_info);
505   else
506     strftime(buffer, buflen, "%Y-%m-%d %H%M%S", tm_info);
507 }
508 
509 /* Generate a stats file name, expanding prototypes */
510 static std::string generateFilename(char const *prototype,
511                                     char const *imageName) {
512   std::string res;
513 
514   for (int i = 0; prototype[i] != char(0); i++) {
515     char ch = prototype[i];
516 
517     if (ch == '%') {
518       i++;
519       if (prototype[i] == char(0))
520         break;
521 
522       switch (prototype[i]) {
523       case 't': // Insert time and date
524       {
525         char date[26];
526         getTime(date, sizeof(date), true);
527         res += date;
528       } break;
529       case 'e': // Insert executable name
530         res += imageName;
531         break;
532       case 'p': // Insert pid
533       {
534         std::stringstream ss;
535         ss << getpid();
536         res += ss.str();
537       } break;
538       default:
539         res += prototype[i];
540         break;
541       }
542     } else
543       res += ch;
544   }
545   return res;
546 }
547 
548 // init() is called very near the beginning of execution time in the constructor
549 // of __kmp_stats_global_output
550 void kmp_stats_output_module::init() {
551 
552   char *statsFileName = getenv("KMP_STATS_FILE");
553   eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
554   plotFileName = getenv("KMP_STATS_PLOT_FILE");
555   char *threadStats = getenv("KMP_STATS_THREADS");
556   char *threadEvents = getenv("KMP_STATS_EVENTS");
557 
558   // set the stats output filenames based on environment variables and defaults
559   if (statsFileName) {
560     char imageName[1024];
561     // Process any escapes (e.g., %p, %e, %t) in the name
562     outputFileName = generateFilename(
563         statsFileName, getImageName(&imageName[0], sizeof(imageName)));
564   }
565   eventsFileName = eventsFileName ? eventsFileName : "events.dat";
566   plotFileName = plotFileName ? plotFileName : "events.plt";
567 
568   // set the flags based on environment variables matching: true, on, 1, .true.
569   // , .t. , yes
570   printPerThreadFlag = __kmp_str_match_true(threadStats);
571   printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
572 
573   if (printPerThreadEventsFlag) {
574     // assigns a color to each timer for printing
575     setupEventColors();
576   } else {
577     // will clear flag so that no event will be logged
578     timeStat::clearEventFlags();
579   }
580 }
581 
582 void kmp_stats_output_module::setupEventColors() {
583   int i;
584   int globalColorIndex = 0;
585   int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
586   for (i = 0; i < TIMER_LAST; i++) {
587     if (timeStat::logEvent((timer_e)i)) {
588       timerColorInfo[i] = globalColorArray[globalColorIndex];
589       globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
590     }
591   }
592 }
593 
594 void kmp_stats_output_module::printTimerStats(FILE *statsOut,
595                                               statistic const *theStats,
596                                               statistic const *totalStats) {
597   fprintf(statsOut,
598           "Timer,                             SampleCount,    Min,      "
599           "Mean,       Max,     Total,        SD\n");
600   for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
601     statistic const *stat = &theStats[s];
602     char tag = timeStat::noUnits(s) ? ' ' : 'T';
603 
604     fprintf(statsOut, "%-35s, %s\n", timeStat::name(s),
605             stat->format(tag, true).c_str());
606   }
607   // Also print the Total_ versions of times.
608   for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
609     char tag = timeStat::noUnits(s) ? ' ' : 'T';
610     if (totalStats && !timeStat::noTotal(s))
611       fprintf(statsOut, "Total_%-29s, %s\n", timeStat::name(s),
612               totalStats[s].format(tag, true).c_str());
613   }
614 
615   // Print histogram of statistics
616   if (theStats[0].haveHist()) {
617     fprintf(statsOut, "\nTimer distributions\n");
618     for (int s = 0; s < TIMER_LAST; s++) {
619       statistic const *stat = &theStats[s];
620 
621       if (stat->getCount() != 0) {
622         char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T';
623 
624         fprintf(statsOut, "%s\n", timeStat::name(timer_e(s)));
625         fprintf(statsOut, "%s\n", stat->getHist()->format(tag).c_str());
626       }
627     }
628   }
629 }
630 
631 void kmp_stats_output_module::printCounterStats(FILE *statsOut,
632                                                 statistic const *theStats) {
633   fprintf(statsOut, "Counter,                 ThreadCount,    Min,      Mean,  "
634                     "     Max,     Total,        SD\n");
635   for (int s = 0; s < COUNTER_LAST; s++) {
636     statistic const *stat = &theStats[s];
637     fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
638             stat->format(' ', true).c_str());
639   }
640   // Print histogram of counters
641   if (theStats[0].haveHist()) {
642     fprintf(statsOut, "\nCounter distributions\n");
643     for (int s = 0; s < COUNTER_LAST; s++) {
644       statistic const *stat = &theStats[s];
645 
646       if (stat->getCount() != 0) {
647         fprintf(statsOut, "%s\n", counter::name(counter_e(s)));
648         fprintf(statsOut, "%s\n", stat->getHist()->format(' ').c_str());
649       }
650     }
651   }
652 }
653 
654 void kmp_stats_output_module::printCounters(FILE *statsOut,
655                                             counter const *theCounters) {
656   // We print all the counters even if they are zero.
657   // That makes it easier to slice them into a spreadsheet if you need to.
658   fprintf(statsOut, "\nCounter,                    Count\n");
659   for (int c = 0; c < COUNTER_LAST; c++) {
660     counter const *stat = &theCounters[c];
661     fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)),
662             formatSI((double)stat->getValue(), 9, ' ').c_str());
663   }
664 }
665 
666 void kmp_stats_output_module::printEvents(FILE *eventsOut,
667                                           kmp_stats_event_vector *theEvents,
668                                           int gtid) {
669   // sort by start time before printing
670   theEvents->sort();
671   for (int i = 0; i < theEvents->size(); i++) {
672     kmp_stats_event ev = theEvents->at(i);
673     rgb_color color = getEventColor(ev.getTimerName());
674     fprintf(eventsOut, "%d %llu %llu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
675             static_cast<unsigned long long>(ev.getStart()),
676             static_cast<unsigned long long>(ev.getStop()),
677             1.2 - (ev.getNestLevel() * 0.2), color.r, color.g, color.b,
678             timeStat::name(ev.getTimerName()));
679   }
680   return;
681 }
682 
683 void kmp_stats_output_module::windupExplicitTimers() {
684   // Wind up any explicit timers. We assume that it's fair at this point to just
685   // walk all the explicit timers in all threads and say "it's over".
686   // If the timer wasn't running, this won't record anything anyway.
687   kmp_stats_list::iterator it;
688   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
689     kmp_stats_list *ptr = *it;
690     ptr->getPartitionedTimers()->windup();
691     ptr->endLife();
692   }
693 }
694 
695 void kmp_stats_output_module::printPloticusFile() {
696   int i;
697   int size = __kmp_stats_list->size();
698   kmp_safe_raii_file_t plotOut(plotFileName, "w+");
699   fprintf(plotOut, "#proc page\n"
700                    "   pagesize: 15 10\n"
701                    "   scale: 1.0\n\n");
702 
703   fprintf(plotOut,
704           "#proc getdata\n"
705           "   file: %s\n\n",
706           eventsFileName);
707 
708   fprintf(plotOut,
709           "#proc areadef\n"
710           "   title: OpenMP Sampling Timeline\n"
711           "   titledetails: align=center size=16\n"
712           "   rectangle: 1 1 13 9\n"
713           "   xautorange: datafield=2,3\n"
714           "   yautorange: -1 %d\n\n",
715           size);
716 
717   fprintf(plotOut, "#proc xaxis\n"
718                    "   stubs: inc\n"
719                    "   stubdetails: size=12\n"
720                    "   label: Time (ticks)\n"
721                    "   labeldetails: size=14\n\n");
722 
723   fprintf(plotOut,
724           "#proc yaxis\n"
725           "   stubs: inc 1\n"
726           "   stubrange: 0 %d\n"
727           "   stubdetails: size=12\n"
728           "   label: Thread #\n"
729           "   labeldetails: size=14\n\n",
730           size - 1);
731 
732   fprintf(plotOut, "#proc bars\n"
733                    "   exactcolorfield: 5\n"
734                    "   axis: x\n"
735                    "   locfield: 1\n"
736                    "   segmentfields: 2 3\n"
737                    "   barwidthfield: 4\n\n");
738 
739   // create legend entries corresponding to the timer color
740   for (i = 0; i < TIMER_LAST; i++) {
741     if (timeStat::logEvent((timer_e)i)) {
742       rgb_color c = getEventColor((timer_e)i);
743       fprintf(plotOut,
744               "#proc legendentry\n"
745               "   sampletype: color\n"
746               "   label: %s\n"
747               "   details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
748               timeStat::name((timer_e)i), c.r, c.g, c.b);
749     }
750   }
751 
752   fprintf(plotOut, "#proc legend\n"
753                    "   format: down\n"
754                    "   location: max max\n\n");
755   return;
756 }
757 
758 static void outputEnvVariable(FILE *statsOut, char const *name) {
759   char const *value = getenv(name);
760   fprintf(statsOut, "# %s = %s\n", name, value ? value : "*unspecified*");
761 }
762 
763 /* Print some useful information about
764    * the date and time this experiment ran.
765    * the machine on which it ran.
766    We output all of this as stylised comments, though we may decide to parse
767    some of it. */
768 void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
769   std::time_t now = std::time(0);
770   char buffer[40];
771   char hostName[80];
772 
773   std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
774   fprintf(statsOut, "# Time of run: %s\n", &buffer[0]);
775   if (gethostname(&hostName[0], sizeof(hostName)) == 0)
776     fprintf(statsOut, "# Hostname: %s\n", &hostName[0]);
777 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
778   fprintf(statsOut, "# CPU:  %s\n", &__kmp_cpuinfo.name[0]);
779   fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n",
780           __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
781   if (__kmp_cpuinfo.frequency == 0)
782     fprintf(statsOut, "# Nominal frequency: Unknown\n");
783   else
784     fprintf(statsOut, "# Nominal frequency: %sz\n",
785             formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
786   outputEnvVariable(statsOut, "KMP_HW_SUBSET");
787   outputEnvVariable(statsOut, "KMP_AFFINITY");
788   outputEnvVariable(statsOut, "KMP_BLOCKTIME");
789   outputEnvVariable(statsOut, "KMP_LIBRARY");
790   fprintf(statsOut, "# Production runtime built " __DATE__ " " __TIME__ "\n");
791 #endif
792 }
793 
794 void kmp_stats_output_module::outputStats(const char *heading) {
795   // Stop all the explicit timers in all threads
796   // Do this before declaring the local statistics because thay have
797   // constructors so will take time to create.
798   windupExplicitTimers();
799 
800   statistic allStats[TIMER_LAST];
801   statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of
802                                        normal timer stats */
803   statistic allCounters[COUNTER_LAST];
804 
805   kmp_safe_raii_file_t statsOut;
806   if (!outputFileName.empty()) {
807     statsOut.open(outputFileName.c_str(), "a+");
808   } else {
809     statsOut.set_stderr();
810   }
811 
812   kmp_safe_raii_file_t eventsOut;
813   if (eventPrintingEnabled()) {
814     eventsOut.open(eventsFileName, "w+");
815   }
816 
817   printHeaderInfo(statsOut);
818   fprintf(statsOut, "%s\n", heading);
819   // Accumulate across threads.
820   kmp_stats_list::iterator it;
821   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
822     int t = (*it)->getGtid();
823     // Output per thread stats if requested.
824     if (printPerThreadFlag) {
825       fprintf(statsOut, "Thread %d\n", t);
826       printTimerStats(statsOut, (*it)->getTimers(), 0);
827       printCounters(statsOut, (*it)->getCounters());
828       fprintf(statsOut, "\n");
829     }
830     // Output per thread events if requested.
831     if (eventPrintingEnabled()) {
832       kmp_stats_event_vector events = (*it)->getEventVector();
833       printEvents(eventsOut, &events, t);
834     }
835 
836     // Accumulate timers.
837     for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
838       // See if we should ignore this timer when aggregating
839       if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on
840           // primary thread and this thread is worker
841           (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker
842           // and this thread is the primary thread
843           ) {
844         continue;
845       }
846 
847       statistic *threadStat = (*it)->getTimer(s);
848       allStats[s] += *threadStat;
849 
850       // Add Total stats for timers that are valid in more than one thread
851       if (!timeStat::noTotal(s))
852         totalStats[s].addSample(threadStat->getTotal());
853     }
854 
855     // Accumulate counters.
856     for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
857       if (counter::masterOnly(c) && t != 0)
858         continue;
859       allCounters[c].addSample((double)(*it)->getCounter(c)->getValue());
860     }
861   }
862 
863   if (eventPrintingEnabled()) {
864     printPloticusFile();
865   }
866 
867   fprintf(statsOut, "Aggregate for all threads\n");
868   printTimerStats(statsOut, &allStats[0], &totalStats[0]);
869   fprintf(statsOut, "\n");
870   printCounterStats(statsOut, &allCounters[0]);
871 }
872 
873 /* *************  exported C functions ************** */
874 
875 // no name mangling for these functions, we want the c files to be able to get
876 // at these functions
877 extern "C" {
878 
879 void __kmp_reset_stats() {
880   kmp_stats_list::iterator it;
881   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
882     timeStat *timers = (*it)->getTimers();
883     counter *counters = (*it)->getCounters();
884 
885     for (int t = 0; t < TIMER_LAST; t++)
886       timers[t].reset();
887 
888     for (int c = 0; c < COUNTER_LAST; c++)
889       counters[c].reset();
890 
891     // reset the event vector so all previous events are "erased"
892     (*it)->resetEventVector();
893   }
894 }
895 
896 // This function will reset all stats and stop all threads' explicit timers if
897 // they haven't been stopped already.
898 void __kmp_output_stats(const char *heading) {
899   __kmp_stats_global_output->outputStats(heading);
900   __kmp_reset_stats();
901 }
902 
903 void __kmp_accumulate_stats_at_exit(void) {
904   // Only do this once.
905   if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
906     return;
907 
908   __kmp_output_stats("Statistics on exit");
909 }
910 
911 void __kmp_stats_init(void) {
912   __kmp_init_tas_lock(&__kmp_stats_lock);
913   __kmp_stats_start_time = tsc_tick_count::now();
914   __kmp_stats_global_output = new kmp_stats_output_module();
915   __kmp_stats_list = new kmp_stats_list();
916 }
917 
918 void __kmp_stats_fini(void) {
919   __kmp_accumulate_stats_at_exit();
920   __kmp_stats_list->deallocate();
921   delete __kmp_stats_global_output;
922   delete __kmp_stats_list;
923 }
924 
925 } // extern "C"
926