1 // @HEADER
2 // ***********************************************************************
3 //
4 //                    Teuchos: Common Tools Package
5 //                 Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 // @HEADER
39 
40 #ifndef TEUCHOS_STACKED_TIMER_HPP
41 #define TEUCHOS_STACKED_TIMER_HPP
42 
43 #include "Teuchos_ConfigDefs.hpp"
44 #include "Teuchos_Comm.hpp"
45 #include "Teuchos_DefaultComm.hpp"
46 #include "Teuchos_CommHelpers.hpp"
47 #include "Teuchos_RCP.hpp"
48 #include "Teuchos_Array.hpp"
49 #include "Teuchos_PerformanceMonitorBase.hpp"
50 #include <string>
51 #include <vector>
52 #include <cassert>
53 #include <chrono>
54 #include <climits>
55 #include <cstdlib> // for std::getenv and atoi
56 #include <ctime> // for timestamp support
57 #include <iostream>
58 
59 #if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOSCORE)
60 namespace Kokkos {
61 namespace Profiling {
62 extern void pushRegion (const std::string&);
63 extern void popRegion ();
64 } // namespace Profiling
65 } // namespace Kokkos
66 #endif
67 
68 
69 namespace Teuchos {
70 
71 //! Error reporting function for stacked timer.
72 void error_out(const std::string& msg, const bool fail_all = false);
73 
74 /**
75  * \brief the basic timer used elsewhere, uses MPI_Wtime for time
76  *
77  * This class hold  a time and number of times this timer is called, and a
78  * count of how many "updates" this timer has services.  Example if you have a
79  * mesh of 100 elements your count might be 7, but total iterations would be 700
80  * this number is useful for dynamic systems where the total number of items services
81  * might change in time
82  */
83 class BaseTimer {
84 
85 public:
86 
87   using Clock = std::chrono::high_resolution_clock;
88 
BaseTimer()89   BaseTimer() : accumulation_(0.0), count_started_(0), count_updates_(0), running_(false) {}
90 
91   /// Start a currently stopped timer
start()92   void start(){
93     if (running_)
94       error_out("Base_Timer:start Failed timer already running");
95     start_time_ = Clock::now();
96 
97     count_started_++;
98     running_ = true;
99   }
100 
101   /// Stop a current running timer and accumulate time difference
stop()102   void stop(){
103     if (!running_)
104       error_out("Base_Timer:stop Failed timer not running");
105     accumulation_ += std::chrono::duration_cast<std::chrono::duration<double>>(Clock::now() - start_time_).count();
106     running_ = false;
107   }
108 
109   /// Increment the total number of items updated between a start stop
incrementUpdates(unsigned long long count=1)110   unsigned long long incrementUpdates(unsigned long long count=1) {count_updates_ += count; return count_updates_;}
111 
112   /// Get the total accumulated time since last reset or construction when the timer is running
accumulatedTime() const113   double accumulatedTime() const {return accumulation_;}
114 
115   /// Setter for accumulated time
setAccumulatedTime(double accum=0)116   void setAccumulatedTime(double accum=0)  {accumulation_=accum;}
117 
118   /**
119    * \brief return the average time per item updated
120    *
121    * This returns the time on average that the code spends updating an
122    * iteration.  If it is running than it will not include the current time.  It
123    * differs from accumulatedTimePerTimerCall in that it is meant to be timer per
124    * event other that start/stop, like mesh update
125    * @return average time per iteration pair
accumulatedTimePerUpdate() const126    */  double accumulatedTimePerUpdate() const {
127     if (count_updates_ > 0) {
128       return accumulation_/count_updates_;
129     } else {
130       return 0;
131     }
132   }
133 
134 
135   /**
136    * \brief return the average time per timer start/stop
137    *
138    * This returns the time on average that the code spends between
139    * a call to start and stop.  If it is running than it will not include the current time
140    * @return average time per start/stop pair
141    */
accumulatedTimePerTimerCall() const142   double accumulatedTimePerTimerCall() const {
143     if (count_started_> 0) {
144       return accumulation_/count_started_;
145     } else {
146       return 0;
147     }
148   }
149 
150   /**
151    * \brief  Return the difference between two timers in seconds,
152    * @param [in] from reference time you are computing difference from
153    * @return my time - from times
154    */
155 
difference(const BaseTimer & from) const156   double difference(const BaseTimer &from) const {
157     return accumulation_ - from.accumulation_;
158   }
159 
160   /// Reset all the timer stats, throws if it is already running
reset()161   void reset() {
162     if (running_)
163        error_out("BaseTimer, cannot reset a running timer");
164     accumulation_ = 0.0;
165     count_started_ = count_updates_ = 0;
166   }
167 
168   /// Returns true if the timer is currently accumulating time.
running() const169   bool running() const { return running_;}
170 
171   /// Returns the number of calls to start().
numCalls() const172   unsigned long numCalls() const { return count_started_; }
173 
174   /// Returns the number of updates added to this timer.
numUpdates() const175   unsigned long long numUpdates() const { return count_updates_; }
176 
177   /// Sets the number of calls to start() for this timer. This is only used for unit testing.
overrideNumCallsForUnitTesting(const unsigned long num_calls)178   void overrideNumCallsForUnitTesting(const unsigned long num_calls)
179   { count_started_ = num_calls; }
180 
181   /// Sets the number of counts for this timer. This is only used for unit testing.
overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)182   void overrideNumUpdatesForUnitTesting(const unsigned long long num_updates)
183   { count_updates_ = num_updates; }
184 
185   struct TimeInfo {
TimeInfoTeuchos::BaseTimer::TimeInfo186     TimeInfo():time(0.0), count(0), updates(0), running(false){}
TimeInfoTeuchos::BaseTimer::TimeInfo187     TimeInfo(BaseTimer* t): time(t->accumulation_), count(t->count_started_), updates(t->count_updates_), running(t->running()) {}
188     double time;
189     unsigned long count;
190     unsigned long long updates;
191     bool running;
192   };
193 
194 protected:
195   double accumulation_;       // total time
196   unsigned long count_started_; // Number of times this timer has been started
197   unsigned long long count_updates_; // Total count of items updated during this timer
198   Clock::time_point start_time_;
199   bool running_;
200 
201   friend struct TimeInfo;
202 };
203 
204 /**
205  *\brief This class allows one to push and pop timers on and off a stack.
206  *
207  * To use this class one would do something like this
208  *
209  * StackedTimer timer(); // construct top level timer
210  * timer.start("some comp");
211  * do_work();
212  * timer.start("sub comp");
213  * more_work();
214  * timer.stop(); // stopping sub comp
215  * timer.stop(); // stopping comp
216  *
217  * timer.stop(); // stopping all timer stuff
218  * timer.report(std::cout); // dump to screen
219  *
220  */
221 class StackedTimer
222 {
223 protected:
224 
225   /**
226    * \brief Timer info at a given level and all the children
227    *
228    * This holds the timer info for the leaf node of a timer stack.  It has
229    * both its timer info and all the sub leaves bellow it.  You can start and
230    * stop sub timers, get sub info and dump out things to a ostream
231    *
232    */
233   class LevelTimer : public BaseTimer {
234   protected:
235 
236     // TODO: implement operator=
237 
238     unsigned level_;
239     std::string name_;
240     LevelTimer *parent_;
241     std::vector<LevelTimer> sub_timers_;
242   public:
243     /// Default constructor, shouldn't be used but needed for std::vector
244     LevelTimer();
245 
246     /**
247      * Standard constructor
248      * @param [in] level Integer level of this timer
249      * @param [in] name Name for this timer
250      * @param [in] parent Parent of this timer
251      * @param [in] start bool to start the timer on construction
252      */
LevelTimer(int level,const char * name="RootTimer",LevelTimer * parent=nullptr,bool start_timer=true)253     LevelTimer(int level,
254         const char* name = "RootTimer",
255         LevelTimer *parent=nullptr,
256         bool start_timer=true) :
257           BaseTimer(),
258           level_(level),
259           name_(name),
260           parent_(parent)
261     {
262       if ( start_timer )
263         BaseTimer::start();
264 
265     }
266 
267     /// Copy constructor
LevelTimer(const LevelTimer & src)268     LevelTimer(const LevelTimer &src) :
269       BaseTimer(src), level_(src.level_), name_(src.name_),parent_(src.parent_), sub_timers_(src.sub_timers_)
270     {
271       for (unsigned i=0;i<sub_timers_.size();++i)
272         sub_timers_[i].parent_ = this;
273     }
274 
275     /**
276      * Start a sub timer of a given name, create if doesn't exist
277      * @param [in] sub_name Name of subtimer
278      * @return Pointer to the sub timer which was started
279      */
start(const char * sub_name)280     LevelTimer* start(const char* sub_name) {
281       for (unsigned i=0;i<sub_timers_.size();i++ )
282         if (sub_name == sub_timers_[i].name_ ) {
283           sub_timers_[i].BaseTimer::start();
284           return &sub_timers_[i];
285         }
286       sub_timers_.push_back(LevelTimer(level_+1,sub_name,this,true));
287       return &sub_timers_[sub_timers_.size()-1];
288     }
289 
290     /**
291      * Stop the current running timer.  The timer name is required to verify that you are
292      * stopping the current running timer and will error out if the names don't match
293      *
294      * @param [in] name the name of the timer you want to stop, used for matching start only
295      * @return parent level timer
296      */
stop(const std::string & name="RootTimer")297     LevelTimer* stop(const std::string &name = "RootTimer") {
298       if (name != name_)
299         error_out("Stopping timer "+name+" But top level running timer is "+name_);
300       BaseTimer::stop();
301       return parent_;
302     }
303 
304 
305     /**
306      * Return the full name of the timer with each level split by :
307      * @return The full name of the timer
308      */
get_full_name() const309     std::string get_full_name() const {
310       std::string parent_name("");
311       if ((parent_ != nullptr))
312         parent_name = parent_->get_full_name() + "@";
313 
314       std::string my_name(name_);
315 
316       std::string full_name = parent_name + my_name;
317       return full_name;
318     }
319 
320     /**
321      * Return the number of timers on this level
322      * @return the number of timers and sub timers
323      */
324 
countTimers()325     int countTimers() {
326       int count=1;
327       for (unsigned i=0;i<sub_timers_.size(); ++i)
328         count += sub_timers_[i].countTimers();
329       return count;
330     }
331 
addTimerNames(Array<std::string> & names,unsigned & pos)332     void addTimerNames(Array<std::string> &names, unsigned &pos) {
333       names[pos++] = get_full_name();
334       for (unsigned i=0;i<sub_timers_.size(); ++i)
335         sub_timers_[i].addTimerNames(names, pos);
336     }
337 
338     /**
339      * Return the time spent at a given level
340      * @param [in] locate_name name of subtimer, if blank return current level time
341      * @return time in seconds at provided level
342      */
accumulatedTime(const std::string & locate_name="")343     double accumulatedTime(const std::string &locate_name="") {
344 
345       if (locate_name == "")
346         return BaseTimer::accumulatedTime();
347 
348       std::string first_name,second_name;
349 
350       size_t i = locate_name.find_first_of('@');
351       if ( i >= locate_name.size() ) {
352         first_name = locate_name;
353         second_name = "";
354       } else {
355         first_name.assign(locate_name,0,i);
356         second_name.assign(locate_name,i+1,locate_name.size()-i-1);
357       }
358       for (unsigned j=0;j<sub_timers_.size();++j)
359         if ( first_name == sub_timers_[j].name_)
360           return sub_timers_[j].accumulatedTime(second_name);
361       return 0;
362     }
363 
364     /// Returns the level of the timer in the stack
level() const365     unsigned level() const
366     {return level_;}
367 
368   protected:
369     /**
370      * \brief split a string into two parts split by a '@' if no '@' first gets the full string
371      * @param [in] locate_name input string to split
372      * @param [out] first_name Part of string before the first '@'
373      * @param [out] second_name part of string after the first '@'
374      */
splitString(const std::string & locate_name,std::string & first_name,std::string & second_name)375     void splitString(const std::string &locate_name, std::string &first_name, std::string &second_name) {
376       size_t i = locate_name.find_first_of('@');
377       if ( i >= locate_name.size() ) {
378         first_name = locate_name;
379         second_name = "";
380       } else {
381         first_name.assign(locate_name,0,i);
382         second_name.assign(locate_name,i+1,locate_name.size()-i-1);
383       }
384     }
385 
386   public:
387     /**
388      * Return the time spent per update at a given level
389      * @param [in] locate_name name of subtimer, if blank return current level time
390      * @return time in seconds per update at provided level
391      */
accumulatedTimePerUpdate(const std::string & locate_name="")392     double accumulatedTimePerUpdate(const std::string &locate_name="") {
393 
394        if (locate_name == "")
395          return BaseTimer::accumulatedTimePerUpdate();
396 
397        std::string first_name,second_name;
398        splitString(locate_name, first_name, second_name);
399 
400        for (unsigned j=0;j<sub_timers_.size();j++)
401          if ( first_name == sub_timers_[j].name_)
402            return sub_timers_[j].accumulatedTimePerUpdate(second_name);
403        return 0;
404      }
405 
406      /**
407       * Return the time spent per timer start/stop pair at a given level
408       * @param [in] locate_name name of subtimer, if blank return current level time
409       * @return time in seconds per timer start/stop pair at provided level
410       */
accumulatedTimePerTimerCall(const std::string & locate_name="")411      double accumulatedTimePerTimerCall(const std::string &locate_name="") {
412 
413        if (locate_name == "")
414          return BaseTimer::accumulatedTimePerTimerCall();
415 
416        std::string first_name,second_name;
417        splitString(locate_name, first_name, second_name);
418 
419        for (unsigned j=0;j<sub_timers_.size();j++)
420          if ( first_name == sub_timers_[j].name_)
421            return sub_timers_[j].accumulatedTimePerTimerCall(second_name);
422        return 0;
423      }
424 
425      /**
426       * Pack up all the levels into a buffer and mpi send to rank 0
427       */
428      void pack();
429 
430      /**
431       * Unpack the level timer stack from a mpi recv
432       * @param [in] from rank you are sending from
433       * @return pointer to level timer unpacked
434       */
435      LevelTimer* unpack(unsigned from);
436 
437      /**
438       * Dump the timer stats in a pretty format to ostream
439       * @param [in,out] os  Where are you dumping the stats, stdout??
440       */
441      void report(std::ostream &os);
442 
443     /**
444      * Return pointer to the BaseTimer corresponding to a given string
445      * @param name input string to search for
446      * @return pointer to BaseTimer (nullptr if none found)
447      */
448     const BaseTimer* findBaseTimer(const std::string &name) const;
449 
450      /**
451       * Return the time info for a given string
452       * @param name input string to search for
453       * @param set to true on exit if timer was found
454       * @return Time data
455       */
456     BaseTimer::TimeInfo findTimer(const std::string &name,bool& found);
457 
458   protected:
459 
460 
461   }; // LevelTimer
462 
463 
464 
465 
466 public:
467    /**
468     * Construct a stacked timer
469     * @param [in] name Top level name of the timer
470     * @param [in] start_top_timer Automatically start the top level timer. If set to false, the user will have to start it manually.
471     */
StackedTimer(const char * name,const bool start_base_timer=true)472   explicit StackedTimer(const char *name, const bool start_base_timer = true)
473     : timer_(0,name,nullptr,false),
474       enable_verbose_(false),
475       verbose_timestamp_levels_(0), // 0 disables
476       verbose_ostream_(Teuchos::rcpFromRef(std::cout))
477   {
478     top_ = &timer_;
479     if (start_base_timer)
480       this->startBaseTimer();
481 
482     auto check_verbose = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMERS");
483     if (check_verbose != nullptr)
484       enable_verbose_ = true;
485 
486     auto check_timestamp = std::getenv("TEUCHOS_ENABLE_VERBOSE_TIMESTAMP_LEVELS");
487     if (check_timestamp != nullptr) {
488       verbose_timestamp_levels_ = std::atoi(check_timestamp);
489     }
490   }
491 
492   /**
493    * Start the base level timer only
494    */
startBaseTimer()495   void startBaseTimer() {
496     timer_.BaseTimer::start();
497 #if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOSCORE)
498     ::Kokkos::Profiling::pushRegion(timer_.get_full_name());
499 #endif
500   }
501 
502   /**
503    * Stop the base level timer only
504    */
stopBaseTimer()505   void stopBaseTimer() {
506     timer_.BaseTimer::stop();
507 #if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOSCORE)
508     ::Kokkos::Profiling::popRegion();
509 #endif
510   }
511 
512   /**
513    * Start a sublevel timer
514    * @param [in] name Name of the timer you wish to start
515    * @param [in] push_kokkos_profiling_region Optional parameter that if set to true, will pushRegion() in kokkos profiling for this timer. The TimeMonitor will always set this to false since it does its own pushRegion() in the Timer object (this prevents double registering with kokkos).
516    */
start(const std::string name,const bool push_kokkos_profiling_region=true)517   void start(const std::string name,
518              const bool push_kokkos_profiling_region = true) {
519     if (top_ == nullptr)
520       top_ = timer_.start(name.c_str());
521     else
522       top_ = top_->start(name.c_str());
523 #if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOSCORE)
524     if (push_kokkos_profiling_region) {
525       ::Kokkos::Profiling::pushRegion(name);
526     }
527 #endif
528     if (enable_verbose_) {
529       if (!verbose_timestamp_levels_) {
530         *verbose_ostream_ << "STARTING: " << name << std::endl;
531       }
532       // gcc 4.X is incomplete in c++11 standard - missing
533       // std::put_time. We'll disable this feature for gcc 4.
534 #if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
535       else if (top_ != nullptr) {
536         if ( top_->level() <= verbose_timestamp_levels_) {
537           auto now = std::chrono::system_clock::now();
538           auto now_time = std::chrono::system_clock::to_time_t(now);
539           auto gmt = gmtime(&now_time);
540           auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
541           auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
542           *verbose_ostream_ << "STARTING: " << name << " LEVEL: " << top_->level() << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
543         }
544       }
545 #endif
546     }
547   }
548 
549   /**
550    * Stop the current top running timer, should be called for the root timer prior to final output
551    * @param [in] name Name of the timer you wish to stop
552    * @param [in] pop_kokkos_profiling_region Optional parameter that if set to true, will popRegion() in kokkos profiling for this timer. The TimeMonitor will always set this to false since it does its own pushRegion() in the Timer object (this prevents double registering with kokkos).
553    */
stop(const std::string & name,const bool pop_kokkos_profiling_region=true)554   void stop(const std::string &name,
555             const bool pop_kokkos_profiling_region = true) {
556     if (top_)
557       top_ = top_->stop(name);
558     else
559       timer_.BaseTimer::stop();
560 #if defined(HAVE_TEUCHOS_KOKKOS_PROFILING) && defined(HAVE_TEUCHOSCORE_KOKKOSCORE)
561     if (pop_kokkos_profiling_region) {
562       ::Kokkos::Profiling::popRegion();
563     }
564 #endif
565     if (enable_verbose_) {
566       if (!verbose_timestamp_levels_) {
567         *verbose_ostream_ << "STOPPING: " << name << std::endl;
568       }
569       // gcc 4.X is incomplete in c++11 standard - missing
570       // std::put_time. We'll disable this feature for gcc 4.
571 #if !defined(__GNUC__) || ( defined(__GNUC__) && (__GNUC__ > 4) )
572       // The stop adjusts the top level, need to adjust by +1 for printing
573       else if (top_ != nullptr) {
574         if ( (top_->level()+1) <= verbose_timestamp_levels_) {
575           auto now = std::chrono::system_clock::now();
576           auto now_time = std::chrono::system_clock::to_time_t(now);
577           auto gmt = gmtime(&now_time);
578           auto timestamp = std::put_time(gmt, "%Y-%m-%d %H:%M:%S");
579           auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
580           *verbose_ostream_ << "STOPPING: " << name << " LEVEL: " << top_->level()+1 << " COUNT: " << timer_.numCalls() << " TIMESTAMP: " << timestamp << "." << ms.count() << std::endl;
581         }
582       }
583 #endif
584     }
585   }
586 
587   /**
588    * Increment the iteration count for the running timer
589    *   @param [in] i amount to increment the count
590    */
incrementUpdates(const long long i=1)591   void incrementUpdates(const long long i = 1) {
592     top_->incrementUpdates(i);
593   }
594 
595   /**
596    * Return the accumulated time the named timer has, if name is blank top level timer
597    * @param [in] name Name of the timer to output
598    * @return amount of time in seconds
599    */
accumulatedTime(const std::string & name="")600   double accumulatedTime(const std::string &name="") {
601     if (top_) // Top is null for the head node when nothing is running
602       return top_->accumulatedTime(name);
603     else
604       return timer_.accumulatedTime(name);
605   }
606 
607   /**
608    * Return the time spent per update at a given level
609    * @param [in] name name of subtimer, if blank return current level time
610    * @return time in seconds per update at provided level
611    */
accumulatedTimePerUpdate(const std::string & name="")612   double accumulatedTimePerUpdate(const std::string &name="") {
613      if (top_) // Top is null for the head node when nothing is running
614        return top_->accumulatedTimePerUpdate(name);
615      else
616        return timer_.accumulatedTimePerUpdate(name);
617    }
618   /**
619    * Return the time spent per timer start/stop at a given level
620    * @param [in] name name of subtimer, if blank return current level time
621    * @return time in seconds per start/stop call at provided level
622    */
accumulatedTimePerTimerCall(const std::string & name="")623   double accumulatedTimePerTimerCall(const std::string &name="") {
624      if (top_) // Top is null for the head node when nothing is running
625        return top_->accumulatedTimePerTimerCall(name);
626      else
627        return timer_.accumulatedTimePerTimerCall(name);
628    }
629 
630   /**
631    * Return pointer to the BaseTimer corresponding to a given string (full string name)
632    * @param name input string to search for
633    * @return BaseTimer
634    */
findBaseTimer(const std::string & name) const635   const BaseTimer* findBaseTimer(const std::string &name) const {
636     const BaseTimer* baseTimer = timer_.findBaseTimer(name);
637     TEUCHOS_TEST_FOR_EXCEPTION(baseTimer == nullptr, std::runtime_error,
638                                "StackedTimer::findBaseTimer() failed to find a timer named \"" << name << "\"!\n");
639     return baseTimer;
640   }
641 
642   /**
643    * Return the time info for a given string (full string name)
644    * @param name input string to search for
645    * @return Time data
646    */
findTimer(const std::string & name)647   BaseTimer::TimeInfo findTimer(const std::string &name) {
648     bool foundTimer = false;
649     const auto timeInfo = timer_.findTimer(name,foundTimer);
650     TEUCHOS_TEST_FOR_EXCEPTION(!foundTimer, std::runtime_error,
651       "StackedTimer::findTimer() failed to find a timer named \"" << name << "\"!\n");
652     return timeInfo;
653   }
654 
report(std::ostream & os)655   void report(std::ostream &os) {
656     timer_.report(os);
657   }
658 
659   /** Struct for controlling output options like histograms
660 
661       @param output_fraction Print the timer fractions within a level.
662       @param output_total_updates Print the updates counter.
663       @param output_historgram Print the histogram.
664       @param output_minmax Print the min max and standard deviation across MPI processes.
665       @param num_histogram The number of equally size bickets to use in the histogram.
666       @param max_level The number of levels in the stacked timer to print (default prints all levels).
667       @param print_warnings Print any relevant warnings on stacked timer use.
668       @param align_columns Output will align the columsn of stacked timer data.
669       @param print_names_before_values If set to true, writes the timer names before values.
670       @param drop_time If a timer has a total time less that this value, the timer will not be printed and the total time of that timer will be added to the Remainder. Useful for ignoring negligible timers. Default is -1.0 to force printing of all timers even if they have zero accumulated time.
671    */
672   struct OutputOptions {
OutputOptionsTeuchos::StackedTimer::OutputOptions673     OutputOptions() : output_fraction(false), output_total_updates(false), output_histogram(false),
674                       output_minmax(false), output_proc_minmax(false), num_histogram(10), max_levels(INT_MAX),
675                       print_warnings(true), align_columns(false), print_names_before_values(true),
676                       drop_time(-1.0) {}
677     bool output_fraction;
678     bool output_total_updates;
679     bool output_histogram;
680     bool output_minmax;
681     bool output_proc_minmax;
682     int num_histogram;
683     int max_levels;
684     bool print_warnings;
685     bool align_columns;
686     bool print_names_before_values;
687     double drop_time;
688   };
689 
690   /**
691    * Dump all the data from all the MPI ranks to an ostream
692    * @param [in,out] os - Output stream
693    * @param [in] comm - Teuchos comm pointer
694    */
695   void report(std::ostream &os, Teuchos::RCP<const Teuchos::Comm<int> > comm, OutputOptions options = OutputOptions());
696 
697   /**
698    * Dump all the data from all the MPI ranks to the given output stream, in Watchr XML format.
699    *  \c reportWatchrXML() is a wrapper for this function that creates \c os as a \c std::ofstream.
700    * @param [in,out] os - stream where XML will be written
701    * @param [in] datestamp - UTC datestamp in the format YYYY_MM_DD
702    * @param [in] timestamp - UTC timestamp in ISO 8601 format: YYYY-MM-DDTHH:MM:SS (HH in 24-hour time, and T is just the character 'T')
703    * @param [in] comm - Teuchos comm pointer
704    * @return The complete filename, or empty string if no output was produced.
705    */
706   void reportXML(std::ostream &os, const std::string& datestamp, const std::string& timestamp, Teuchos::RCP<const Teuchos::Comm<int> > comm);
707 
708   /**
709    * Dump all the data from all the MPI ranks to an XML file. This function
710    * calls reportXML and is intended to be used directly by performance tests.
711    *
712    * In the XML report, the top-level timer name (representing the total) will be replaced with
713    * \c name (the name of the test). This becomes the top-level chart name in Watchr.
714    *
715    * The output filename is controlled by two environment variables: \c $WATCHR_PERF_DIR (required for output)
716    * and \c $WATCHR_BUILD_NAME (optional). \c $WATCHR_PERF_DIR is the directory where the output file
717    * will be created. If it is not set or is empty, no output will be produced and the function returns an empty
718    * string on all ranks.
719    *
720    * If \c $WATCHR_BUILD_NAME is set, the filename is
721    * \c $WATCHR_BUILD_NAME-name_$DATESTAMP.xml. Additionally, the build name is prepended
722    * (verbatim) to the the top-level chart name,
723    * so that Watchr knows it is a different data series than runs of the same test from other builds.
724    *
725    * If \c $WATCHR_BUILD_NAME is not set or is empty, the filename is just \c name_$DATESTAMP.xml .
726    * DATESTAMP is calculated from the current UTC time, in the format YYYY_MM_DD.
727    *
728    * In the filename, all spaces in will be replaced by underscores.
729    *
730    * @param [in] name - Name of performance test
731    * @param [in] comm - Teuchos comm pointer
732    * @return If on rank 0 and output was produced, the complete output filename. Otherwise the empty string.
733    */
734   std::string reportWatchrXML(const std::string& name, Teuchos::RCP<const Teuchos::Comm<int> > comm);
735 
736   // If set to true, print timer start/stop to verbose ostream.
737   void enableVerbose(const bool enable_verbose);
738 
739   // Enable timestamps in verbose mode for the number of levels specified.
740   void enableVerboseTimestamps(const unsigned levels);
741 
742   // Set the ostream for verbose mode(defaults to std::cout).
743   void setVerboseOstream(const Teuchos::RCP<std::ostream>& os);
744 
745 protected:
746   /// Current level running
747   LevelTimer *top_;
748   /// Base timer
749   LevelTimer timer_;
750 
751   Array<std::string> flat_names_;
752   Array<double> min_;
753   Array<double> max_;
754   Array<int> procmin_;
755   Array<int> procmax_;
756   Array<double> sum_;
757   Array<double> sum_sq_;
758   Array<Array<int>> hist_;
759   Array<unsigned long> count_;
760   Array<unsigned long long> updates_;
761   Array<int> active_;
762 
763   /// Stores the column widths for output alignment
764   struct AlignmentWidths {
765     std::string::size_type timer_names_;
766     std::string::size_type average_time_;
767     std::string::size_type fraction_;
768     std::string::size_type count_;
769     std::string::size_type total_updates_;
770     std::string::size_type min_;
771     std::string::size_type max_;
772     std::string::size_type procmin_;
773     std::string::size_type procmax_;
774     std::string::size_type stddev_;
775     std::string::size_type histogram_;
AlignmentWidthsTeuchos::StackedTimer::AlignmentWidths776     AlignmentWidths() :
777       timer_names_(0),
778       average_time_(0),
779       fraction_(0),
780       count_(0),
781       total_updates_(0),
782       min_(0),
783       max_(0),
784       procmax_(0),
785       stddev_(0),
786       histogram_(0){}
787   } alignments_;
788 
789   /// If set to true, prints to the debug ostream. At construction, default value is set from environment variable.
790   bool enable_verbose_;
791 
792   /// If set to a value greater than 0, verbose mode will print that many levels of timers with timestamps. A value of zero disables timestamps.
793   unsigned verbose_timestamp_levels_;
794 
795   /// For debugging, this is the ostream used for printing.
796   Teuchos::RCP<std::ostream> verbose_ostream_;
797 
798   /**
799     * Flatten the timers into a single array
800     */
801    void flatten();
802 
803    /**
804     * Merge all the timers together into a single structure
805     * @param [in] comm - Communicator to use
806     */
807    void merge(Teuchos::RCP<const Teuchos::Comm<int> > comm);
808 
809    /**
810     * Migrate all the timer data to rank=0 if parallel
811     */
812    void collectRemoteData(Teuchos::RCP<const Teuchos::Comm<int> > comm, const OutputOptions &options );
813 
814   /**
815    * Compute the column widths to align the output from report() in
816    * columns.
817    *
818    * \returns total time for this level
819    */
820   double computeColumnWidthsForAligment(std::string prefix, int print_level,
821                                         std::vector<bool> &printed, double parent_time,
822                                         const OutputOptions &options);
823 
824    /**
825     * Recursive call to print a level of timer data.
826     */
827   double printLevel(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed,
828                     double parent_time, const OutputOptions &options);
829 
830    /**
831     * Recursive call to print a level of timer data, in Watchr XML format.
832     * If non-empty, rootName will replace the root level's timer name in the output. If empty or level > 0, it has no effect.
833     */
834   double printLevelXML(std::string prefix, int level, std::ostream &os, std::vector<bool> &printed, double parent_time, const std::string& rootName = "");
835 
836 };  //StackedTimer
837 
838 
839 } //namespace Teuchos
840 
841 #endif /* TEUCHOS_STACKED_TIMER_HPP */
842