1 // @HEADER
2 // ***********************************************************************
3 //
4 //                    Teuchos: Common Tools Package
5 //                 Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 // @HEADER
39 
40 #ifndef TEUCHOS_TIMEMONITOR_HPP
41 #define TEUCHOS_TIMEMONITOR_HPP
42 
43 
44 /*! \file Teuchos_TimeMonitor.hpp
45  *
46  * \brief Scope guard for Teuchos::Time, with MPI collective timer
47  *   reporting.
48  *
49  * An instance of the Teuchos::TimeMonitor class wraps a nonconst
50  * reference to a Teuchos::Time timer object.  TimeMonitor's
51  * constructor starts the timer, and its destructor stops the timer.
52  * This ensures scope safety of timers, so that no matter how a scope
53  * is exited (whether the normal way or when an exception is thrown),
54  * a timer started in the scope is stopped when the scope is left.
55  *
56  * TimeMonitor also has class methods that create or destroy timers
57  * (in such a way that it can track the complete set of created timers
58  * on each process) and compute global timer statistics.
59  */
60 
61 /** \example TimeMonitor/cxx_main.cpp
62  *
63  * This is an example of how to use the Teuchos::TimeMonitor class.
64  */
65 
66 #include "Teuchos_PerformanceMonitorBase.hpp"
67 #include "Teuchos_ParameterList.hpp"
68 #include "Teuchos_Comm.hpp"
69 #include "Teuchos_Time.hpp"
70 
71 #include "Teuchos_CommandLineProcessor.hpp"
72 
73 /// \brief Defines a static function \c funcName() that returns the
74 ///   Teuchos timer named \c strName, creating it on the first call.
75 /// \warning Please don't use this macro.  It is a bad idea to keep
76 ///   around static RCP objects past return from main().
77 #define TEUCHOS_TIMER(funcName, strName) \
78   static Teuchos::Time& funcName() \
79   {static Teuchos::RCP<Teuchos::Time> rtn = \
80       Teuchos::TimeMonitor::getNewCounter(strName); return *rtn;}
81 
82 
83 /** \brief Times the rest of the enclosing scope (with differentiator).
84  *
85  * Same as TEUCHOS_FUNC_TIME_MONITOR(...), but DIFF is pasted into the local
86  * variable names, so distinct DIFF values allow several uses in one scope.
87  * FUNCNAME is streamed into the timer label; the timer is created only once.
88  * \warning Please don't use this macro.  It is a bad idea to keep
89  *   around static RCP objects past return from main().
90  */
91 #define TEUCHOS_FUNC_TIME_MONITOR_DIFF( FUNCNAME, DIFF ) \
92   static Teuchos::RCP<Teuchos::Time> DIFF ## blabla_localTimer; \
93   if(!DIFF ## blabla_localTimer.get()) { \
94     std::ostringstream oss; \
95     oss << FUNCNAME; \
96     DIFF ## blabla_localTimer = Teuchos::TimeMonitor::getNewCounter(oss.str()); \
97   } \
98   Teuchos::TimeMonitor DIFF ## blabla_localTimeMonitor(*DIFF ## blabla_localTimer)
99 
100 
101 /** \brief Defines a timer for a specific function.
102  *
103  The timer name may be formatted with stream inserts; use
104  TEUCHOS_FUNC_TIME_MONITOR_DIFF for further timers in the same scope.
105  For example, we can define a time monitor for a function as follows:
106  \code
107  template<typename Scalar>
108  void foo()
109  {
110    TEUCHOS_FUNC_TIME_MONITOR(
111      "foo<" << Teuchos::ScalarTraits<Scalar>::name () << ">()"
112      );
113    ...
114  }
115  \endcode
116 
117  The timer can then be printed at the end of the program using any of
118  various class methods, including summarize():
119  \code
120  Teuchos::TimeMonitor::summarize ();
121  \endcode
122 */
123 #define TEUCHOS_FUNC_TIME_MONITOR( FUNCNAME ) \
124   TEUCHOS_FUNC_TIME_MONITOR_DIFF( FUNCNAME, main )
125 
126 
127 namespace Teuchos {
128 
129 // Forward declaration
130 class StackedTimer;
131 
132 /// \typedef stat_map_type
133 /// \brief Map from timer name to that timer's global statistics.
134 ///
135 /// Key: the timer's name.
136 ///
137 /// Value: a vector of (timing, call count) pairs, one pair per
138 ///   statistic (e.g., minimum, arithmetic mean, or maximum).  The
139 ///   k-th pair belongs to the k-th entry of the auxiliary
140 ///   "statNames" array, which lists the statistics in the same
141 ///   order.  See the documentation of
142 ///   \c TimeMonitor::computeGlobalTimerStatistics().
143 using stat_map_type = std::map<std::string, std::vector<std::pair<double, double> > >;
144 
145 /// \class TimeMonitor
146 /// \brief Scope guard for Time, that can compute MPI collective timer
147 ///   statistics.
148 ///
149 /// An instance of the TimeMonitor class wraps a nonconst reference to
150 /// a Time timer object.  TimeMonitor's constructor starts the timer,
151 /// and its destructor stops the timer.  This ensures scope safety of
152 /// timers, so that no matter how a scope is exited (whether the
153 /// normal way or when an exception is thrown), a timer started in the
154 /// scope is stopped when the scope is left.
155 ///
156 /// TimeMonitor also has class methods that create or destroy timers
157 /// and compute global timer statistics.  If you create a timer using
158 /// getNewCounter() (or the deprecated getNewTimer()), it will add
159 /// that timer to the set of timers for which to compute global
160 /// statistics.  The summarize() and report() methods will print
161 /// global statistics for these timers, like the minimum, mean, and
162 /// maximum time over all processes in the communicator, for each
163 /// timer.  These methods work correctly even if some processes have
164 /// different timers than other processes.  You may also use
165 /// computeGlobalTimerStatistics() to compute the same global
166 /// statistics, if you wish to use them in your program or output them
167 /// in a different format than that of these methods.
168 ///
169 /// If Teuchos is configured with <tt>TPL_ENABLE_Valgrind=ON</tt>
170 /// and <tt>Teuchos_TIME_MASSIF_SNAPSHOTS=ON</tt>, Valgrind Massif
171 /// snapshots are taken before and after each Time invocation. The
172 /// resulting memory profile can be plotted using
173 /// <tt>core/utils/plotMassifMemoryUsage.py</tt>.
174 ///
175 /// \warning This class must only be used to time functions that are
176 ///   called only within the main program.  It may <i>not</i> be used
177 ///   in pre-program setup or post-program teardown!
178 class TEUCHOSCOMM_LIB_DLL_EXPORT TimeMonitor :
179     public PerformanceMonitorBase<Time> {
180 public:
181 
182   /** \name Constructor/Destructor */
183   //@{
184 
185   /// \brief Constructor: starts the timer.
186   ///
187   /// \param timer [in/out] Reference to the timer to be wrapped.
188   ///   This constructor starts the timer, and the destructor stops
189   ///   the timer.
190   ///
191   /// \param reset [in] If true, reset the timer before starting it.
192   ///   Default behavior is not to reset the timer.
193   TimeMonitor (Time& timer, bool reset=false);
194 
195   //! Default constructor is deleted, since it would be unsafe.
196   TimeMonitor () = delete;
197 
198   //! Destructor: stops the timer.
199   ~TimeMonitor() override;
200   //@}
201 
202   /// \brief Return a new timer with the given name (class method).
203   ///
204   /// Use this method or getNewCounter() to create a named timer that
205   /// TimeMonitor tracks for later computation of global statistics
206   /// over processes.  This method only forwards to getNewCounter()
207   /// (inherited from the base class); it exists for backwards
208   /// compatibility.
209   /// \param name [in] Name of the requested timer.
210   static RCP<Time> getNewTimer (const std::string& name) {
211     return getNewCounter (name);
212   }
213 
214   /// \brief Disable the timer with the given name.
215   ///
216   /// "Disable" means that the timer (Time instance) will ignore all
217   /// calls to start(), stop(), and incrementNumCalls().  The effect
218   /// will be as if the TimeMonitor had never touched the timer.
219   ///
220   /// If the timer with the given name does not exist (was never
221   /// created using getNewCounter() or getNewTimer()), then this
222   /// method throws std::invalid_argument.  Otherwise, it disables the
223   /// timer.  This effect lasts until the timer is cleared or until
224   /// the timer is enabled, either by calling enableTimer() (see
225   /// below) or by calling the Time instance's enable() method.
226   ///
227   /// Disabling a timer does <i>not</i> exclude it from the list of
228   /// timers printed by summarize() or report().
229   static void disableTimer (const std::string& name);
230 
231   /// \brief Enable the timer with the given name.
232   ///
233   /// If the timer with the given name does not exist (was never
234   /// created using getNewCounter() or getNewTimer()), then this
235   /// method throws std::invalid_argument.  Otherwise, it undoes the
236   /// effect of disableTimer() on the timer with the given name.  If
237   /// the timer with the given name was not disabled, then this method
238   /// does nothing.
239   static void enableTimer (const std::string& name);
240 
241   /// \brief Reset all global timers to zero.
242   ///
243   /// This method only affects Time objects created by getNewCounter()
244   /// or getNewTimer().
245   ///
246   /// \pre No timer may be running when this method is called.
247   static void zeroOutTimers();
248 
249   /// \brief Compute global timer statistics for all timers on the
250   ///   given communicator.
251   ///
252   /// The typical use case for Time and TimeMonitor is that all
253   /// processes in a communicator create the same set of timers, and
254   /// then want to report summary statistics.  This method supports
255   /// that typical use case.  For each timer in the set, this method
256   /// computes a list of global statistics.  "Global" means "for all
257   /// processes in the communicator."  "Statistic" means the result of
258   /// a reduction over the timing and call count values.  Thus, each
259   /// statistic includes both a timing and a call count.  The current
260   /// list of computed statistics includes the minimum and maximum
261   /// timing (and the corresponding call count for each) and the
262   /// arithmetic mean (timing and call count).  This list may expand
263   /// in the future.
264   ///
265   /// Different processes may have different sets of timers.  This
266   /// method gives you two options for reconciling the sets.  If setOp
267   /// is Intersection, it computes the intersection (the common
268   /// subset) of timers on all processes in the communicator.
269   /// Otherwise, if setOp is Union, this method computes the union of
270   /// timers on all processes in the communicator.  Intersection is
271   /// the default, since it means that all reported timers exist on
272   /// all participating processes.  For setOp=Union, timers that do
273   /// not exist on some processes will be given a zero timing and call
274   /// count, so that statistics make sense.
275   ///
276   /// \note This method must be called as a collective by all processes
277   ///   in the communicator.
278   ///
279   /// All output arguments are returned redundantly on all processes
280   /// in the communicator.  That makes this method an all-reduce.
281   ///
282   /// \section Teuchos_TimeMonitor_computeGlobalTimerStatistics_stats Statistics collected
283   ///
284   /// The "MinOverProcs" and "MaxOverProcs" timings are cumulative:
285   /// the reported timing is for all calls.  Along with the min resp.
286   /// max timing comes the call count of the process who had the min
287   /// resp. max.  (If more than one process had the min resp. max
288   /// timing, then the call count on the process with the smallest
289   /// rank is reported.)
290   ///
291   /// The "MeanOverProcs" equals the sum of the processes' cumulative
292   /// timings, divided by the number of processes.  Thus, it is
293   /// cumulative over all calls, and is comparable with the
294   /// "MinOverProcs" and "MaxOverProcs" timings.  This differs from
295   /// the "MeanOverCallCounts" (see below).  This does <i>not</i>
296   /// weight the mean by call counts.
297   ///
298   /// The "MeanOverCallCounts" is an arithmetic mean of all timings.
299   /// It is <i>not</i> cumulative.  It reports the mean timing for a
300   /// single invocation over all calls on all processes, not weighting
301   /// any one process more than the others.  For each timer, this is
302   /// the sum of the cumulative timing over all processes, divided by
303   /// the sum of the call counts over all processes for that timing.
304   /// (We compute it a bit differently to help prevent overflow.)  The
305   /// "MeanOverCallCounts" is <i>not</i> comparable with the min, max,
306   /// or "MeanOverProcs".
307   ///
308   /// We report with both versions of the mean timing the mean call
309   /// count over processes.  This may be fractional, which is one
310   /// reason why we report call counts as <tt>double</tt> rather than
311   /// <tt>int</tt>.  It has no particular connection to the mean
312   /// timing.
313   ///
314   /// \section Teuchos_TimeMonitor_computeGlobalTimerStatistics_perf Performance
315   ///
316   /// This operation requires interprocess communication.  Suppose
317   /// there are \f$P\f$ processes in the given communicator, and
318   /// \f$N\f$ unique timers in the global union of all processes'
319   /// timers.  Then, this method requires \f$O(\log P)\f$ messages
320   /// (\f$O(1)\f$ "reductions" and exactly 1 "broadcast") and
321   /// \f$O(N)\f$ per-processor storage (in the worst case) when
322   /// computing either the intersection or the union of timers (the
323   /// algorithm is similar in either case).  The whole algorithm takes
324   /// at worst \f$O(N (\log N) (\log P))\f$ time along the critical
325   /// path (i.e., on the "slowest process" in the communicator).  The
326   /// \f$N \log N\f$ term comes from sorting the timers by label at
327   /// each stage of the reduction in order to compute their union or
328   /// intersection.
329   ///
330   /// \param statData [out] On output: Global timer statistics, stored
331   ///   as a map with key timer name, and with value the ordered list
332   ///   of statistics for that timer.  The \c statNames output has the
333   ///   same order as the ordered list of statistics for each timer.
334   ///   Each entry of the statistics list is a (timing, call count)
335   ///   pair, the meaning of which depends on the particular statistic
336   ///   (see above).
337   ///
338   /// \param statNames [out] On output: Each value in the statData map
339   ///   is a vector.  That vector v has the same number of entries as
340   ///   statNames.  statNames[k] is the name of the statistic (see
341   ///   above) stored as v[k].  Always refer to statNames for the
342   ///   number and names of statistics.
343   ///
344   /// \param comm [in] Communicator whose process(es) will participate
345   ///   in the gathering of timer statistics.  This is a Ptr and not
346   ///   an RCP, because RCP would suggest that TimeMonitor were
347   ///   keeping the communicator around after return of this method.
348   ///   Ptr suggests instead that TimeMonitor will only reference the
349   ///   communicator during this method.  If you have an RCP, you can
350   ///   turn it into a Ptr by calling its ptr() method:
351   ///   \code
352   ///   RCP<const Comm<int> > myComm = ...;
353   ///   TimeMonitor::computeGlobalTimerStatistics (statData, statNames, myComm.ptr());
354   ///   \endcode
355   ///
356   /// \param setOp [in] If \c Intersection, compute statistics for the
357   ///   intersection of all created timers over all processes in the
358   ///   communicator.  If \c Union, compute statistics for the union
359   ///   of all created timers over all processes in the communicator.
360   ///
361   /// \param filter [in] Filter for timer labels.  If filter is not
362   ///   empty, this method will only compute statistics for timers
363   ///   whose labels begin with this string.
364   static void
365   computeGlobalTimerStatistics (stat_map_type& statData,
366                                 std::vector<std::string>& statNames,
367                                 Ptr<const Comm<int> > comm,
368                                 const ECounterSetOp setOp=Intersection,
369                                 const std::string& filter="");
370 
371   /// \brief Compute global timer statistics for all timers on all
372   ///   (MPI) processes (in MPI_COMM_WORLD).
373   ///
374   /// This is an overload of the above computeGlobalTimerStatistics()
375   /// method for when the caller does not want to provide a
376   /// communicator explicitly.  This method "does the right thing" in
377   /// that case.  Specifically:
378   ///
379   /// <ul>
380   /// <li> If Trilinos was not built with MPI support, this method
381   ///      assumes a serial "communicator" containing one
382   ///      process. </li>
383   /// <li> If Trilinos was built with MPI support and MPI has been
384   ///      initialized (via MPI_Init() or one of the wrappers in
385   ///      Epetra or Teuchos), this method uses MPI_COMM_WORLD as the
386   ///      communicator.  This is the most common case. </li>
387   /// <li> If Trilinos was built with MPI support and MPI has
388   ///      <i>not</i> been initialized, this method will use a
389   ///      "serial" communicator (that does not actually use MPI).
390   ///      This may produce output on all the MPI processes if you are
391   ///      running with Trilinos as an MPI job with more than one
392   ///      process.  Thus, if you intend to use this method in
393   ///      parallel, you should first initialize MPI.  (We cannot
394   ///      initialize MPI for you, because we have no way to know
395   ///      whether you intend to run an MPI-enabled build serially.)
396   ///      </li>
397   /// </ul>
398   ///
399   /// \warning If you call this method when MPI is running, you
400   ///   <i>must</i> call it on all processes in \c MPI_COMM_WORLD.
401   ///   Otherwise, the method will never finish, since it will be
402   ///   waiting forever for the non-participating processes.  If you
403   ///   want to use computeGlobalTimerStatistics() on a
404   ///   subcommunicator, please use the overloaded version above that
405   ///   takes a communicator as an input argument.
406   static void
407   computeGlobalTimerStatistics (stat_map_type& statData,
408                                 std::vector<std::string>& statNames,
409                                 const ECounterSetOp setOp=Intersection,
410                                 const std::string& filter="");
411 
412   /// \brief Print summary statistics for all timers on the given
413   ///   communicator.
414   ///
415   /// If writeGlobalStats=true, this method computes the same
416   /// statistics as computeGlobalTimerStatistics(), using the same
417   /// collective algorithm.  (<tt>writeGlobalStats=false</tt> means
418   /// that only the process with rank 0 in the communicator reports
419   /// its timers' data.)  It then reports the results to the given
420   /// output stream on the process with rank 0 in the given
421   /// communicator.  Output follows a human-readable tabular form.
422   ///
423   /// \param comm [in] Communicator whose process(es) will participate
424   ///   in the gathering of timer statistics.  This is a Ptr and not
425   ///   an RCP, because RCP would suggest that TimeMonitor were
426   ///   keeping the communicator around after return of this method.
427   ///   Ptr suggests instead that TimeMonitor will only reference the
428   ///   communicator during this method.  If you have an RCP, you can
429   ///   turn it into a Ptr by calling its ptr() method:
430   ///   \code
431   ///   RCP<const Comm<int> > myComm = ...;
432   ///   TimeMonitor::summarize (myComm.ptr());
433   ///   \endcode
434   ///
435   /// \param out [out] Output stream to which to write.  This will
436   ///   only be used on the process with rank 0 in the communicator.
437   ///
438   /// \param alwaysWriteLocal [in] If true, the process with Rank 0 in
439   ///   the communicator will write its local timings to the given
440   ///   output stream.  Defaults to false, since the global statistics
441   ///   are more meaningful.  If the local set of timers differs from
442   ///   the global set of timers (either the union or the
443   ///   intersection, depending on \c setOp), Proc 0 will create
444   ///   corresponding local timer data (<i>not</i> corresponding
445   ///   timers) with zero elapsed times and call counts, just to pad
446   ///   the table of output.
447   ///
448   /// \param writeGlobalStats [in] If true (the default), compute and
449   ///   display the statistics that \c computeGlobalTimerStatistics()
450   ///   computes.  If there is only one MPI process or if this is a
451   ///   non-MPI build of Trilinos, only compute and show the "global"
452   ///   timings, without the "statistics" that would be all the same
453   ///   anyway.
454   ///
455   /// \param writeZeroTimers [in] If false, do not display results for
456   ///   timers that have never been called (numCalls() == 0).  If
457   ///   true, display results for all timers, regardless of their call
458   ///   count.  Note that \c setOp and \c writeGlobalStats might
459   ///   reintroduce timers with zero call counts.
460   ///
461   /// \param setOp [in] If \c Intersection, compute and display the
462   ///   intersection of all created timers over all processes in the
463   ///   communicator.  If \c Union, compute and display the union of
464   ///   all created timers over all processes in the communicator.
465   ///
466   /// \param filter [in] Filter for timer labels.  If filter is not
467   ///   empty, this method will only print timers whose labels begin
468   ///   with this string.
469   ///
470   /// \param ignoreZeroTimers [in] Processes that either do not have
471   ///   a particular timer or have zero time for a timer are not used
472   ///   in calculating global statistics. This mode requires one
473   ///   additional all-reduce per invocation.
474   ///
475   /// \note If \c writeGlobalStats is true, this method <i>must</i> be
476   ///   called as a collective by all processes in the communicator.
477   ///   This method will <i>only</i> perform communication if
478   ///   <tt>writeGlobalStats</tt> is true.
479   static void
480   summarize (Ptr<const Comm<int> > comm,
481              std::ostream &out=std::cout,
482              const bool alwaysWriteLocal=false,
483              const bool writeGlobalStats=true,
484              const bool writeZeroTimers=true,
485              const ECounterSetOp setOp=Intersection,
486              const std::string& filter="",
487              const bool ignoreZeroTimers=false);
488 
489   /// \brief Print summary statistics for all timers on all (MPI)
490   ///   processes (in MPI_COMM_WORLD).
491   ///
492   /// This is an overload of the above summarize() method for when the
493   /// caller does not want to provide a communicator explicitly.  This
494   /// method "does the right thing" in that case.  For an explanation
495   /// of what that means, see the documentation of the overload of
496   /// computeGlobalTimerStatistics() that does not require a
497   /// communicator argument.
498   ///
499   /// \warning If you call this method when MPI is running, you
500   ///   <i>must</i> call it on all processes in \c MPI_COMM_WORLD.
501   ///   Otherwise, the method will never finish, since it will be
502   ///   waiting forever for the non-participating processes.  If you
503   ///   want to use \c summarize() on a subcommunicator, please use
504   ///   the overloaded version above that takes a communicator as an
505   ///   input argument.
506   static void
507   summarize (std::ostream& out=std::cout,
508              const bool alwaysWriteLocal=false,
509              const bool writeGlobalStats=true,
510              const bool writeZeroTimers=true,
511              const ECounterSetOp setOp=Intersection,
512              const std::string& filter="",
513              const bool ignoreZeroTimers=false);
514 
515   /// \brief Report timer statistics to the given output stream.
516   ///
517   /// This is like summarize(), but gives you more control over the
518   /// output format.  To get the default parameters, either call
519   /// getValidReportParameters(), or call this method with params
520   /// nonnull but empty (it will fill in default parameters).
521   ///
522   /// \param comm [in] Communicator whose process(es) will participate
523   ///   in the gathering of timer statistics.  This is a Ptr and not
524   ///   an RCP, because RCP would suggest that TimeMonitor were
525   ///   keeping the communicator around after return of this method.
526   ///   Ptr suggests instead that TimeMonitor will only reference the
527   ///   communicator during this method.  If you have an RCP, you can
528   ///   turn it into a Ptr by calling its ptr() method:
529   ///   \code
530   ///   RCP<const Comm<int> > myComm = ...;
531   ///   TimeMonitor::report (myComm.ptr (), ...);
532   ///   \endcode
533   ///
534   /// \param out [out] Output stream to which to write.  This will
535   ///   only be used on the process with rank 0 in the communicator.
536   ///
537   /// \param filter [in] Filter for timer labels.  If filter is not
538   ///   empty, this method will only print timers whose labels begin
539   ///   with this string.
540   ///
541   /// \param params [in/out] Parameters to control output format and
542   ///   which statistics to generate.  If null, we use default
543   ///   parameters if this method was not yet called with params
544   ///   nonnull, otherwise we use the previous set of parameters.  If
545   ///   nonnull, we read the given parameters, filling in defaults,
546   ///   and use the resulting parameters for all subsequent calls to
547   ///   report() (until new parameters are set).
548   ///
549   /// \section Teuchos_TimeMonitor_report_SupportedParams Supported parameters
550   ///
551   /// Here is the current set of supported parameters:
552   ///
553   /// <ul>
554   /// <li> "Report format": "Table" (default), "YAML" </li>
555   /// <li> "YAML style": "spacious" (default), "compact" </li>
556   /// <li> "How to merge timer sets": "Intersection" (default), "Union" </li>
557   /// <li> "alwaysWriteLocal": true, false (default) </li>
558   /// <li> "writeGlobalStats": true (default), false </li>
559   /// <li> "writeZeroTimers": true (default), false </li>
560   /// </ul>
561   ///
562   /// This method currently supports two different output formats.
563   /// "Table" format is the same tabular format which summarize()
564   /// uses.  It displays times and call counts in a table that is easy
565   /// for humans to read, but hard to parse.  "YAML" format uses a
566   /// standard, structured, human-readable output format called YAML.
567   /// <a href="http://yaml.org">YAML</a> stands for YAML Ain't Markup
568   /// Language.
569   ///
570   /// "YAML style" refers to two variants of YAML output that report()
571   /// can generate.  The "compact" mode attempts to put as much data
572   /// on each line as possible.  It may be more readable when there
573   /// are a small number of timers.  The "spacious" mode prefers one
574   /// line per datum whenever possible.  Both modes have the same
575   /// schema, that is, their output has the same hierarchical
576   /// structure and thus the same parse tree.
577   ///
578   /// (In technical terms: compact mode uses YAML's so-called "flow
579   /// style" for sequences and mappings whenever possible, except at
580   /// the outermost level where it would hinder readability.  Spacious
581   /// mode does not use "flow style" for lists or mappings.  For an
582   /// explanation of YAML's flow style, see <a
583   /// href="http://www.yaml.org/spec/1.2/spec.html#style/flow/">Chapter
584   /// 7 of the YAML 1.2 spec</a>.)
585   ///
586   /// "How to merge timer sets" refers to the set operation by which
587   /// processors should combine their sets of timers in order to
588   /// compute global timer statistics.  This corresponds to the
589   /// <tt>setOp</tt> argument of summarize().
590   ///
591   /// The remaining Boolean parameters are the same as the eponymous
592   /// arguments of summarize(), to whose documentation one should
593   /// refer.  There are some wrinkles: in particular, YAML output
594   /// ignores the "alwaysWriteLocal" parameter and assumes
595   /// "writeGlobalStats" is true.
596   static void
597   report (Ptr<const Comm<int> > comm,
598           std::ostream& out,
599           const std::string& filter,
600           const RCP<ParameterList>& params=null);
601 
602   /// \brief Report timer statistics to the given output stream.
603   ///
604   /// This is like the 4-argument version of report(), but with a
605   /// default filter.
606   static void
607   report (Ptr<const Comm<int> > comm,
608           std::ostream& out,
609           const RCP<ParameterList>& params=null);
610 
611   /// \brief Report timer statistics to the given output stream.
612   ///
613   /// This is like the 4-argument version of report(), but with a
614   /// default communicator.
615   static void
616   report (std::ostream& out,
617           const std::string& filter,
618           const RCP<ParameterList>& params=null);
619 
620   /// \brief Report timer statistics to the given output stream.
621   ///
622   /// This is like the 4-argument version of report(), but with a
623   /// default communicator and a default filter.
624   static void
625   report (std::ostream& out,
626           const RCP<ParameterList>& params=null);
627 
628   //! Default parameters (with validators) for report().
629   static RCP<const ParameterList> getValidReportParameters ();
630 
631   /// \brief Sets the StackedTimer into which the TimeMonitor will
632   ///   insert timings.
633   ///
634   /// \param t [in/out] StackedTimer object.
635   static void setStackedTimer(const Teuchos::RCP<Teuchos::StackedTimer>& t);
636 
637   //! The StackedTimer used by the TimeMonitor.
638   static Teuchos::RCP<Teuchos::StackedTimer> getStackedTimer();
639 
640  private:
641   /// \brief Valid output formats for report().
642   ///
643   /// \warning This is an implementation detail of TimeMonitor.  It is
644   ///   subject to change at any time without notice.
645   enum ETimeMonitorReportFormat {
646     REPORT_FORMAT_YAML, ///< YAML report format.
647     REPORT_FORMAT_TABLE ///< Tabular report format.
648   };
649 
650   /// \brief Valid YAML output formats for report().
651   ///
652   /// \warning This is an implementation detail of TimeMonitor.  It is
653   ///   subject to change at any time without notice.
654   enum ETimeMonitorYamlFormat {
655     YAML_FORMAT_COMPACT,
656     YAML_FORMAT_SPACIOUS
657   };
658 
659   /// \brief Like summarize(), but with YAML-format output.
660   ///
661   /// \param comm [in] Communicator over which to compute timer
662   ///   statistics.
663   /// \param out [out] Output stream to which to write (on Proc 0 of
664   ///   the given communicator only).
665   /// \param yamlStyle [in] Whether to print YAML output in "compact"
666   ///   or "spacious" style.
667   /// \param filter [in] Filter for timer labels.  If filter is not
668   ///   empty, this method will only print timers whose labels begin
669   ///   with this string.
670   ///
671   /// \warning This is an experimental interface.  It may change or
672   ///   disappear without warning.
673   static void
674   summarizeToYaml (Ptr<const Comm<int> > comm,
675                    std::ostream& out,
676                    const ETimeMonitorYamlFormat yamlStyle,
677                    const std::string& filter="");
678 
679   /// \brief Like summarize(), but with YAML-format output and default communicator.
680   ///
681   /// \warning This is an experimental interface.  It may change or
682   ///   disappear without warning.
683   static void
684   summarizeToYaml (std::ostream& out,
685                    const ETimeMonitorYamlFormat yamlStyle,
686                    const std::string& filter="");
687 
688   /// \brief Add the "Report format" parameter to plist.
689   ///
690   /// \note Call this in getValidReportParameters() to set a default
691   ///   value and validator for this parameter.
692   static void setReportFormatParameter (ParameterList& plist);
693 
694   /// \brief Add the "YAML style" parameter to plist.
695   ///
696   /// \note Call this in getValidReportParameters() to set a default
697   ///   value and validator for this parameter.
698   static void setYamlFormatParameter (ParameterList& plist);
699 
700   /// \brief Add the "How to merge timer sets" parameter to plist.
701   ///
702   /// \note Call this in getValidReportParameters() to set a default
703   ///   value and validator for this parameter.
704   static void setSetOpParameter (ParameterList& plist);
705 
706   /// \brief Set parameters for report().  Call only from report().
707   ///
708   /// If this method completes successfully, it sets setParams_ to
709   /// true as a flag.
710   ///
711   /// \param params [in/out] Parameters for report().  This may be
712   ///   null, in which case we use defaults or the last set of
713   ///   parameters.
714   ///
715   /// \warning This method is not thread safe, in the sense that it
716   ///   does not set the class data atomically.  Behavior when calling
717   ///   this method from multiple threads is undefined.  Calling this
718   ///   routine with different parameter lists from different threads
719   ///   will certainly not accomplish what you want to accomplish.
720   static void setReportParameters (const RCP<ParameterList>& params);
721 
722   //! Parameters for the report() class method.
723   //@{
724 
725   /// \brief Current output format for report().
726   ///
727   /// Set via setReportParameters().
728   static ETimeMonitorReportFormat reportFormat_;
729 
730   /// \brief Current output style for report(), when using YAML output.
731   ///
732   /// Set via setReportParameters().
733   static ETimeMonitorYamlFormat yamlStyle_;
734 
735   /// \brief Whether report() should use the intersection or union of
736   ///   timers over (MPI) processes.
737   static ECounterSetOp setOp_;
738 
739   /// \brief Whether report() should always report (MPI) Process 0's
740   ///   local timer results.
741   static bool alwaysWriteLocal_;
742 
743   /// \brief Whether report() should always compute global timer
744   ///   statistics.
745   ///
746   /// If true and if using MPI, report() will require MPI
747   /// communication equivalent to O(1) all-reduces.
748   static bool writeGlobalStats_;
749 
750   //! Whether report() should report timers with zero call counts.
751   static bool writeZeroTimers_;
752   //@}
753 
754   /// \brief Whether setReportParameters() completed successfully.
755   ///
756   /// \note Keeping this helps us avoid keeping the whole
757   ///   ParameterList around.
758   static bool setParams_;
759 
760 protected:
761   /// \brief Stacked timer for optional injection of timing from
762   ///   TimeMonitor-enabled objects.
763   static Teuchos::RCP<Teuchos::StackedTimer> stackedTimer_;
764 };
765 
766 
767 /// \class SyncTimeMonitor
768 /// \brief A TimeMonitor that waits at a MPI barrier before destruction.
769 class SyncTimeMonitor :
770     public TimeMonitor {
771 public:
772 
773   /** \name Constructor/Destructor */
774   //@{
775 
776   /// \brief Constructor: starts the timer.
777   ///
778   /// \param timer [in/out] Reference to the timer to be wrapped.
779   ///   This constructor starts the timer, and the destructor stops
780   ///   the timer.
781   ///
782   /// \param reset [in] If true, reset the timer before starting it.
783   ///   Default behavior is not to reset the timer.
784   SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset=false);
785 
786   //! Default constructor is deleted, since it would be unsafe.
787   SyncTimeMonitor () = delete;
788 
789   //! Destructor: stops the timer.
790   ~SyncTimeMonitor() override;
791   //@}
792 
793 private:
794   // \brief Communicator on which barrier will be called.
795   Ptr<const Comm<int> > comm_;
796 };
797 
798 
799 } // namespace Teuchos
800 
801 
802 namespace Teuchos {
803 
804 /// \class TimeMonitorSurrogateImpl
805 /// \brief Implementation of TimeMonitorSurrogate that invokes TimeMonitor.
806 /// \warning Users should not use this class or rely on it in any way.
807 ///   It is an implementation detail.
808 ///
809 /// Please refer to the documentation of
810 /// TimeMonitorSurrogateImplInserter and TimeMonitorSurrogate for an
811 /// explanation of the purpose of this class.
812 class TimeMonitorSurrogateImpl : public CommandLineProcessor::TimeMonitorSurrogate
813 {
summarize(std::ostream & out)814   virtual void summarize (std::ostream& out) {
815     TimeMonitor::summarize (out);
816   }
817 };
818 
819 /// \class TimeMonitorSurrogateImplInserter
820 /// \brief Injects run-time dependency of a class on TimeMonitor.
821 /// \warning Users should not use this class or rely on it in any way.
822 ///   It is an implementation detail.
823 ///
824 /// \section Teuchos_TimeMonitorSurrogateImplInserter_Summary Summary
825 ///
826 /// Classes and functions with the name "TimeMonitorSurrogate" in them
827 /// let CommandLineProcessor optionally call TimeMonitor::summarize(),
828 /// without needing to know that the TimeMonitor class exists.  This
829 /// allows Teuchos to put CommandLineProcessor in a separate package
830 /// from TimeMonitor.  We want to do this because TimeMonitor depends
831 /// on Comm, and is therefore in the TeuchosComm subpackage (which
832 /// depends on TeuchosCore), but CommandLineProcessor is in a
833 /// different subpackage which does not depend on Comm.
834 ///
835 /// The TimeMonitorSurrogateImplInserter class' constructor ensures
836 /// that CommandLineProcessor gets informed about TimeMonitor even
837 /// before the program starts executing main().  This happens
838 /// automatically, without changes to main(), because we declare an
839 /// instance of this class in the header file.  If the TeuchosComm
840 /// subpackage was built and its libraries were linked in,
841 /// CommandLineProcessor will know about TimeMonitor.
842 ///
843 /// \section Teuchos_TimeMonitorSurrogateImplInserter_Note Note to Teuchos developers
844 ///
845 /// This is an instance of the
846 /// <a href="http://en.wikipedia.org/wiki/Dependency_injection">Dependency injection</a>
847 /// design pattern.  CommandLineProcessor is not supposed to know
848 /// about TimeMonitor, because CommandLineProcessor's subpackage does
849 /// not depend on TimeMonitor's subpackage.  Thus,
850 /// CommandLineProcessor interacts with TimeMonitor through the
851 /// TimeMonitorSurrogate interface.  TimeMonitorSurrogateImplInserter
852 /// "injects" the dependency at run time, if the TeuchosComm
853 /// subpackage was enabled and the application linked with its
854 /// libraries.
855 ///
856 /// Teuchos developers could imitate the pattern of this class in
857 /// order to use TimeMonitor's class methods (such as summarize())
858 /// from any other class that does not depend on the TeuchosComm
859 /// subpackage.
860 class TimeMonitorSurrogateImplInserter {
861 public:
862   //! Constructor: inject dependency on TimeMonitor into CommandLineProcessor.
TimeMonitorSurrogateImplInserter()863   TimeMonitorSurrogateImplInserter () {
864     if (is_null (CommandLineProcessor::getTimeMonitorSurrogate ())) {
865       CommandLineProcessor::setTimeMonitorSurrogate (Teuchos::rcp (new TimeMonitorSurrogateImpl));
866     }
867   }
868 };
869 
870 } // end namespace Teuchos
871 
872 
873 namespace {
874 
875 // Inject the implementation in every translation unit.
876 Teuchos::TimeMonitorSurrogateImplInserter timeMonitorSurrogateImplInserter;
877 
878 } // namespace (anonymous)
879 
880 #endif // TEUCHOS_TIMEMONITOR_H
881