// @HEADER
// ***********************************************************************
//
// Teuchos: Common Tools Package
// Copyright (2004) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 // 37 // *********************************************************************** 38 // @HEADER 39 40 #ifndef TEUCHOS_TIMEMONITOR_HPP 41 #define TEUCHOS_TIMEMONITOR_HPP 42 43 44 /*! \file Teuchos_TimeMonitor.hpp 45 * 46 * \brief Scope guard for Teuchos::Time, with MPI collective timer 47 * reporting. 48 * 49 * An instance of the Teuchos::TimeMonitor class wraps a nonconst 50 * reference to a Teuchos::Time timer object. TimeMonitor's 51 * constructor starts the timer, and its destructor stops the timer. 52 * This ensures scope safety of timers, so that no matter how a scope 53 * is exited (whether the normal way or when an exception is thrown), 54 * a timer started in the scope is stopped when the scope is left. 55 * 56 * TimeMonitor also has class methods that create or destroy timers 57 * (in such a way that it can track the complete set of created timers 58 * on each process) and compute global timer statistics. 59 */ 60 61 /** \example TimeMonitor/cxx_main.cpp 62 * 63 * This is an example of how to use the Teuchos::TimeMonitor class. 64 */ 65 66 #include "Teuchos_PerformanceMonitorBase.hpp" 67 #include "Teuchos_ParameterList.hpp" 68 #include "Teuchos_Comm.hpp" 69 #include "Teuchos_Time.hpp" 70 71 #include "Teuchos_CommandLineProcessor.hpp" 72 73 /// \brief Defines a static non-member function that returns a Teuchos timer. 74 /// 75 /// \warning Please don't use this macro. It is a bad idea to keep 76 /// around static RCP objects past return from main(). 77 #define TEUCHOS_TIMER(funcName, strName) \ 78 static Teuchos::Time& funcName() \ 79 {static Teuchos::RCP<Time> rtn = \ 80 Teuchos::TimeMonitor::getNewCounter(strName); return *rtn;} 81 82 83 /** \brief Defines a timer for a specific function (with differentiator). 84 * 85 * Same as TEUCHOS_FUNC_TIME_MONITOR(...) except required when used more than 86 * once in the same function (like a block of code). 87 * 88 * \warning Please don't use this macro. 
It is a bad idea to keep 89 * around static RCP objects past return from main(). 90 */ 91 #define TEUCHOS_FUNC_TIME_MONITOR_DIFF( FUNCNAME, DIFF ) \ 92 static Teuchos::RCP<Teuchos::Time> DIFF ## blabla_localTimer; \ 93 if(!DIFF ## blabla_localTimer.get()) { \ 94 std::ostringstream oss; \ 95 oss << FUNCNAME; \ 96 DIFF ## blabla_localTimer = Teuchos::TimeMonitor::getNewCounter(oss.str()); \ 97 } \ 98 Teuchos::TimeMonitor DIFF ## blabla_localTimeMonitor(*DIFF ## blabla_localTimer) 99 100 101 /** \brief Defines a timer for a specific function. 102 * 103 Note that the name of the timer can be formated with stream inserts. 104 For example, we can define a time monitor for a function as follows: 105 106 \code 107 template<typename Scalar> 108 void foo() 109 { 110 TEUCHOS_FUNC_TIME_MONITOR( 111 "foo<" << Teuchos::ScalarTraits<Scalar>::name () << ">()" 112 ); 113 ... 114 } 115 \endcode 116 117 The timer can then be printed at the end of the program using any of 118 various class methods, including summarize(): 119 \code 120 Teuchos::TimeMonitor::summarize (); 121 \endcode 122 */ 123 #define TEUCHOS_FUNC_TIME_MONITOR( FUNCNAME ) \ 124 TEUCHOS_FUNC_TIME_MONITOR_DIFF( FUNCNAME, main ) 125 126 127 namespace Teuchos { 128 129 // Forward declaration 130 class StackedTimer; 131 132 /// \typedef stat_map_type 133 /// \brief Global statistics collected from timer data. 134 /// 135 /// Key: name of the timer. 136 /// 137 /// Value: each entry in the vector is a timing and call count for 138 /// that timer, corresponding to a particular statistic (e.g., 139 /// minimum, arithmetic mean, or maximum). What statistic that is 140 /// depends on an auxiliary array "statNames" which has the same 141 /// ordering as the entries in this vector. See the documentation 142 /// of \c TimeMonitor::computeGlobalTimerStatistics(). 
143 typedef std::map<std::string, std::vector<std::pair<double, double> > > stat_map_type; 144 145 /// \class TimeMonitor 146 /// \brief Scope guard for Time, that can compute MPI collective timer 147 /// statistics. 148 /// 149 /// An instance of the TimeMonitor class wraps a nonconst reference to 150 /// a Time timer object. TimeMonitor's constructor starts the timer, 151 /// and its destructor stops the timer. This ensures scope safety of 152 /// timers, so that no matter how a scope is exited (whether the 153 /// normal way or when an exception is thrown), a timer started in the 154 /// scope is stopped when the scope is left. 155 /// 156 /// TimeMonitor also has class methods that create or destroy timers 157 /// and compute global timer statistics. If you create a timer using 158 /// getNewCounter() (or the deprecated getNewTimer()), it will add 159 /// that timer to the set of timers for which to compute global 160 /// statistics. The summarize() and report() methods will print 161 /// global statistics for these timers, like the minimum, mean, and 162 /// maximum time over all processes in the communicator, for each 163 /// timer. These methods work correctly even if some processes have 164 /// different timers than other processes. You may also use 165 /// computeGlobalTimerStatistics() to compute the same global 166 /// statistics, if you wish to use them in your program or output them 167 /// in a different format than that of these methods. 168 /// 169 /// If Teuchos is configured with <tt>TPL_ENABLE_Valgrind=ON</tt> 170 /// and <tt>Teuchos_TIME_MASSIF_SNAPSHOTS=ON</tt> Valgrind Massif 171 /// snapshots are taken before and after each Time invocation. The 172 /// resulting memory profile can be plotted using 173 /// <tt>core/utils/plotMassifMemoryUsage.py</tt> 174 /// 175 /// \warning This class must only be used to time functions that are 176 /// called only within the main program. 
It may <i>not</i> be used 177 /// in pre-program setup or post-program teardown! 178 class TEUCHOSCOMM_LIB_DLL_EXPORT TimeMonitor : 179 public PerformanceMonitorBase<Time> { 180 public: 181 182 /** \name Constructor/Destructor */ 183 //@{ 184 185 /// \brief Constructor: starts the timer. 186 /// 187 /// \param timer [in/out] Reference to the timer to be wrapped. 188 /// This constructor starts the timer, and the destructor stops 189 /// the timer. 190 /// 191 /// \param reset [in] If true, reset the timer before starting it. 192 /// Default behavior is not to reset the timer. 193 TimeMonitor (Time& timer, bool reset=false); 194 195 //! Default constructor is deleted, since it would be unsafe. 196 TimeMonitor () = delete; 197 198 //! Destructor: stops the timer. 199 ~TimeMonitor() override; 200 //@} 201 202 /// \brief Return a new timer with the given name (class method). 203 /// 204 /// Call getNewCounter() or this method if you want to create a new 205 /// named timer, and you would like TimeMonitor to track the timer 206 /// for later computation of global statistics over processes. 207 /// 208 /// This method wraps getNewCounter() (inherited from the base 209 /// class) for backwards compatibiity. getNewTimer(const std::string & name)210 static RCP<Time> getNewTimer (const std::string& name) { 211 return getNewCounter (name); 212 } 213 214 /// \brief Disable the timer with the given name. 215 /// 216 /// "Disable" means that the timer (Time instance) will ignore all 217 /// calls to start(), stop(), and incrementNumCalls(). The effect 218 /// will be as if the TimeMonitor had never touched the timer. 219 /// 220 /// If the timer with the given name does not exist (was never 221 /// created using getNewCounter() or getNewTimer()), then this 222 /// method throws std::invalid_argument. Otherwise, it disables the 223 /// timer. 
This effect lasts until the timer is cleared or until 224 /// the timer is enabled, either by calling enableTimer() (see 225 /// below) or by calling the Time instance's enable() method. 226 /// 227 /// Disabling a timer does <i>not</i> exclude it from the list of 228 /// timers printed by summarize() or report(). 229 static void disableTimer (const std::string& name); 230 231 /// \brief Enable the timer with the given name. 232 /// 233 /// If the timer with the given name does not exist (was never 234 /// created using getNewCounter() or getNewTimer()), then this 235 /// method throws std::invalid_argument. Otherwise, it undoes the 236 /// effect of disableTimer() on the timer with the given name. If 237 /// the timer with the given name was not disabled, then this method 238 /// does nothing. 239 static void enableTimer (const std::string& name); 240 241 /// \brief Reset all global timers to zero. 242 /// 243 /// This method only affects Time objects created by getNewCounter() 244 /// or getNewTimer(). 245 /// 246 /// \pre None of the timers must currently be running. 247 static void zeroOutTimers(); 248 249 /// \brief Compute global timer statistics for all timers on the 250 /// given communicator. 251 /// 252 /// The typical use case for Time and TimeMonitor is that all 253 /// processes in a communicator create the same set of timers, and 254 /// then want to report summary statistics. This method supports 255 /// that typical use case. For each timer in the set, this method 256 /// computes a list of global statistics. "Global" means "for all 257 /// processes in the communicator." "Statistic" means the result of 258 /// a reduction over the timing and call count values. Thus, each 259 /// statistic includes both a timing and a call count. The current 260 /// list of computed statistics includes the minimum and maximum 261 /// timing (and the corresponding call count for each) and the 262 /// arithmetic mean (timing and call count). 
This list may expand 263 /// in the future. 264 /// 265 /// Different processes may have different sets of timers. This 266 /// method gives you two options for reconciling the sets. If setOp 267 /// is Intersection, it computes the intersection (the common 268 /// subset) of timers on all processes in the communicator. 269 /// Otherwise, if setOp is Union, this method computes the union of 270 /// timers on all processes in the communicator. Intersection is 271 /// the default, since it means that all reported timers exist on 272 /// all participating processes. For setOp=Union, timers that do 273 /// not exist on some processes will be given a zero timing and call 274 /// count, so that statistics make sense. 275 /// 276 /// \note This method must called as a collective by all processes 277 /// in the communicator. 278 /// 279 /// All output arguments are returned redundantly on all processes 280 /// in the communicator. That makes this method an all-reduce. 281 /// 282 /// \section Teuchos_TimeMonitor_computeGlobalTimerStatistics_stats Statistics collected 283 /// 284 /// The "MinOverProcs" and "MaxOverProcs" timings are cumulative: 285 /// the reported timing is for all calls. Along with the min resp. 286 /// max timing comes the call count of the process who had the min 287 /// resp. max. (If more than one process had the min resp. max 288 /// timing, then the call count on the process with the smallest 289 /// rank is reported.) 290 /// 291 /// The "MeanOverProcs" equals the sum of the processes' cumulative 292 /// timings, divided by the number of processes. Thus, it is 293 /// cumulative over all calls, and is comparable with the 294 /// "MinOverProcs" and "MaxOverProcs" timings. This differs from 295 /// the "MeanOverCallCounts" (see below). This does <i>not</i> 296 /// weight the mean by call counts. 297 /// 298 /// The "MeanOverCallCounts" is an arithmetic mean of all timings. 299 /// It is <i>not</i> cumulative. 
It reports the mean timing for a 300 /// single invocation over all calls on all processes, not weighting 301 /// any one process more than the others. For each timer, this is 302 /// the sum of the cumulative timing over all processes, divided by 303 /// the sum of the call counts over all processes for that timing. 304 /// (We compute it a bit differently to help prevent overflow.) The 305 /// "MeanOverCallCounts" is <i>not</i> comparable with the min, max, 306 /// or "MeanOverProcs". 307 /// 308 /// We report with both versions of the mean timing the mean call 309 /// count over processes. This may be fractional, which is one 310 /// reason why we report call counts as <tt>double</tt> rather than 311 /// <tt>int</tt>. It has no particular connection to the mean 312 /// timing. 313 /// 314 /// \section Teuchos_TimeMonitor_computeGlobalTimerStatistics_perf Performance 315 /// 316 /// This operation requires interprocess communication. Suppose 317 /// there are \f$P\f$ processes in the given communicator, and 318 /// \f$N\f$ unique timers in the global union of all processes' 319 /// timers. Then, this method requires \f$O(\log P)\f$ messages 320 /// (\f$O(1)\f$ "reductions" and exactly 1 "broadcast") and 321 /// \f$O(N)\f$ per-processor storage (in the worst case) when 322 /// computing either the intersection or the union of timers (the 323 /// algorithm is similar in either case). The whole algorithm takes 324 /// at worst \f$O(N (\log N) (\log P))\f$ time along the critical 325 /// path (i.e., on the "slowest process" in the communicator). The 326 /// \f$N \log N\f$ term comes from sorting the timers by label at 327 /// each stage of the reduction in order to compute their union or 328 /// intersection. 329 /// 330 /// \param statData [out] On output: Global timer statistics, stored 331 /// as a map with key timer name, and with value the ordered list 332 /// of statistics for that timer. 
The \c statNames output has the 333 /// same order as the ordered list of statistics for each timer. 334 /// Each entry of the statistics list is a (timing, call count) 335 /// pair, the meaning of which depends on the particular statistic 336 /// (see above). 337 /// 338 /// \param statNames [out] On output: Each value in the statData map 339 /// is a vector. That vector v has the same number of entries as 340 /// statNames. statNames[k] is the name of the statistic (see 341 /// above) stored as v[k]. Always refer to statNames for the 342 /// number and names of statistics. 343 /// 344 /// \param comm [in] Communicator whose process(es) will participate 345 /// in the gathering of timer statistics. This is a Ptr and not 346 /// an RCP, because RCP would suggest that TimeMonitor were 347 /// keeping the communicator around after return of this method. 348 /// Ptr suggests instead that TimeMonitor will only reference the 349 /// communicator during this method. If you have an RCP, you can 350 /// turn it into a Ptr by calling its ptr() method: 351 /// \code 352 /// RCP<const Comm<int> > myComm = ...; 353 /// TimeMonitor::computeGlobalTimerStatistics (statData, statNames, myComm.ptr()); 354 /// \endcode 355 /// 356 /// \param setOp [in] If \c Intersection, compute statistics for the 357 /// intersection of all created timers over all processes in the 358 /// communicator. If \c Union, compute statistics for the union 359 /// of all created timers over all processes in the communicator. 360 /// 361 /// \param filter [in] Filter for timer labels. If filter is not 362 /// empty, this method will only compute statistics for timers 363 /// whose labels begin with this string. 
364 static void 365 computeGlobalTimerStatistics (stat_map_type& statData, 366 std::vector<std::string>& statNames, 367 Ptr<const Comm<int> > comm, 368 const ECounterSetOp setOp=Intersection, 369 const std::string& filter=""); 370 371 /// \brief Compute global timer statistics for all timers on all 372 /// (MPI) processes (in MPI_COMM_WORLD). 373 /// 374 /// This is an overload of the above computeGlobalTimerStatistics() 375 /// method for when the caller does not want to provide a 376 /// communicator explicitly. This method "does the right thing" in 377 /// that case. Specifically: 378 /// 379 /// <ul> 380 /// <li> If Trilinos was not built with MPI support, this method 381 /// assumes a serial "communicator" containing one 382 /// process. </li> 383 /// <li> If Trilinos was built with MPI support and MPI has been 384 /// initialized (via MPI_Init() or one of the wrappers in 385 /// Epetra or Teuchos), this method uses MPI_COMM_WORLD as the 386 /// communicator. This is the most common case. </li> 387 /// <li> If Trilinos was built with MPI support and MPI has 388 /// <i>not</i> been initialized, this method will use a 389 /// "serial" communicator (that does not actually use MPI). 390 /// This may produce output on all the MPI processes if you are 391 /// running with Trilinos as an MPI job with more than one 392 /// process. Thus, if you intend to use this method in 393 /// parallel, you should first initialize MPI. (We cannot 394 /// initialize MPI for you, because we have no way to know 395 /// whether you intend to run an MPI-enabled build serially.) 396 /// </li> 397 /// </ul> 398 /// 399 /// \warning If you call this method when MPI is running, you 400 /// <i>must</i> call it on all processes in \c MPI_COMM_WORLD. 401 /// Otherwise, the method will never finish, since it will be 402 /// waiting forever for the non-participating processes. 
If you 403 /// want to use computeGlobalTimerStatistics() on a 404 /// subcommunicator, please use the overloaded version above that 405 /// takes a communicator as an input argument. 406 static void 407 computeGlobalTimerStatistics (stat_map_type& statData, 408 std::vector<std::string>& statNames, 409 const ECounterSetOp setOp=Intersection, 410 const std::string& filter=""); 411 412 /// \brief Print summary statistics for all timers on the given 413 /// communicator. 414 /// 415 /// If writeGlobalStatus=true, this method computes the same 416 /// statistics as computeGlobalTimerStatistics(), using the same 417 /// collective algorithm. (<tt>writeGlobalStatus=false</tt> means 418 /// that only the process with rank 0 in the communicator reports 419 /// its timers' data.) It then reports the results to the given 420 /// output stream on the process with rank 0 in the given 421 /// communicator. Output follows a human-readable tabular form. 422 /// 423 /// \param comm [in] Communicator whose process(es) will participate 424 /// in the gathering of timer statistics. This is a Ptr and not 425 /// an RCP, because RCP would suggest that TimeMonitor were 426 /// keeping the communicator around after return of this method. 427 /// Ptr suggests instead that TimeMonitor will only reference the 428 /// communicator during this method. If you have an RCP, you can 429 /// turn it into a Ptr by calling its ptr() method: 430 /// \code 431 /// RCP<const Comm<int> > myComm = ...; 432 /// TimeMonitor::summarize (myComm.ptr()); 433 /// \endcode 434 /// 435 /// \param out [out] Output stream to which to write. This will 436 /// only be used on the process with rank 0 in the communicator. 437 /// 438 /// \param alwaysWriteLocal [in] If true, the process with Rank 0 in 439 /// the communicator will write its local timings to the given 440 /// output stream. Defaults to false, since the global statistics 441 /// are more meaningful. 
If the local set of timers differs from 442 /// the global set of timers (either the union or the 443 /// intersection, depending on \c setOp), Proc 0 will create 444 /// corresponding local timer data (<i>not</i> corresponding 445 /// timers) with zero elapsed times and call counts, just to pad 446 /// the table of output. 447 /// 448 /// \param writeGlobalStats [in] If true (the default), compute and 449 /// display the statistics that \c computeGlobalTimerStatistics() 450 /// computes. If there is only one MPI process or if this is a 451 /// non-MPI build of Trilinos, only compute and show the "global" 452 /// timings, without the "statistics" that would be all the same 453 /// anyway. 454 /// 455 /// \param writeZeroTimers [in] If false, do not display results for 456 /// timers that have never been called (numCalls() == 0). If 457 /// true, display results for all timers, regardless of their call 458 /// count. Note that \c setOp and \c writeGlobalStats might 459 /// reintroduce timers with zero call counts. 460 /// 461 /// \param setOp [in] If \c Intersection, compute and display the 462 /// intersection of all created timers over all processes in the 463 /// communicator. If \c Union, compute and display the union of 464 /// all created timers over all processes in the communicator. 465 /// 466 /// \param filter [in] Filter for timer labels. If filter is not 467 /// empty, this method will only print timers whose labels begin 468 /// with this string. 469 /// 470 /// \param ignoreZeroTimers [in] Processes that either do not have 471 /// a particular timer or have zero time for a timer are not used 472 /// in calculating global statistics. This mode requires one 473 /// additional all-reduce per invocation. 474 /// 475 /// \note If \c writeGlobalStats is true, this method <i>must</i> be 476 /// called as a collective by all processes in the communicator. 477 /// This method will <i>only</i> perform communication if 478 /// <tt>writeGlobalStats</tt> is true. 
479 static void 480 summarize (Ptr<const Comm<int> > comm, 481 std::ostream &out=std::cout, 482 const bool alwaysWriteLocal=false, 483 const bool writeGlobalStats=true, 484 const bool writeZeroTimers=true, 485 const ECounterSetOp setOp=Intersection, 486 const std::string& filter="", 487 const bool ignoreZeroTimers=false); 488 489 /// \brief Print summary statistics for all timers on all (MPI) 490 /// processes (in MPI_COMM_WORLD). 491 /// 492 /// This is an overload of the above summarize() method for when the 493 /// caller does not want to provide a communicator explicitly. This 494 /// method "does the right thing" in that case. For an explanation 495 /// of what that means, see the documentation of the overload of 496 /// computeGlobalTimerStatistics() that does not require a 497 /// communicator argument. 498 /// 499 /// \warning If you call this method when MPI is running, you 500 /// <i>must</i> call it on all processes in \c MPI_COMM_WORLD. 501 /// Otherwise, the method will never finish, since it will be 502 /// waiting forever for the non-participating processes. If you 503 /// want to use \c summarize() on a subcommunicator, please use 504 /// the overloaded version above that takes a communicator as an 505 /// input argument. 506 static void 507 summarize (std::ostream& out=std::cout, 508 const bool alwaysWriteLocal=false, 509 const bool writeGlobalStats=true, 510 const bool writeZeroTimers=true, 511 const ECounterSetOp setOp=Intersection, 512 const std::string& filter="", 513 const bool ignoreZeroTimers=false); 514 515 /// \brief Report timer statistics to the given output stream. 516 /// 517 /// This is like summarize(), but gives you more control over the 518 /// output format. To get the default parameters, either call 519 /// getValidReportParameters(), or call this method with params 520 /// nonnull but empty (it will fill in default parameters). 
521 /// 522 /// \param comm [in] Communicator whose process(es) will participate 523 /// in the gathering of timer statistics. This is a Ptr and not 524 /// an RCP, because RCP would suggest that TimeMonitor were 525 /// keeping the communicator around after return of this method. 526 /// Ptr suggests instead that TimeMonitor will only reference the 527 /// communicator during this method. If you have an RCP, you can 528 /// turn it into a Ptr by calling its ptr() method: 529 /// \code 530 /// RCP<const Comm<int> > myComm = ...; 531 /// TimeMonitor::report (myComm.ptr (), ...); 532 /// \endcode 533 /// 534 /// \param out [out] Output stream to which to write. This will 535 /// only be used on the process with rank 0 in the communicator. 536 /// 537 /// \param filter [in] Filter for timer labels. If filter is not 538 /// empty, this method will only print timers whose labels begin 539 /// with this string. 540 /// 541 /// \param params [in/out] Parameters to control output format and 542 /// which statistics to generate. If null, we use default 543 /// parameters if this method was not yet called with params 544 /// nonnull, otherwise we use the previous set of parameters. If 545 /// nonnull, we read the given parameters, filling in defaults, 546 /// and use the resulting parameters for all subsequent calls to 547 /// report() (until new parameters are set). 
548 /// 549 /// \section Teuchos_TimeMonitor_report_SupportedParams Supported parameters 550 /// 551 /// Here is the current set of supported parameters: 552 /// 553 /// <ul> 554 /// <li> "Report format": "Table" (default), "YAML" </li> 555 /// <li> "YAML style": "spacious" (default), "compact" </li> 556 /// <li> "How to merge timer sets": "Intersection" (default), "Union" </li> 557 /// <li> "alwaysWriteLocal": true, false (default) </li> 558 /// <li> "writeGlobalStats": true (default), false </li> 559 /// <li> "writeZeroTimers": true (default), false </li> 560 /// </ul> 561 /// 562 /// This method currently supports two different output formats. 563 /// "Table" format is the same tabular format which summarize() 564 /// uses. It displays times and call counts in a table that is easy 565 /// for humans to read, but hard to parse. "YAML" format uses a 566 /// standard, structured, human-readable output format called YAML. 567 /// <a href="http://yaml.org">YAML</a> stands for YAML Ain't Markup 568 /// Language. 569 /// 570 /// "YAML style" refers to two variants of YAML output that report() 571 /// can generate. The "compact" mode attempts to put as much data 572 /// on each line as possible. It may be more readable when there 573 /// are a small number of timers. The "spacious" mode prefers one 574 /// line per datum whenever possible. Both modes have the same 575 /// schema, that is, their output has the same hierarchical 576 /// structure and thus the same parse tree. 577 /// 578 /// (In technical terms: compact mode uses YAML's so-called "flow 579 /// style" for sequences and mappings whenever possible, except at 580 /// the outermost level where it would hinder readability. Spacious 581 /// mode does not use "flow style" for lists or mappings. For an 582 /// explanation of YAML's flow style, see <a 583 /// href="http://www.yaml.org/spec/1.2/spec.html#style/flow/">Chapter 584 /// 7 of the YAML 1.2 spec</a>.) 
585 /// 586 /// "How to merge timer sets" refers to the set operation by which 587 /// processors should combine their sets of timers in order to 588 /// compute global timer statistics. This corresponds to the 589 /// <tt>setOp</tt> argument of summarize(). 590 /// 591 /// The remaining Boolean parameters are the same as the eponymous 592 /// arguments of summarize(), to whose documentation one should 593 /// refer. There are some wrinkles: in particular, YAML output 594 /// ignores the "alwaysWriteLocal" parameter and assumes 595 /// "writeGlobalStats" is true. 596 static void 597 report (Ptr<const Comm<int> > comm, 598 std::ostream& out, 599 const std::string& filter, 600 const RCP<ParameterList>& params=null); 601 602 /// \brief Report timer statistics to the given output stream. 603 /// 604 /// This is like the 4-argument version of report(), but with a 605 /// default filter. 606 static void 607 report (Ptr<const Comm<int> > comm, 608 std::ostream& out, 609 const RCP<ParameterList>& params=null); 610 611 /// \brief Report timer statistics to the given output stream. 612 /// 613 /// This is like the 4-argument version of report(), but with a 614 /// default communicator. 615 static void 616 report (std::ostream& out, 617 const std::string& filter, 618 const RCP<ParameterList>& params=null); 619 620 /// \brief Report timer statistics to the given output stream. 621 /// 622 /// This is like the 4-argument version of report(), but with a 623 /// default communicator and a default filter. 624 static void 625 report (std::ostream& out, 626 const RCP<ParameterList>& params=null); 627 628 //! Default parameters (with validators) for report(). 629 static RCP<const ParameterList> getValidReportParameters (); 630 631 /// \brief Sets the StackedTimer into which the TimeMonitor will 632 /// insert timings. 633 /// 634 /// \param t [in/out] StackedTimer object. 635 static void setStackedTimer(const Teuchos::RCP<Teuchos::StackedTimer>& t); 636 637 //! 
The StackedTimer used by the TimeMonitor. 638 static Teuchos::RCP<Teuchos::StackedTimer> getStackedTimer(); 639 640 private: 641 /// \brief Valid output formats for report(). 642 /// 643 /// \warning This is an implementation detail of TimeMonitor. It is 644 /// subject to change at any time without notice. 645 enum ETimeMonitorReportFormat { 646 REPORT_FORMAT_YAML, 647 REPORT_FORMAT_TABLE 648 }; 649 650 /// \brief Valid YAML output formats for report(). 651 /// 652 /// \warning This is an implementation detail of TimeMonitor. It is 653 /// subject to change at any time without notice. 654 enum ETimeMonitorYamlFormat { 655 YAML_FORMAT_COMPACT, 656 YAML_FORMAT_SPACIOUS 657 }; 658 659 /// \brief Like summarize(), but with YAML-format output. 660 /// 661 /// \param comm [in] Communicator over which to compute timer 662 /// statistics. 663 /// \param out [out] Output stream to which to write (on Proc 0 of 664 /// the given communicator only). 665 /// \param yamlStyle [in] Whether to print YAML output in "compact" 666 /// or "spacious" style. 667 /// \param filter [in] Filter for timer labels. If filter is not 668 /// empty, this method will only print timers whose labels begin 669 /// with this string. 670 /// 671 /// \warning This is an experimental interface. It may change or 672 /// disappear without warning. 673 static void 674 summarizeToYaml (Ptr<const Comm<int> > comm, 675 std::ostream& out, 676 const ETimeMonitorYamlFormat yamlStyle, 677 const std::string& filter=""); 678 679 /// \brief Like summarize(), but with YAML-format output and default communicator. 680 /// 681 /// \warning This is an experimental interface. It may change or 682 /// disappear without warning. 683 static void 684 summarizeToYaml (std::ostream& out, 685 const ETimeMonitorYamlFormat yamlStyle, 686 const std::string& filter=""); 687 688 /// \brief Add the "Report format" parameter to plist. 
689 /// 690 /// \note Call this in getValidReportParameters() to set a default 691 /// value and validator for this parameter. 692 static void setReportFormatParameter (ParameterList& plist); 693 694 /// \brief Add the "YAML style" parameter to plist. 695 /// 696 /// \note Call this in getValidReportParameters() to set a default 697 /// value and validator for this parameter. 698 static void setYamlFormatParameter (ParameterList& plist); 699 700 /// \brief Add the "How to merge timer sets" parameter to plist. 701 /// 702 /// \note Call this in getValidReportParameters() to set a default 703 /// value and validator for this parameter. 704 static void setSetOpParameter (ParameterList& plist); 705 706 /// \brief Set parameters for report(). Call only from report(). 707 /// 708 /// If this method completes successfully, it sets setParams_ to 709 /// true as a flag. 710 /// 711 /// \param params [in/out] Parameters for report(). This may be 712 /// null, in which case we use defaults or the last set of 713 /// parameters. 714 /// 715 /// \warning This method is not thread safe, in the sense that it 716 /// does not set the class data atomically. Behavior when calling 717 /// this method from multiple threads is undefined. Calling this 718 /// routine with different parameter lists from different threads 719 /// will certainly not accomplish what you want to accomplish. 720 static void setReportParameters (const RCP<ParameterList>& params); 721 722 //! Parameters for the report() class method. 723 //@{ 724 725 /// \brief Current output format for report(). 726 /// 727 /// Set via setReportParameters(). 728 static ETimeMonitorReportFormat reportFormat_; 729 730 /// \brief Current output style for report(), when using YAML output. 731 /// 732 /// Set via setReportParameters(). 733 static ETimeMonitorYamlFormat yamlStyle_; 734 735 /// \brief Whether report() should use the intersection or union of 736 /// timers over (MPI) processes. 
737 static ECounterSetOp setOp_; 738 739 /// \brief Whether report() should always report (MPI) Process 0's 740 /// local timer results. 741 static bool alwaysWriteLocal_; 742 743 /// \brief Whether report() should always compute global timer 744 /// statistics. 745 /// 746 /// If true and if using MPI, report() will require MPI 747 /// communication equivalent to O(1) all-reduces. 748 static bool writeGlobalStats_; 749 750 //! Whether report() should report timers with zero call counts. 751 static bool writeZeroTimers_; 752 //@} 753 754 /// \brief Whether setReportParameters() completed successfully. 755 /// 756 /// \note Keeping this helps us avoid keeping the whole 757 /// ParameterList around. 758 static bool setParams_; 759 760 protected: 761 /// \brief Stacked timer for optional injection of timing from 762 /// TimeMonitor-enabled objects. 763 static Teuchos::RCP<Teuchos::StackedTimer> stackedTimer_; 764 }; 765 766 767 /// \class SyncTimeMonitor 768 /// \brief A TimeMonitor that waits at a MPI barrier before destruction. 769 class SyncTimeMonitor : 770 public TimeMonitor { 771 public: 772 773 /** \name Constructor/Destructor */ 774 //@{ 775 776 /// \brief Constructor: starts the timer. 777 /// 778 /// \param timer [in/out] Reference to the timer to be wrapped. 779 /// This constructor starts the timer, and the destructor stops 780 /// the timer. 781 /// 782 /// \param reset [in] If true, reset the timer before starting it. 783 /// Default behavior is not to reset the timer. 784 SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset=false); 785 786 //! Default constructor is deleted, since it would be unsafe. 787 SyncTimeMonitor () = delete; 788 789 //! Destructor: stops the timer. 790 ~SyncTimeMonitor() override; 791 //@} 792 793 private: 794 // \brief Communicator on which barrier will be called. 
795 Ptr<const Comm<int> > comm_; 796 }; 797 798 799 } // namespace Teuchos 800 801 802 namespace Teuchos { 803 804 /// \class TimeMonitorSurrogateImpl 805 /// \brief Implementation of TimeMonitorSurrogate that invokes TimeMonitor. 806 /// \warning Users should not use this class or rely on it in any way. 807 /// It is an implementation detail. 808 /// 809 /// Please refer to the documentation of 810 /// TimeMonitorSurrogateImplInserter and TimeMonitorSurrogate for an 811 /// explanation of the purpose of this class. 812 class TimeMonitorSurrogateImpl : public CommandLineProcessor::TimeMonitorSurrogate 813 { summarize(std::ostream & out)814 virtual void summarize (std::ostream& out) { 815 TimeMonitor::summarize (out); 816 } 817 }; 818 819 /// \class TimeMonitorSurrogateImplInserter 820 /// \brief Injects run-time dependency of a class on TimeMonitor. 821 /// \warning Users should not use this class or rely on it in any way. 822 /// It is an implementation detail. 823 /// 824 /// \section Teuchos_TimeMonitorSurrogateImplInserter_Summary Summary 825 /// 826 /// Classes and functions with the name "TimeMonitorSurrogate" in them 827 /// let CommandLineProcessor optionally call TimeMonitor::summarize(), 828 /// without needing to know that the TimeMonitor class exists. This 829 /// allows Teuchos to put CommandLineProcessor in a separate package 830 /// from TimeMonitor. We want to do this because TimeMonitor depends 831 /// on Comm, and is therefore in the TeuchosComm subpackage (which 832 /// depends on TeuchosCore), but CommandLineProcessor is in a 833 /// different subpackage which does not depend on Comm. 834 /// 835 /// The TimeMonitorSurrogateImplInserter class' constructor ensures 836 /// that CommandLineProcessor gets informed about TimeMonitor even 837 /// before the program starts executing main(). This happens 838 /// automatically, without changes to main(), because we declare an 839 /// instance of this class in the header file. 
If the TeuchosComm 840 /// subpackage was built and its libraries were linked in, 841 /// CommandLineProcessor will know about TimeMonitor. 842 /// 843 /// \section Teuchos_TimeMonitorSurrogateImplInserter_Note Note to Teuchos developers 844 /// 845 /// This is an instance of the 846 /// <a href="http://en.wikipedia.org/wiki/Dependency_injection">Dependency injection</a> 847 /// design pattern. CommandLineProcessor is not supposed to know 848 /// about TimeMonitor, because CommandLineProcessor's subpackage does 849 /// not depend on TimeMonitor's subpackage. Thus, 850 /// CommandLineProcessor interacts with TimeMonitor through the 851 /// TimeMonitorSurrogate interface. TimeMonitorSurrogateImplInserter 852 /// "injects" the dependency at run time, if the TeuchosComm 853 /// subpackage was enabled and the application linked with its 854 /// libraries. 855 /// 856 /// Teuchos developers could imitate the pattern of this class in 857 /// order to use TimeMonitor's class methods (such as summarize()) 858 /// from any other class that does not depend on the TeuchosComm 859 /// subpackage. 860 class TimeMonitorSurrogateImplInserter { 861 public: 862 //! Constructor: inject dependency on TimeMonitor into CommandLineProcessor. TimeMonitorSurrogateImplInserter()863 TimeMonitorSurrogateImplInserter () { 864 if (is_null (CommandLineProcessor::getTimeMonitorSurrogate ())) { 865 CommandLineProcessor::setTimeMonitorSurrogate (Teuchos::rcp (new TimeMonitorSurrogateImpl)); 866 } 867 } 868 }; 869 870 } // end namespace Teuchos 871 872 873 namespace { 874 875 // Inject the implementation in every translation unit. 876 Teuchos::TimeMonitorSurrogateImplInserter timeMonitorSurrogateImplInserter; 877 878 } // namespace (anonymous) 879 880 #endif // TEUCHOS_TIMEMONITOR_H 881