1 /* 2 * This file is part of the GROMACS molecular simulation package. 3 * 4 * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by 5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, 6 * and including many others, as listed in the AUTHORS file in the 7 * top-level source directory and at http://www.gromacs.org. 8 * 9 * GROMACS is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public License 11 * as published by the Free Software Foundation; either version 2.1 12 * of the License, or (at your option) any later version. 13 * 14 * GROMACS is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with GROMACS; if not, see 21 * http://www.gnu.org/licenses, or write to the Free Software Foundation, 22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 23 * 24 * If you want to redistribute modifications to GROMACS, please 25 * consider that scientific software is very special. Version 26 * control is crucial - bugs must be traceable. We will be happy to 27 * consider code for inclusion in the official distribution, but 28 * derived work must not be called official GROMACS. Details are found 29 * in the README & COPYING files - if they are missing, get the 30 * official version at http://www.gromacs.org. 31 * 32 * To help us fund GROMACS development, we humbly ask that you cite 33 * the research papers on the package. Check out http://www.gromacs.org. 34 */ 35 36 /*! \libinternal \file 37 * \brief Implements the GPU region timer for OpenCL. 38 * 39 * \author Aleksei Iupinov <a.yupinov@gmail.com> 40 * 41 * \inlibraryapi 42 */ 43 44 #ifndef GMX_GPU_UTILS_GPUREGIONTIMER_OCL_H 45 #define GMX_GPU_UTILS_GPUREGIONTIMER_OCL_H 46 47 #include <array> 48 49 #include "gromacs/gpu_utils/gputraits_ocl.h" 50 #include "gromacs/gpu_utils/oclutils.h" 51 #include "gromacs/utility/stringutil.h" 52 53 #include "gpuregiontimer.h" 54 55 /*! \libinternal \brief 56 * The OpenCL implementation of the GPU code region timing. 57 * With OpenCL, one has to use cl_event handle for each API call that has to be timed, and 58 * accumulate the timing afterwards. As we would like to avoid overhead on API calls, 59 * we only query and accumulate cl_event timing at the end of time steps, not after the API calls. 60 * Thus, this implementation does not reuse a single cl_event for multiple calls, but instead 61 * maintains an array of cl_events to be used within any single code region. 62 * The array size is fixed at a small but sufficiently large value for the number of cl_events 63 * that might contribute to a timer region, currently 10. 64 */ 65 class GpuRegionTimerImpl 66 { 67 /*! \brief The underlying individual timing events array. 68 * The maximum size is chosen arbitrarily to work with current code, and can be changed. 69 * There is simply no need for run-time resizing, and it's unlikely we'll ever need more than 10. 70 */ 71 std::array<cl_event, 10> events_ = { { nullptr } }; 72 //! Index of the active event 73 size_t currentEvent_ = 0; 74 75 public: 76 GpuRegionTimerImpl() = default; 77 ~GpuRegionTimerImpl() = default; 78 //! No copying 79 GpuRegionTimerImpl(const GpuRegionTimerImpl&) = delete; 80 //! No assignment 81 GpuRegionTimerImpl& operator=(GpuRegionTimerImpl&&) = delete; 82 //! Moving is disabled but can be considered in the future if needed 83 GpuRegionTimerImpl(GpuRegionTimerImpl&&) = delete; 84 85 /*! \brief Should be called before the region start. */ openTimingRegion(const DeviceStream &)86 inline void openTimingRegion(const DeviceStream& /*unused*/) {} 87 /*! \brief Should be called after the region end. */ closeTimingRegion(const DeviceStream &)88 inline void closeTimingRegion(const DeviceStream& /*unused*/) {} 89 /*! \brief Returns the last measured region timespan (in milliseconds) and calls reset(). */ getLastRangeTime()90 inline double getLastRangeTime() 91 { 92 double milliseconds = 0.0; 93 for (size_t i = 0; i < currentEvent_; i++) 94 { 95 if (events_[i]) // This conditional is ugly, but is required to make some tests (e.g. empty domain) pass 96 { 97 cl_ulong start_ns, end_ns; 98 cl_int gmx_unused cl_error; 99 100 cl_error = clGetEventProfilingInfo(events_[i], CL_PROFILING_COMMAND_START, 101 sizeof(cl_ulong), &start_ns, nullptr); 102 GMX_ASSERT(CL_SUCCESS == cl_error, 103 gmx::formatString("GPU timing update failure (OpenCL error %d: %s).", 104 cl_error, ocl_get_error_string(cl_error).c_str()) 105 .c_str()); 106 cl_error = clGetEventProfilingInfo(events_[i], CL_PROFILING_COMMAND_END, 107 sizeof(cl_ulong), &end_ns, nullptr); 108 GMX_ASSERT(CL_SUCCESS == cl_error, 109 gmx::formatString("GPU timing update failure (OpenCL error %d: %s).", 110 cl_error, ocl_get_error_string(cl_error).c_str()) 111 .c_str()); 112 milliseconds += (end_ns - start_ns) / 1000000.0; 113 } 114 } 115 reset(); 116 return milliseconds; 117 } 118 /*! \brief Resets the internal state, releasing the used cl_events. */ reset()119 inline void reset() 120 { 121 for (size_t i = 0; i < currentEvent_; i++) 122 { 123 if (events_[i]) // This conditional is ugly, but is required to make some tests (e.g. empty domain) pass 124 { 125 cl_int gmx_unused cl_error = clReleaseEvent(events_[i]); 126 GMX_ASSERT(CL_SUCCESS == cl_error, "OpenCL event release failure"); 127 } 128 } 129 currentEvent_ = 0; 130 // As long as we're doing nullptr checks, we might want to be extra cautious. 131 events_.fill(nullptr); 132 } 133 /*! \brief Returns a new raw timing event 134 * for passing into individual GPU API calls 135 * within the region if the API requires it (e.g. on OpenCL). 136 */ fetchNextEvent()137 inline CommandEvent* fetchNextEvent() 138 { 139 GMX_ASSERT(currentEvent_ < events_.size(), "Increase c_maxEventNumber_ if needed"); 140 cl_event* result = &events_[currentEvent_]; 141 currentEvent_++; 142 return result; 143 } 144 }; 145 146 //! Short-hand for external use 147 using GpuRegionTimer = GpuRegionTimerWrapper<GpuRegionTimerImpl>; 148 149 #endif 150