1 /* Copyright (C) 2010 Wildfire Games. 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining 4 * a copy of this software and associated documentation files (the 5 * "Software"), to deal in the Software without restriction, including 6 * without limitation the rights to use, copy, modify, merge, publish, 7 * distribute, sublicense, and/or sell copies of the Software, and to 8 * permit persons to whom the Software is furnished to do so, subject to 9 * the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 /* 24 * platform-independent high resolution timer 25 */ 26 27 #ifndef INCLUDED_TIMER 28 #define INCLUDED_TIMER 29 30 #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC 31 #include "lib/sysdep/cpu.h" // cpu_AtomicAdd 32 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC 33 # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency 34 # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc 35 #endif 36 37 #include "lib/utf8.h" 38 39 /** 40 * timer_Time will subsequently return values relative to the current time. 41 **/ 42 LIB_API void timer_LatchStartTime(); 43 44 /** 45 * @return high resolution (> 1 us) timestamp [s]. 46 **/ 47 LIB_API double timer_Time(); 48 49 /** 50 * @return resolution [s] of the timer. 51 **/ 52 LIB_API double timer_Resolution(); 53 54 55 // (allow using XADD (faster than CMPXCHG) in 64-bit builds without casting) 56 #if ARCH_AMD64 57 typedef intptr_t Cycles; 58 #else 59 typedef i64 Cycles; 60 #endif 61 62 /** 63 * internal helper functions for returning an easily readable 64 * string (i.e. re-scaled to appropriate units) 65 **/ 66 LIB_API std::string StringForSeconds(double seconds); 67 LIB_API std::string StringForCycles(Cycles cycles); 68 69 70 //----------------------------------------------------------------------------- 71 // scope timing 72 73 /// used by TIMER 74 class ScopeTimer 75 { 76 NONCOPYABLE(ScopeTimer); 77 public: ScopeTimer(const wchar_t * description)78 ScopeTimer(const wchar_t* description) 79 : m_t0(timer_Time()), m_description(description) 80 { 81 } 82 ~ScopeTimer()83 ~ScopeTimer() 84 { 85 const double t1 = timer_Time(); 86 const std::string elapsedTimeString = StringForSeconds(t1-m_t0); 87 debug_printf("TIMER| %s: %s\n", utf8_from_wstring(m_description).c_str(), elapsedTimeString.c_str()); 88 } 89 90 private: 91 double m_t0; 92 const wchar_t* m_description; 93 }; 94 95 /** 96 * Measures the time taken to execute code up until end of the current scope; 97 * displays it via debug_printf. Can safely be nested. 98 * Useful for measuring time spent in a function or basic block. 99 * <description> must remain valid over the lifetime of this object; 100 * a string literal is safest. 101 * 102 * Example usage: 103 * void func() 104 * { 105 * TIMER(L"description"); 106 * // code to be measured 107 * } 108 **/ 109 #define TIMER(description) ScopeTimer UID__(description) 110 111 /** 112 * Measures the time taken to execute code between BEGIN and END markers; 113 * displays it via debug_printf. Can safely be nested. 114 * Useful for measuring several pieces of code within the same function/block. 115 * <description> must remain valid over the lifetime of this object; 116 * a string literal is safest. 117 * 118 * Caveats: 119 * - this wraps the code to be measured in a basic block, so any 120 * variables defined there are invisible to surrounding code. 121 * - the description passed to END isn't inspected; you are responsible for 122 * ensuring correct nesting! 123 * 124 * Example usage: 125 * void func2() 126 * { 127 * // uninteresting code 128 * TIMER_BEGIN(L"description2"); 129 * // code to be measured 130 * TIMER_END(L"description2"); 131 * // uninteresting code 132 * } 133 **/ 134 #define TIMER_BEGIN(description) { ScopeTimer UID__(description) 135 #define TIMER_END(description) } 136 137 138 //----------------------------------------------------------------------------- 139 // cumulative timer API 140 141 // this supplements in-game profiling by providing low-overhead, 142 // high resolution time accounting of specific areas. 143 144 // since TIMER_ACCRUE et al. are called so often, we try to keep 145 // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles 146 // returned by x86_x64::rdtsc) instead of absolute time has two benefits: 147 // - no need to convert from raw->time on every call 148 // (instead, it's only done once when displaying the totals) 149 // - possibly less overhead to querying the time itself 150 // (timer_Time may be using slower time sources with ~3us overhead) 151 // 152 // however, the cycle count is not necessarily a measure of wall-clock time 153 // (see http://www.gamedev.net/reference/programming/features/timing). 154 // therefore, on systems with SpeedStep active, measurements of I/O or other 155 // non-CPU bound activity may be skewed. this is ok because the timer is 156 // only used for profiling; just be aware of the issue. 157 // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC. 158 // 159 // note that overflow isn't an issue either way (63 bit cycle counts 160 // at 10 GHz cover intervals of 29 years). 161 162 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC 163 164 class TimerUnit 165 { 166 public: SetToZero()167 void SetToZero() 168 { 169 m_cycles = 0; 170 } 171 SetFromTimer()172 void SetFromTimer() 173 { 174 m_cycles = x86_x64::rdtsc(); 175 } 176 AddDifference(TimerUnit t0,TimerUnit t1)177 void AddDifference(TimerUnit t0, TimerUnit t1) 178 { 179 m_cycles += t1.m_cycles - t0.m_cycles; 180 } 181 AddDifferenceAtomic(TimerUnit t0,TimerUnit t1)182 void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1) 183 { 184 const Cycles delta = t1.m_cycles - t0.m_cycles; 185 #if ARCH_AMD64 186 cpu_AtomicAdd(&m_cycles, delta); 187 #elif ARCH_IA32 188 retry: 189 if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta)) 190 goto retry; 191 #else 192 # error "port" 193 #endif 194 } 195 Subtract(TimerUnit t)196 void Subtract(TimerUnit t) 197 { 198 m_cycles -= t.m_cycles; 199 } 200 ToString()201 std::string ToString() const 202 { 203 ENSURE(m_cycles >= 0); 204 return StringForCycles(m_cycles); 205 } 206 ToSeconds()207 double ToSeconds() const 208 { 209 return (double)m_cycles / os_cpu_ClockFrequency(); 210 } 211 212 private: 213 Cycles m_cycles; 214 }; 215 216 #else 217 218 class TimerUnit 219 { 220 public: SetToZero()221 void SetToZero() 222 { 223 m_seconds = 0.0; 224 } 225 SetFromTimer()226 void SetFromTimer() 227 { 228 m_seconds = timer_Time(); 229 } 230 AddDifference(TimerUnit t0,TimerUnit t1)231 void AddDifference(TimerUnit t0, TimerUnit t1) 232 { 233 m_seconds += t1.m_seconds - t0.m_seconds; 234 } 235 AddDifferenceAtomic(TimerUnit t0,TimerUnit t1)236 void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1) 237 { 238 retry: 239 i64 oldRepresentation; 240 memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation)); 241 242 const double seconds = m_seconds + t1.m_seconds - t0.m_seconds; 243 i64 newRepresentation; 244 memcpy(&newRepresentation, &seconds, sizeof(newRepresentation)); 245 246 if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation)) 247 goto retry; 248 } 249 Subtract(TimerUnit t)250 void Subtract(TimerUnit t) 251 { 252 m_seconds -= t.m_seconds; 253 } 254 ToString()255 std::string ToString() const 256 { 257 ENSURE(m_seconds >= 0.0); 258 return StringForSeconds(m_seconds); 259 } 260 ToSeconds()261 double ToSeconds() const 262 { 263 return m_seconds; 264 } 265 266 private: 267 double m_seconds; 268 }; 269 270 #endif 271 272 // opaque - do not access its fields! 273 // note: must be defined here because clients instantiate them; 274 // fields cannot be made private due to POD requirement. 275 struct TimerClient 276 { 277 TimerUnit sum; // total bill 278 279 // only store a pointer for efficiency. 280 const wchar_t* description; 281 282 TimerClient* next; 283 284 // how often the timer was billed (helps measure relative 285 // performance of something that is done indeterminately often). 286 intptr_t num_calls; 287 }; 288 289 /** 290 * make the given TimerClient (usually instantiated as static data) 291 * ready for use. returns its address for TIMER_ADD_CLIENT's convenience. 292 * this client's total (which is increased by a BillingPolicy) will be 293 * displayed by timer_DisplayClientTotals. 294 * notes: 295 * - may be called at any time; 296 * - always succeeds (there's no fixed limit); 297 * - free() is not needed nor possible. 298 * - description must remain valid until exit; a string literal is safest. 299 **/ 300 LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description); 301 302 /** 303 * "allocate" a new TimerClient that will keep track of the total time 304 * billed to it, along with a description string. These are displayed when 305 * timer_DisplayClientTotals is called. 306 * Invoke this at file or function scope; a (static) TimerClient pointer of 307 * name \<id\> will be defined, which should be passed to TIMER_ACCRUE. 308 **/ 309 #define TIMER_ADD_CLIENT(id)\ 310 static TimerClient UID__;\ 311 static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id)) 312 313 /** 314 * bill the difference between t0 and t1 to the client's total. 315 **/ 316 struct BillingPolicy_Default 317 { operatorBillingPolicy_Default318 void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const 319 { 320 tc->sum.AddDifference(t0, t1); 321 tc->num_calls++; 322 } 323 }; 324 325 /** 326 * thread-safe (not used by default due to its higher overhead) 327 * note: we can't just use thread-local variables to avoid 328 * synchronization overhead because we don't have control over all 329 * threads (for accumulating their separate timer copies). 330 **/ 331 struct BillingPolicy_Atomic 332 { operatorBillingPolicy_Atomic333 void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const 334 { 335 tc->sum.AddDifferenceAtomic(t0, t1); 336 cpu_AtomicAdd(&tc->num_calls, +1); 337 } 338 }; 339 340 /** 341 * display all clients' totals; does not reset them. 342 * typically called at exit. 343 **/ 344 LIB_API void timer_DisplayClientTotals(); 345 346 347 /// used by TIMER_ACCRUE 348 template<class BillingPolicy = BillingPolicy_Default> 349 class ScopeTimerAccrue 350 { 351 NONCOPYABLE(ScopeTimerAccrue); 352 public: ScopeTimerAccrue(TimerClient * tc)353 ScopeTimerAccrue(TimerClient* tc) 354 : m_tc(tc) 355 { 356 m_t0.SetFromTimer(); 357 } 358 ~ScopeTimerAccrue()359 ~ScopeTimerAccrue() 360 { 361 TimerUnit t1; 362 t1.SetFromTimer(); 363 BillingPolicy()(m_tc, m_t0, t1); 364 } 365 366 private: 367 TimerUnit m_t0; 368 TimerClient* m_tc; 369 }; 370 371 /** 372 * Measure the time taken to execute code up until end of the current scope; 373 * bill it to the given TimerClient object. Can safely be nested. 374 * Useful for measuring total time spent in a function or basic block over the 375 * entire program. 376 * `client' is an identifier registered via TIMER_ADD_CLIENT. 377 * 378 * Example usage: 379 * TIMER_ADD_CLIENT(client); 380 * 381 * void func() 382 * { 383 * TIMER_ACCRUE(client); 384 * // code to be measured 385 * } 386 * 387 * [later or at exit] 388 * timer_DisplayClientTotals(); 389 **/ 390 #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client) 391 #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client) 392 393 #endif // #ifndef INCLUDED_TIMER 394