1 /* Copyright (C) 2010 Wildfire Games.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * platform-independent high resolution timer
25  */
26 
27 #ifndef INCLUDED_TIMER
28 #define INCLUDED_TIMER
29 
30 #include "lib/config2.h"	// CONFIG2_TIMER_ALLOW_RDTSC
31 #include "lib/sysdep/cpu.h"	// cpu_AtomicAdd
32 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
33 # include "lib/sysdep/os_cpu.h"	// os_cpu_ClockFrequency
34 # include "lib/sysdep/arch/x86_x64/x86_x64.h"	// x86_x64::rdtsc
35 #endif
36 
37 #include "lib/utf8.h"
38 
39 /**
40  * timer_Time will subsequently return values relative to the current time.
41  **/
42 LIB_API void timer_LatchStartTime();
43 
44 /**
45  * @return high resolution (> 1 us) timestamp [s].
46  **/
47 LIB_API double timer_Time();
48 
49 /**
50  * @return resolution [s] of the timer.
51  **/
52 LIB_API double timer_Resolution();
53 
54 
55 // (allow using XADD (faster than CMPXCHG) in 64-bit builds without casting)
56 #if ARCH_AMD64
57 typedef intptr_t Cycles;
58 #else
59 typedef i64 Cycles;
60 #endif
61 
62 /**
63  * internal helper functions for returning an easily readable
64  * string (i.e. re-scaled to appropriate units)
65  **/
66 LIB_API std::string StringForSeconds(double seconds);
67 LIB_API std::string StringForCycles(Cycles cycles);
68 
69 
70 //-----------------------------------------------------------------------------
71 // scope timing
72 
73 /// used by TIMER
74 class ScopeTimer
75 {
76 	NONCOPYABLE(ScopeTimer);
77 public:
ScopeTimer(const wchar_t * description)78 	ScopeTimer(const wchar_t* description)
79 		: m_t0(timer_Time()), m_description(description)
80 	{
81 	}
82 
~ScopeTimer()83 	~ScopeTimer()
84 	{
85 		const double t1 = timer_Time();
86 		const std::string elapsedTimeString = StringForSeconds(t1-m_t0);
87 		debug_printf("TIMER| %s: %s\n", utf8_from_wstring(m_description).c_str(), elapsedTimeString.c_str());
88 	}
89 
90 private:
91 	double m_t0;
92 	const wchar_t* m_description;
93 };
94 
95 /**
96  * Measures the time taken to execute code up until end of the current scope;
97  * displays it via debug_printf. Can safely be nested.
98  * Useful for measuring time spent in a function or basic block.
99  * <description> must remain valid over the lifetime of this object;
100  * a string literal is safest.
101  *
102  * Example usage:
103  * 	void func()
104  * 	{
105  * 		TIMER(L"description");
106  * 		// code to be measured
107  * 	}
108  **/
109 #define TIMER(description) ScopeTimer UID__(description)
110 
111 /**
112  * Measures the time taken to execute code between BEGIN and END markers;
113  * displays it via debug_printf. Can safely be nested.
114  * Useful for measuring several pieces of code within the same function/block.
115  * <description> must remain valid over the lifetime of this object;
116  * a string literal is safest.
117  *
118  * Caveats:
119  * - this wraps the code to be measured in a basic block, so any
120  *   variables defined there are invisible to surrounding code.
121  * - the description passed to END isn't inspected; you are responsible for
122  *   ensuring correct nesting!
123  *
124  * Example usage:
125  * 	void func2()
126  * 	{
127  * 		// uninteresting code
128  * 		TIMER_BEGIN(L"description2");
129  * 		// code to be measured
130  * 		TIMER_END(L"description2");
131  * 		// uninteresting code
132  * 	}
133  **/
134 #define TIMER_BEGIN(description) { ScopeTimer UID__(description)
135 #define TIMER_END(description) }
136 
137 
138 //-----------------------------------------------------------------------------
139 // cumulative timer API
140 
141 // this supplements in-game profiling by providing low-overhead,
142 // high resolution time accounting of specific areas.
143 
144 // since TIMER_ACCRUE et al. are called so often, we try to keep
145 // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
146 // returned by x86_x64::rdtsc) instead of absolute time has two benefits:
147 // - no need to convert from raw->time on every call
148 //   (instead, it's only done once when displaying the totals)
149 // - possibly less overhead to querying the time itself
150 //   (timer_Time may be using slower time sources with ~3us overhead)
151 //
152 // however, the cycle count is not necessarily a measure of wall-clock time
153 // (see http://www.gamedev.net/reference/programming/features/timing).
154 // therefore, on systems with SpeedStep active, measurements of I/O or other
155 // non-CPU bound activity may be skewed. this is ok because the timer is
156 // only used for profiling; just be aware of the issue.
157 // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC.
158 //
159 // note that overflow isn't an issue either way (63 bit cycle counts
160 // at 10 GHz cover intervals of 29 years).
161 
162 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
163 
164 class TimerUnit
165 {
166 public:
SetToZero()167 	void SetToZero()
168 	{
169 		m_cycles = 0;
170 	}
171 
SetFromTimer()172 	void SetFromTimer()
173 	{
174 		m_cycles = x86_x64::rdtsc();
175 	}
176 
AddDifference(TimerUnit t0,TimerUnit t1)177 	void AddDifference(TimerUnit t0, TimerUnit t1)
178 	{
179 		m_cycles += t1.m_cycles - t0.m_cycles;
180 	}
181 
AddDifferenceAtomic(TimerUnit t0,TimerUnit t1)182 	void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
183 	{
184 		const Cycles delta = t1.m_cycles - t0.m_cycles;
185 #if ARCH_AMD64
186 		cpu_AtomicAdd(&m_cycles, delta);
187 #elif ARCH_IA32
188 retry:
189 		if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
190 			goto retry;
191 #else
192 # error "port"
193 #endif
194 	}
195 
Subtract(TimerUnit t)196 	void Subtract(TimerUnit t)
197 	{
198 		m_cycles -= t.m_cycles;
199 	}
200 
ToString()201 	std::string ToString() const
202 	{
203 		ENSURE(m_cycles >= 0);
204 		return StringForCycles(m_cycles);
205 	}
206 
ToSeconds()207 	double ToSeconds() const
208 	{
209 		return (double)m_cycles / os_cpu_ClockFrequency();
210 	}
211 
212 private:
213 	Cycles m_cycles;
214 };
215 
216 #else
217 
218 class TimerUnit
219 {
220 public:
SetToZero()221 	void SetToZero()
222 	{
223 		m_seconds = 0.0;
224 	}
225 
SetFromTimer()226 	void SetFromTimer()
227 	{
228 		m_seconds = timer_Time();
229 	}
230 
AddDifference(TimerUnit t0,TimerUnit t1)231 	void AddDifference(TimerUnit t0, TimerUnit t1)
232 	{
233 		m_seconds += t1.m_seconds - t0.m_seconds;
234 	}
235 
AddDifferenceAtomic(TimerUnit t0,TimerUnit t1)236 	void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
237 	{
238 retry:
239 		i64 oldRepresentation;
240 		memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation));
241 
242 		const double seconds = m_seconds + t1.m_seconds - t0.m_seconds;
243 		i64 newRepresentation;
244 		memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
245 
246 		if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
247 			goto retry;
248 	}
249 
Subtract(TimerUnit t)250 	void Subtract(TimerUnit t)
251 	{
252 		m_seconds -= t.m_seconds;
253 	}
254 
ToString()255 	std::string ToString() const
256 	{
257 		ENSURE(m_seconds >= 0.0);
258 		return StringForSeconds(m_seconds);
259 	}
260 
ToSeconds()261 	double ToSeconds() const
262 	{
263 		return m_seconds;
264 	}
265 
266 private:
267 	double m_seconds;
268 };
269 
270 #endif
271 
272 // opaque - do not access its fields!
273 // note: must be defined here because clients instantiate them;
274 // fields cannot be made private due to POD requirement.
275 struct TimerClient
276 {
277 	TimerUnit sum;	// total bill
278 
279 	// only store a pointer for efficiency.
280 	const wchar_t* description;
281 
282 	TimerClient* next;
283 
284 	// how often the timer was billed (helps measure relative
285 	// performance of something that is done indeterminately often).
286 	intptr_t num_calls;
287 };
288 
289 /**
290  * make the given TimerClient (usually instantiated as static data)
291  * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
292  * this client's total (which is increased by a BillingPolicy) will be
293  * displayed by timer_DisplayClientTotals.
294  * notes:
295  * - may be called at any time;
296  * - always succeeds (there's no fixed limit);
297  * - free() is not needed nor possible.
298  * - description must remain valid until exit; a string literal is safest.
299  **/
300 LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description);
301 
302 /**
303  * "allocate" a new TimerClient that will keep track of the total time
304  * billed to it, along with a description string. These are displayed when
305  * timer_DisplayClientTotals is called.
306  * Invoke this at file or function scope; a (static) TimerClient pointer of
307  * name \<id\> will be defined, which should be passed to TIMER_ACCRUE.
308  **/
309 #define TIMER_ADD_CLIENT(id)\
310 	static TimerClient UID__;\
311 	static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id))
312 
313 /**
314  * bill the difference between t0 and t1 to the client's total.
315  **/
316 struct BillingPolicy_Default
317 {
operatorBillingPolicy_Default318 	void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
319 	{
320 		tc->sum.AddDifference(t0, t1);
321 		tc->num_calls++;
322 	}
323 };
324 
325 /**
326  * thread-safe (not used by default due to its higher overhead)
327  * note: we can't just use thread-local variables to avoid
328  * synchronization overhead because we don't have control over all
329  * threads (for accumulating their separate timer copies).
330  **/
331 struct BillingPolicy_Atomic
332 {
operatorBillingPolicy_Atomic333 	void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
334 	{
335 		tc->sum.AddDifferenceAtomic(t0, t1);
336 		cpu_AtomicAdd(&tc->num_calls, +1);
337 	}
338 };
339 
340 /**
341  * display all clients' totals; does not reset them.
342  * typically called at exit.
343  **/
344 LIB_API void timer_DisplayClientTotals();
345 
346 
347 /// used by TIMER_ACCRUE
348 template<class BillingPolicy = BillingPolicy_Default>
349 class ScopeTimerAccrue
350 {
351 	NONCOPYABLE(ScopeTimerAccrue);
352 public:
ScopeTimerAccrue(TimerClient * tc)353 	ScopeTimerAccrue(TimerClient* tc)
354 		: m_tc(tc)
355 	{
356 		m_t0.SetFromTimer();
357 	}
358 
~ScopeTimerAccrue()359 	~ScopeTimerAccrue()
360 	{
361 		TimerUnit t1;
362 		t1.SetFromTimer();
363 		BillingPolicy()(m_tc, m_t0, t1);
364 	}
365 
366 private:
367 	TimerUnit m_t0;
368 	TimerClient* m_tc;
369 };
370 
371 /**
372  * Measure the time taken to execute code up until end of the current scope;
373  * bill it to the given TimerClient object. Can safely be nested.
374  * Useful for measuring total time spent in a function or basic block over the
375  * entire program.
376  * `client' is an identifier registered via TIMER_ADD_CLIENT.
377  *
378  * Example usage:
379  * 	TIMER_ADD_CLIENT(client);
380  *
381  * 	void func()
382  * 	{
383  * 		TIMER_ACCRUE(client);
384  * 		// code to be measured
385  * 	}
386  *
387  * 	[later or at exit]
388  * 	timer_DisplayClientTotals();
389  **/
390 #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
391 #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
392 
393 #endif	// #ifndef INCLUDED_TIMER
394