1 /*
2 
3 ***************************************************************************************************
4 **
5 ** profile.cpp
6 **
7 ** Real-Time Hierarchical Profiling for Game Programming Gems 3
8 **
9 ** by Greg Hjelstrom & Byon Garrabrant
10 **
11 ***************************************************************************************************/
12 
13 // Credits: The Clock class was inspired by the Timer classes in
14 // Ogre (www.ogre3d.org).
15 
16 #include "btQuickprof.h"
17 
18 #ifndef BT_NO_PROFILE
19 
20 
21 static btClock gProfileClock;
22 
23 
24 #ifdef __CELLOS_LV2__
25 #include <sys/sys_time.h>
26 #include <sys/time_util.h>
27 #include <stdio.h>
28 #endif
29 
30 #if defined (SUNOS) || defined (__SUNOS__)
31 #include <stdio.h>
32 #endif
33 
34 #if defined(WIN32) || defined(_WIN32)
35 
36 #define BT_USE_WINDOWS_TIMERS
37 #define WIN32_LEAN_AND_MEAN
38 #define NOWINRES
39 #define NOMCX
40 #define NOIME
41 
42 #ifdef _XBOX
43 	#include <Xtl.h>
44 #else //_XBOX
45 	#include <windows.h>
46 
47 #if WINVER < 0x0600
GetTickCount64()48 ULONGLONG GetTickCount64() { return GetTickCount(); }
49 #endif
50 
51 #endif //_XBOX
52 
53 #include <time.h>
54 
55 
56 #else //_WIN32
57 #include <sys/time.h>
58 #endif //_WIN32
59 
60 #define mymin(a,b) (a > b ? a : b)
61 
62 struct btClockData
63 {
64 
65 #ifdef BT_USE_WINDOWS_TIMERS
66 	LARGE_INTEGER mClockFrequency;
67 	LONGLONG mStartTick;
68 	LONGLONG mPrevElapsedTime;
69 	LARGE_INTEGER mStartTime;
70 #else
71 #ifdef __CELLOS_LV2__
72 	uint64_t	mStartTime;
73 #else
74 	struct timeval mStartTime;
75 #endif
76 #endif //__CELLOS_LV2__
77 
78 };
79 
80 ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
btClock()81 btClock::btClock()
82 {
83 	m_data = new btClockData;
84 #ifdef BT_USE_WINDOWS_TIMERS
85 	QueryPerformanceFrequency(&m_data->mClockFrequency);
86 #endif
87 	reset();
88 }
89 
~btClock()90 btClock::~btClock()
91 {
92 	delete m_data;
93 }
94 
btClock(const btClock & other)95 btClock::btClock(const btClock& other)
96 {
97 	m_data = new btClockData;
98 	*m_data = *other.m_data;
99 }
100 
operator =(const btClock & other)101 btClock& btClock::operator=(const btClock& other)
102 {
103 	*m_data = *other.m_data;
104 	return *this;
105 }
106 
107 
108 	/// Resets the initial reference time.
reset()109 void btClock::reset()
110 {
111 #ifdef BT_USE_WINDOWS_TIMERS
112 	QueryPerformanceCounter(&m_data->mStartTime);
113 	m_data->mStartTick = GetTickCount64();
114 	m_data->mPrevElapsedTime = 0;
115 #else
116 #ifdef __CELLOS_LV2__
117 
118 	typedef uint64_t  ClockSize;
119 	ClockSize newTime;
120 	//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
121 	SYS_TIMEBASE_GET( newTime );
122 	m_data->mStartTime = newTime;
123 #else
124 	gettimeofday(&m_data->mStartTime, 0);
125 #endif
126 #endif
127 }
128 
129 /// Returns the time in ms since the last call to reset or since
130 /// the btClock was created.
getTimeMilliseconds()131 unsigned long int btClock::getTimeMilliseconds()
132 {
133 #ifdef BT_USE_WINDOWS_TIMERS
134 	LARGE_INTEGER currentTime;
135 	QueryPerformanceCounter(&currentTime);
136 	LONGLONG elapsedTime = currentTime.QuadPart -
137 		m_data->mStartTime.QuadPart;
138 		// Compute the number of millisecond ticks elapsed.
139 	unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
140 		m_data->mClockFrequency.QuadPart);
141 		// Check for unexpected leaps in the Win32 performance counter.
142 		// (This is caused by unexpected data across the PCI to ISA
143 		// bridge, aka south bridge.  See Microsoft KB274323.)
144 		unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick);
145 		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
146 		if (msecOff < -100 || msecOff > 100)
147 		{
148 			// Adjust the starting time forwards.
149 			LONGLONG msecAdjustment = mymin(msecOff *
150 				m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
151 				m_data->mPrevElapsedTime);
152 			m_data->mStartTime.QuadPart += msecAdjustment;
153 			elapsedTime -= msecAdjustment;
154 
155 			// Recompute the number of millisecond ticks elapsed.
156 			msecTicks = (unsigned long)(1000 * elapsedTime /
157 				m_data->mClockFrequency.QuadPart);
158 		}
159 
160 		// Store the current elapsed time for adjustments next time.
161 		m_data->mPrevElapsedTime = elapsedTime;
162 
163 		return msecTicks;
164 #else
165 
166 #ifdef __CELLOS_LV2__
167 		uint64_t freq=sys_time_get_timebase_frequency();
168 		double dFreq=((double) freq) / 1000.0;
169 		typedef uint64_t  ClockSize;
170 		ClockSize newTime;
171 		SYS_TIMEBASE_GET( newTime );
172 		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
173 
174 		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
175 #else
176 
177 		struct timeval currentTime;
178 		gettimeofday(&currentTime, 0);
179 		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 +
180 			(currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
181 #endif //__CELLOS_LV2__
182 #endif
183 }
184 
185 	/// Returns the time in us since the last call to reset or since
186 	/// the Clock was created.
getTimeMicroseconds()187 unsigned long int btClock::getTimeMicroseconds()
188 {
189 #ifdef BT_USE_WINDOWS_TIMERS
190 		LARGE_INTEGER currentTime;
191 		QueryPerformanceCounter(&currentTime);
192 		LONGLONG elapsedTime = currentTime.QuadPart -
193 			m_data->mStartTime.QuadPart;
194 
195 		// Compute the number of millisecond ticks elapsed.
196 		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime /
197 			m_data->mClockFrequency.QuadPart);
198 
199 		// Check for unexpected leaps in the Win32 performance counter.
200 		// (This is caused by unexpected data across the PCI to ISA
201 		// bridge, aka south bridge.  See Microsoft KB274323.)
202 		unsigned long elapsedTicks = (unsigned long)(GetTickCount64() - m_data->mStartTick);
203 		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
204 		if (msecOff < -100 || msecOff > 100)
205 		{
206 			// Adjust the starting time forwards.
207 			LONGLONG msecAdjustment = mymin(msecOff *
208 				m_data->mClockFrequency.QuadPart / 1000, elapsedTime -
209 				m_data->mPrevElapsedTime);
210 			m_data->mStartTime.QuadPart += msecAdjustment;
211 			elapsedTime -= msecAdjustment;
212 		}
213 
214 		// Store the current elapsed time for adjustments next time.
215 		m_data->mPrevElapsedTime = elapsedTime;
216 
217 		// Convert to microseconds.
218 		unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime /
219 			m_data->mClockFrequency.QuadPart);
220 
221 		return usecTicks;
222 #else
223 
224 #ifdef __CELLOS_LV2__
225 		uint64_t freq=sys_time_get_timebase_frequency();
226 		double dFreq=((double) freq)/ 1000000.0;
227 		typedef uint64_t  ClockSize;
228 		ClockSize newTime;
229 		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
230 		SYS_TIMEBASE_GET( newTime );
231 
232 		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
233 #else
234 
235 		struct timeval currentTime;
236 		gettimeofday(&currentTime, 0);
237 		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 +
238 			(currentTime.tv_usec - m_data->mStartTime.tv_usec);
239 #endif//__CELLOS_LV2__
240 #endif
241 }
242 
243 
244 
245 /// Returns the time in s since the last call to reset or since
246 /// the Clock was created.
getTimeSeconds()247 btScalar btClock::getTimeSeconds()
248 {
249 	static const btScalar microseconds_to_seconds = btScalar(0.000001);
250 	return btScalar(getTimeMicroseconds()) * microseconds_to_seconds;
251 }
252 
253 
254 
Profile_Get_Ticks(unsigned long int * ticks)255 inline void Profile_Get_Ticks(unsigned long int * ticks)
256 {
257 	*ticks = gProfileClock.getTimeMicroseconds();
258 }
259 
Profile_Get_Tick_Rate(void)260 inline float Profile_Get_Tick_Rate(void)
261 {
262 //	return 1000000.f;
263 	return 1000.f;
264 
265 }
266 
267 
268 
269 /***************************************************************************************************
270 **
271 ** CProfileNode
272 **
273 ***************************************************************************************************/
274 
275 /***********************************************************************************************
276  * INPUT:                                                                                      *
277  * name - pointer to a static string which is the name of this profile node                    *
278  * parent - parent pointer                                                                     *
279  *                                                                                             *
280  * WARNINGS:                                                                                   *
281  * The name is assumed to be a static pointer, only the pointer is stored and compared for     *
282  * efficiency reasons.                                                                         *
283  *=============================================================================================*/
CProfileNode(const char * name,CProfileNode * parent)284 CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
285 	Name( name ),
286 	TotalCalls( 0 ),
287 	TotalTime( 0 ),
288 	StartTime( 0 ),
289 	RecursionCounter( 0 ),
290 	Parent( parent ),
291 	Child( NULL ),
292 	Sibling( NULL ),
293 	m_userPtr(0)
294 {
295 	Reset();
296 }
297 
298 
CleanupMemory()299 void	CProfileNode::CleanupMemory()
300 {
301 	delete ( Child);
302 	Child = NULL;
303 	delete ( Sibling);
304 	Sibling = NULL;
305 }
306 
~CProfileNode(void)307 CProfileNode::~CProfileNode( void )
308 {
309 	CleanupMemory();
310 }
311 
312 
313 /***********************************************************************************************
314  * INPUT:                                                                                      *
315  * name - static string pointer to the name of the node we are searching for                   *
316  *                                                                                             *
317  * WARNINGS:                                                                                   *
318  * All profile names are assumed to be static strings so this function uses pointer compares   *
319  * to find the named node.                                                                     *
320  *=============================================================================================*/
Get_Sub_Node(const char * name)321 CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
322 {
323 	// Try to find this sub node
324 	CProfileNode * child = Child;
325 	while ( child ) {
326 		if ( child->Name == name ) {
327 			return child;
328 		}
329 		child = child->Sibling;
330 	}
331 
332 	// We didn't find it, so add it
333 
334 	CProfileNode * node = new CProfileNode( name, this );
335 	node->Sibling = Child;
336 	Child = node;
337 	return node;
338 }
339 
340 
Reset(void)341 void	CProfileNode::Reset( void )
342 {
343 	TotalCalls = 0;
344 	TotalTime = 0.0f;
345 
346 
347 	if ( Child ) {
348 		Child->Reset();
349 	}
350 	if ( Sibling ) {
351 		Sibling->Reset();
352 	}
353 }
354 
355 
Call(void)356 void	CProfileNode::Call( void )
357 {
358 	TotalCalls++;
359 	if (RecursionCounter++ == 0) {
360 		Profile_Get_Ticks(&StartTime);
361 	}
362 }
363 
364 
Return(void)365 bool	CProfileNode::Return( void )
366 {
367 	if ( --RecursionCounter == 0 && TotalCalls != 0 ) {
368 		unsigned long int time;
369 		Profile_Get_Ticks(&time);
370 		time-=StartTime;
371 		TotalTime += (float)time / Profile_Get_Tick_Rate();
372 	}
373 	return ( RecursionCounter == 0 );
374 }
375 
376 
377 /***************************************************************************************************
378 **
379 ** CProfileIterator
380 **
381 ***************************************************************************************************/
CProfileIterator(CProfileNode * start)382 CProfileIterator::CProfileIterator( CProfileNode * start )
383 {
384 	CurrentParent = start;
385 	CurrentChild = CurrentParent->Get_Child();
386 }
387 
388 
First(void)389 void	CProfileIterator::First(void)
390 {
391 	CurrentChild = CurrentParent->Get_Child();
392 }
393 
394 
Next(void)395 void	CProfileIterator::Next(void)
396 {
397 	CurrentChild = CurrentChild->Get_Sibling();
398 }
399 
400 
Is_Done(void)401 bool	CProfileIterator::Is_Done(void)
402 {
403 	return CurrentChild == NULL;
404 }
405 
406 
Enter_Child(int index)407 void	CProfileIterator::Enter_Child( int index )
408 {
409 	CurrentChild = CurrentParent->Get_Child();
410 	while ( (CurrentChild != NULL) && (index != 0) ) {
411 		index--;
412 		CurrentChild = CurrentChild->Get_Sibling();
413 	}
414 
415 	if ( CurrentChild != NULL ) {
416 		CurrentParent = CurrentChild;
417 		CurrentChild = CurrentParent->Get_Child();
418 	}
419 }
420 
421 
Enter_Parent(void)422 void	CProfileIterator::Enter_Parent( void )
423 {
424 	if ( CurrentParent->Get_Parent() != NULL ) {
425 		CurrentParent = CurrentParent->Get_Parent();
426 	}
427 	CurrentChild = CurrentParent->Get_Child();
428 }
429 
430 
431 /***************************************************************************************************
432 **
433 ** CProfileManager
434 **
435 ***************************************************************************************************/
436 
437 CProfileNode	CProfileManager::Root( "Root", NULL );
438 CProfileNode *	CProfileManager::CurrentNode = &CProfileManager::Root;
439 int				CProfileManager::FrameCounter = 0;
440 unsigned long int			CProfileManager::ResetTime = 0;
441 
442 
443 /***********************************************************************************************
444  * CProfileManager::Start_Profile -- Begin a named profile                                    *
445  *                                                                                             *
446  * Steps one level deeper into the tree, if a child already exists with the specified name     *
447  * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
448  *                                                                                             *
449  * INPUT:                                                                                      *
450  * name - name of this profiling record                                                        *
451  *                                                                                             *
452  * WARNINGS:                                                                                   *
453  * The string used is assumed to be a static string; pointer compares are used throughout      *
454  * the profiling code for efficiency.                                                          *
455  *=============================================================================================*/
Start_Profile(const char * name)456 void	CProfileManager::Start_Profile( const char * name )
457 {
458 	if (name != CurrentNode->Get_Name()) {
459 		CurrentNode = CurrentNode->Get_Sub_Node( name );
460 	}
461 
462 	CurrentNode->Call();
463 }
464 
465 
466 /***********************************************************************************************
467  * CProfileManager::Stop_Profile -- Stop timing and record the results.                       *
468  *=============================================================================================*/
Stop_Profile(void)469 void	CProfileManager::Stop_Profile( void )
470 {
471 	// Return will indicate whether we should back up to our parent (we may
472 	// be profiling a recursive function)
473 	if (CurrentNode->Return()) {
474 		CurrentNode = CurrentNode->Get_Parent();
475 	}
476 }
477 
478 
479 /***********************************************************************************************
480  * CProfileManager::Reset -- Reset the contents of the profiling system                       *
481  *                                                                                             *
482  *    This resets everything except for the tree structure.  All of the timing data is reset.  *
483  *=============================================================================================*/
Reset(void)484 void	CProfileManager::Reset( void )
485 {
486 	gProfileClock.reset();
487 	Root.Reset();
488     Root.Call();
489 	FrameCounter = 0;
490 	Profile_Get_Ticks(&ResetTime);
491 }
492 
493 
494 /***********************************************************************************************
495  * CProfileManager::Increment_Frame_Counter -- Increment the frame counter                    *
496  *=============================================================================================*/
Increment_Frame_Counter(void)497 void CProfileManager::Increment_Frame_Counter( void )
498 {
499 	FrameCounter++;
500 }
501 
502 
503 /***********************************************************************************************
504  * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset         *
505  *=============================================================================================*/
Get_Time_Since_Reset(void)506 float CProfileManager::Get_Time_Since_Reset( void )
507 {
508 	unsigned long int time;
509 	Profile_Get_Ticks(&time);
510 	time -= ResetTime;
511 	return (float)time / Profile_Get_Tick_Rate();
512 }
513 
514 #include <stdio.h>
515 
dumpRecursive(CProfileIterator * profileIterator,int spacing)516 void	CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
517 {
518 	profileIterator->First();
519 	if (profileIterator->Is_Done())
520 		return;
521 
522 	float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
523 	int i;
524 	int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
525 	for (i=0;i<spacing;i++)	printf(".");
526 	printf("----------------------------------\n");
527 	for (i=0;i<spacing;i++)	printf(".");
528 	printf("Profiling: %s (total running time: %.3f ms) ---\n",	profileIterator->Get_Current_Parent_Name(), parent_time );
529 	float totalTime = 0.f;
530 
531 
532 	int numChildren = 0;
533 
534 	for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
535 	{
536 		numChildren++;
537 		float current_total_time = profileIterator->Get_Current_Total_Time();
538 		accumulated_time += current_total_time;
539 		float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
540 		{
541 			int i;	for (i=0;i<spacing;i++)	printf(".");
542 		}
543 		printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
544 		totalTime += current_total_time;
545 		//recurse into children
546 	}
547 
548 	if (parent_time < accumulated_time)
549 	{
550 		//printf("what's wrong\n");
551 	}
552 	for (i=0;i<spacing;i++)	printf(".");
553 	printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
554 
555 	for (i=0;i<numChildren;i++)
556 	{
557 		profileIterator->Enter_Child(i);
558 		dumpRecursive(profileIterator,spacing+3);
559 		profileIterator->Enter_Parent();
560 	}
561 }
562 
563 
564 
dumpAll()565 void	CProfileManager::dumpAll()
566 {
567 	CProfileIterator* profileIterator = 0;
568 	profileIterator = CProfileManager::Get_Iterator();
569 
570 	dumpRecursive(profileIterator,0);
571 
572 	CProfileManager::Release_Iterator(profileIterator);
573 }
574 
575 
576 
577 
578 #endif //BT_NO_PROFILE
579