1 #pragma once
2 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
6 // means.
7 // In jurisdictions that recognize copyright laws, the author or authors
8 // of this software dedicate any and all copyright interest in the
9 // software to the public domain. We make this dedication for the benefit
10 // of the public at large and to the detriment of our heirs and
11 // successors. We intend this dedication to be an overt act of
12 // relinquishment in perpetuity of all present and future rights to this
13 // software under copyright law.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 // For more information, please refer to <http://unlicense.org/>
22 //
23 // ***********************************************************************
24 //
25 //
26 //
27 //
28 // Howto:
29 // Call these functions from your code:
30 // MicroProfileOnThreadCreate
31 // MicroProfileMouseButton
32 // MicroProfileMousePosition
33 // MicroProfileModKey
34 // MicroProfileFlip <-- Call this once per frame
35 // MicroProfileDraw <-- Call this once per frame
36 // MicroProfileToggleDisplayMode <-- Bind to a key to toggle profiling
37 // MicroProfileTogglePause <-- Bind to a key to toggle pause
38 //
39 // Use these macros in your code in blocks you want to time:
40 //
41 // MICROPROFILE_DECLARE
42 // MICROPROFILE_DEFINE
43 // MICROPROFILE_DECLARE_GPU
44 // MICROPROFILE_DEFINE_GPU
45 // MICROPROFILE_SCOPE
46 // MICROPROFILE_SCOPEI
47 // MICROPROFILE_SCOPEGPU
48 // MICROPROFILE_SCOPEGPUI
// MICROPROFILE_META_CPU
// MICROPROFILE_META_GPU
50 //
51 //
52 // Usage:
53 //
54 // {
// MICROPROFILE_SCOPEI("GroupName", "TimerName", nColorRgb);
56 // ..Code to be timed..
57 // }
58 //
59 // MICROPROFILE_DECLARE / MICROPROFILE_DEFINE allows defining groups in a shared place, to ensure sorting of the timers
60 //
61 // (in global scope)
62 // MICROPROFILE_DEFINE(g_ProfileFisk, "Fisk", "Skalle", nSomeColorRgb);
63 //
64 // (in some other file)
65 // MICROPROFILE_DECLARE(g_ProfileFisk);
66 //
67 // void foo(){
68 // MICROPROFILE_SCOPE(g_ProfileFisk);
69 // }
70 //
// Once code is instrumented, the GUI is activated by calling MicroProfileToggleDisplayMode or by clicking in the upper left corner of
// the screen.
73 //
74 // The following functions must be implemented before the profiler is usable
75 // debug render:
76 // void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters);
77 // void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat);
78 // void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor);
79 // Gpu time stamps: (See below for d3d/opengl helper)
80 // uint32_t MicroProfileGpuInsertTimeStamp();
81 // uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
82 // uint64_t MicroProfileTicksPerSecondGpu();
83 // threading:
84 // const char* MicroProfileGetThreadName(); Threadnames in detailed view
85 //
86 // Default implementations of Gpu timestamp functions:
87 // Opengl:
88 // in .c file where MICROPROFILE_IMPL is defined:
89 // #define MICROPROFILE_GPU_TIMERS_GL
90 // call MicroProfileGpuInitGL() on startup
91 // D3D11:
92 // in .c file where MICROPROFILE_IMPL is defined:
93 // #define MICROPROFILE_GPU_TIMERS_D3D11
// call MicroProfileGpuInitD3D11(). Pass Device & ImmediateContext
95 //
96 // Limitations:
97 // GPU timestamps can only be inserted from one thread.
98
99
100
101 #ifndef MICROPROFILE_ENABLED
102 #define MICROPROFILE_ENABLED 1
103 #endif
104
105 #include <stdint.h>
106 typedef uint64_t MicroProfileToken;
107 typedef uint16_t MicroProfileGroupId;
108
109 #if 0 == MICROPROFILE_ENABLED
110
111 #define MICROPROFILE_DECLARE(var)
112 #define MICROPROFILE_DEFINE(var, group, name, color)
113 #define MICROPROFILE_REGISTER_GROUP(group, color, category)
114 #define MICROPROFILE_DECLARE_GPU(var)
115 #define MICROPROFILE_DEFINE_GPU(var, name, color)
116 #define MICROPROFILE_SCOPE(var) do{}while(0)
117 #define MICROPROFILE_SCOPEI(group, name, color) do{}while(0)
118 #define MICROPROFILE_SCOPEGPU(var) do{}while(0)
119 #define MICROPROFILE_SCOPEGPUI( name, color) do{}while(0)
120 #define MICROPROFILE_META_CPU(name, count)
121 #define MICROPROFILE_META_GPU(name, count)
122 #define MICROPROFILE_FORCEENABLECPUGROUP(s) do{} while(0)
123 #define MICROPROFILE_FORCEDISABLECPUGROUP(s) do{} while(0)
124 #define MICROPROFILE_FORCEENABLEGPUGROUP(s) do{} while(0)
125 #define MICROPROFILE_FORCEDISABLEGPUGROUP(s) do{} while(0)
126 #define MICROPROFILE_SCOPE_TOKEN(token)
127
128 #define MicroProfileGetTime(group, name) 0.f
129 #define MicroProfileOnThreadCreate(foo) do{}while(0)
130 #define MicroProfileFlip() do{}while(0)
131 #define MicroProfileSetAggregateFrames(a) do{}while(0)
132 #define MicroProfileGetAggregateFrames() 0
133 #define MicroProfileGetCurrentAggregateFrames() 0
134 #define MicroProfileTogglePause() do{}while(0)
135 #define MicroProfileToggleAllGroups() do{} while(0)
136 #define MicroProfileDumpTimers() do{}while(0)
137 #define MicroProfileShutdown() do{}while(0)
138 #define MicroProfileSetForceEnable(a) do{} while(0)
139 #define MicroProfileGetForceEnable() false
140 #define MicroProfileSetEnableAllGroups(a) do{} while(0)
141 #define MicroProfileEnableCategory(a) do{} while(0)
142 #define MicroProfileDisableCategory(a) do{} while(0)
143 #define MicroProfileGetEnableAllGroups() false
144 #define MicroProfileSetForceMetaCounters(a)
145 #define MicroProfileGetForceMetaCounters() 0
146 #define MicroProfileEnableMetaCounter(c) do{}while(0)
147 #define MicroProfileDisableMetaCounter(c) do{}while(0)
148 #define MicroProfileDumpFile(html,csv) do{} while(0)
149 #define MicroProfileWebServerPort() ((uint32_t)-1)
150
151 #else
152
153 #include <stdint.h>
154 #include <string.h>
155 #include <algorithm>
156 #include <array>
157 #include <atomic>
158 #include <mutex>
159 #include <thread>
160
161 #ifndef MICROPROFILE_API
162 #define MICROPROFILE_API
163 #endif
164
165 MICROPROFILE_API int64_t MicroProfileTicksPerSecondCpu();
166
167
168 #if defined(__APPLE__)
169 #include <mach/mach.h>
170 #include <mach/mach_time.h>
171 #include <unistd.h>
172 #include <libkern/OSAtomic.h>
173 #include <TargetConditionals.h>
174 #if TARGET_OS_IPHONE
175 #define MICROPROFILE_IOS
176 #endif
177
178 #define MP_TICK() mach_absolute_time()
MicroProfileTicksPerSecondCpu()179 inline int64_t MicroProfileTicksPerSecondCpu()
180 {
181 static int64_t nTicksPerSecond = 0;
182 if(nTicksPerSecond == 0)
183 {
184 mach_timebase_info_data_t sTimebaseInfo;
185 mach_timebase_info(&sTimebaseInfo);
186 nTicksPerSecond = 1000000000ll * sTimebaseInfo.denom / sTimebaseInfo.numer;
187 }
188 return nTicksPerSecond;
189 }
MicroProfileGetCurrentThreadId()190 inline uint64_t MicroProfileGetCurrentThreadId()
191 {
192 uint64_t tid;
193 pthread_threadid_np(pthread_self(), &tid);
194 return tid;
195 }
196
197 #define MP_BREAK() __builtin_trap()
198 #define MP_THREAD_LOCAL __thread
199 #define MP_STRCASECMP strcasecmp
200 #define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
201 typedef uint64_t ThreadIdType;
202 #elif defined(_WIN32)
203 int64_t MicroProfileGetTick();
204 #define MP_TICK() MicroProfileGetTick()
205 #define MP_BREAK() __debugbreak()
206 #define MP_THREAD_LOCAL thread_local
207 #define MP_STRCASECMP _stricmp
208 #define MP_GETCURRENTTHREADID() GetCurrentThreadId()
209 typedef uint32_t ThreadIdType;
210
211 #elif !defined(_WIN32)
212 #include <unistd.h>
213 #include <time.h>
// Returns the CPU tick frequency for the generic POSIX build: one tick per
// nanosecond, i.e. 1e9 ticks per second.
inline int64_t MicroProfileTicksPerSecondCpu()
{
    const int64_t nNanosecondsPerSecond = 1000000000ll;
    return nNanosecondsPerSecond;
}
218
// Returns the current CPU timestamp in nanoseconds (tv_sec * 1e9 + tv_nsec).
// CLOCK_MONOTONIC is used instead of CLOCK_REALTIME: the realtime clock can be
// stepped backwards or forwards when the wall clock is adjusted, which would
// corrupt measured intervals; the monotonic clock never goes backwards.
inline int64_t MicroProfileGetTick()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return 1000000000ll * ts.tv_sec + ts.tv_nsec;
}
225 #define MP_TICK() MicroProfileGetTick()
226 #define MP_BREAK() __builtin_trap()
227 #define MP_THREAD_LOCAL __thread
228 #define MP_STRCASECMP strcasecmp
229 #define MP_GETCURRENTTHREADID() (uint64_t)pthread_self()
230 typedef uint64_t ThreadIdType;
231 #endif
232
233
234 #ifndef MP_GETCURRENTTHREADID
235 #define MP_GETCURRENTTHREADID() 0
236 typedef uint32_t ThreadIdType;
237 #endif
238
239
240 #define MP_ASSERT(a) do{if(!(a)){MP_BREAK();} }while(0)
241 #define MICROPROFILE_DECLARE(var) extern MicroProfileToken g_mp_##var
242 #define MICROPROFILE_DEFINE(var, group, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu)
243 #define MICROPROFILE_REGISTER_GROUP(group, category, color) MicroProfileRegisterGroup(group, category, color)
244 #define MICROPROFILE_DECLARE_GPU(var) extern MicroProfileToken g_mp_##var
245 #define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu)
246 #define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b
247 #define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b)
248 #define MICROPROFILE_TOKEN(var) g_mp_##var
249 #define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
250 #define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token)
251 #define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
252 #define MICROPROFILE_SCOPEGPU(var) MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
253 #define MICROPROFILE_SCOPEGPUI(name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu); MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
254 #define MICROPROFILE_META_CPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeCpu)
255 #define MICROPROFILE_META_GPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeGpu)
256
257
258 #ifndef MICROPROFILE_USE_THREAD_NAME_CALLBACK
259 #define MICROPROFILE_USE_THREAD_NAME_CALLBACK 0
260 #endif
261
262 #ifndef MICROPROFILE_PER_THREAD_BUFFER_SIZE
263 #define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<10)
264 #endif
265
266 #ifndef MICROPROFILE_MAX_FRAME_HISTORY
267 #define MICROPROFILE_MAX_FRAME_HISTORY 512
268 #endif
269
270 #ifndef MICROPROFILE_PRINTF
271 #define MICROPROFILE_PRINTF printf
272 #endif
273
274 #ifndef MICROPROFILE_META_MAX
275 #define MICROPROFILE_META_MAX 8
276 #endif
277
278 #ifndef MICROPROFILE_WEBSERVER_PORT
279 #define MICROPROFILE_WEBSERVER_PORT 1338
280 #endif
281
282 #ifndef MICROPROFILE_WEBSERVER
283 #define MICROPROFILE_WEBSERVER 1
284 #endif
285
286 #ifndef MICROPROFILE_WEBSERVER_MAXFRAMES
287 #define MICROPROFILE_WEBSERVER_MAXFRAMES 30
288 #endif
289
290 #ifndef MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE
291 #define MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE (16<<10)
292 #endif
293
294 #ifndef MICROPROFILE_GPU_TIMERS
295 #define MICROPROFILE_GPU_TIMERS 1
296 #endif
297
298 #ifndef MICROPROFILE_GPU_FRAME_DELAY
299 #define MICROPROFILE_GPU_FRAME_DELAY 3 //must be > 0
300 #endif
301
302
303 #ifndef MICROPROFILE_NAME_MAX_LEN
304 #define MICROPROFILE_NAME_MAX_LEN 64
305 #endif
306
307 #define MICROPROFILE_FORCEENABLECPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeCpu)
308 #define MICROPROFILE_FORCEDISABLECPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeCpu)
309 #define MICROPROFILE_FORCEENABLEGPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeGpu)
310 #define MICROPROFILE_FORCEDISABLEGPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeGpu)
311
312 #define MICROPROFILE_INVALID_TICK ((uint64_t)-1)
313 #define MICROPROFILE_GROUP_MASK_ALL 0xffffffffffff
314
315
// Sentinel token value (all bits set). Fully parenthesized, like
// MICROPROFILE_INVALID_TICK, so it expands safely inside any expression.
#define MICROPROFILE_INVALID_TOKEN ((uint64_t)-1)
317
// Distinguishes CPU tokens from GPU tokens (see MicroProfileGetToken).
enum MicroProfileTokenType
{
    MicroProfileTokenTypeCpu = 0,
    MicroProfileTokenTypeGpu = 1,
};
323
// Box style passed to MicroProfileDrawBox.
enum MicroProfileBoxType
{
    MicroProfileBoxTypeBar = 0,
    MicroProfileBoxTypeFlat = 1,
};
329
330
331
332 struct MicroProfile;
333
334 MICROPROFILE_API void MicroProfileInit();
335 MICROPROFILE_API void MicroProfileShutdown();
336 MICROPROFILE_API MicroProfileToken MicroProfileFindToken(const char* sGroup, const char* sName);
337 MICROPROFILE_API MicroProfileToken MicroProfileGetToken(const char* sGroup, const char* sName, uint32_t nColor, MicroProfileTokenType Token = MicroProfileTokenTypeCpu);
338 MICROPROFILE_API MicroProfileToken MicroProfileGetMetaToken(const char* pName);
339 MICROPROFILE_API void MicroProfileMetaUpdate(MicroProfileToken, int nCount, MicroProfileTokenType eTokenType);
340 MICROPROFILE_API uint64_t MicroProfileEnter(MicroProfileToken nToken);
341 MICROPROFILE_API void MicroProfileLeave(MicroProfileToken nToken, uint64_t nTick);
342 MICROPROFILE_API uint64_t MicroProfileGpuEnter(MicroProfileToken nToken);
343 MICROPROFILE_API void MicroProfileGpuLeave(MicroProfileToken nToken, uint64_t nTick);
MicroProfileGetTimerIndex(MicroProfileToken t)344 inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t){ return (t&0xffff); }
MicroProfileGetGroupMask(MicroProfileToken t)345 inline uint64_t MicroProfileGetGroupMask(MicroProfileToken t){ return ((t>>16)&MICROPROFILE_GROUP_MASK_ALL);}
MicroProfileMakeToken(uint64_t nGroupMask,uint16_t nTimer)346 inline MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint16_t nTimer){ return (nGroupMask<<16) | nTimer;}
347
348 MICROPROFILE_API void MicroProfileFlip(); //! call once per frame.
349 MICROPROFILE_API void MicroProfileTogglePause();
350 MICROPROFILE_API void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type);
351 MICROPROFILE_API void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type);
352 MICROPROFILE_API float MicroProfileGetTime(const char* pGroup, const char* pName);
353 MICROPROFILE_API void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu);
354 MICROPROFILE_API void MicroProfileOnThreadCreate(const char* pThreadName); //should be called from newly created threads
355 MICROPROFILE_API void MicroProfileOnThreadExit(); //call on exit to reuse log
356 MICROPROFILE_API void MicroProfileInitThreadLog();
357 MICROPROFILE_API void MicroProfileSetForceEnable(bool bForceEnable);
358 MICROPROFILE_API bool MicroProfileGetForceEnable();
359 MICROPROFILE_API void MicroProfileSetEnableAllGroups(bool bEnable);
360 MICROPROFILE_API void MicroProfileEnableCategory(const char* pCategory);
361 MICROPROFILE_API void MicroProfileDisableCategory(const char* pCategory);
362 MICROPROFILE_API bool MicroProfileGetEnableAllGroups();
363 MICROPROFILE_API void MicroProfileSetForceMetaCounters(bool bEnable);
364 MICROPROFILE_API bool MicroProfileGetForceMetaCounters();
365 MICROPROFILE_API void MicroProfileEnableMetaCounter(const char* pMet);
366 MICROPROFILE_API void MicroProfileDisableMetaCounter(const char* pMet);
367 MICROPROFILE_API void MicroProfileSetAggregateFrames(int frames);
368 MICROPROFILE_API int MicroProfileGetAggregateFrames();
369 MICROPROFILE_API int MicroProfileGetCurrentAggregateFrames();
370 MICROPROFILE_API MicroProfile* MicroProfileGet();
371 MICROPROFILE_API void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]);
372 MICROPROFILE_API std::recursive_mutex& MicroProfileGetMutex();
373 MICROPROFILE_API void MicroProfileStartContextSwitchTrace();
374 MICROPROFILE_API void MicroProfileStopContextSwitchTrace();
375 MICROPROFILE_API bool MicroProfileIsLocalThread(uint32_t nThreadId);
376
377
378 #if MICROPROFILE_WEBSERVER
379 MICROPROFILE_API void MicroProfileDumpFile(const char* pHtml, const char* pCsv);
380 MICROPROFILE_API uint32_t MicroProfileWebServerPort();
381 #else
// Web server disabled: MicroProfileDumpFile becomes a no-op. Variadic so that
// calls using the real two-argument signature (pHtml, pCsv) still compile.
#define MicroProfileDumpFile(...) do{} while(0)
383 #define MicroProfileWebServerPort() ((uint32_t)-1)
384 #endif
385
386
387
388
389 #if MICROPROFILE_GPU_TIMERS
390 MICROPROFILE_API uint32_t MicroProfileGpuInsertTimeStamp();
391 MICROPROFILE_API uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
392 MICROPROFILE_API uint64_t MicroProfileTicksPerSecondGpu();
393 MICROPROFILE_API int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu);
394 #else
395 #define MicroProfileGpuInsertTimeStamp() 1
396 #define MicroProfileGpuGetTimeStamp(a) 0
397 #define MicroProfileTicksPerSecondGpu() 1
398 #define MicroProfileGetGpuTickReference(a,b) 0
399 #endif
400
401 #if MICROPROFILE_GPU_TIMERS_D3D11
402 #define MICROPROFILE_D3D_MAX_QUERIES (8<<10)
403 MICROPROFILE_API void MicroProfileGpuInitD3D11(void* pDevice, void* pDeviceContext);
404 #endif
405
406 #if MICROPROFILE_GPU_TIMERS_GL
407 #define MICROPROFILE_GL_MAX_QUERIES (8<<10)
408 MICROPROFILE_API void MicroProfileGpuInitGL();
409 #endif
410
411
412
413 #if MICROPROFILE_USE_THREAD_NAME_CALLBACK
414 MICROPROFILE_API const char* MicroProfileGetThreadName();
415 #else
416 #define MicroProfileGetThreadName() "<implement MicroProfileGetThreadName to get threadnames>"
417 #endif
418
419 #if !defined(MICROPROFILE_THREAD_NAME_FROM_ID)
420 #define MICROPROFILE_THREAD_NAME_FROM_ID(a) ""
421 #endif
422
423
424 struct MicroProfileScopeHandler
425 {
426 MicroProfileToken nToken;
427 uint64_t nTick;
MicroProfileScopeHandlerMicroProfileScopeHandler428 MicroProfileScopeHandler(MicroProfileToken Token):nToken(Token)
429 {
430 nTick = MicroProfileEnter(nToken);
431 }
~MicroProfileScopeHandlerMicroProfileScopeHandler432 ~MicroProfileScopeHandler()
433 {
434 MicroProfileLeave(nToken, nTick);
435 }
436 };
437
438 struct MicroProfileScopeGpuHandler
439 {
440 MicroProfileToken nToken;
441 uint64_t nTick;
MicroProfileScopeGpuHandlerMicroProfileScopeGpuHandler442 MicroProfileScopeGpuHandler(MicroProfileToken Token):nToken(Token)
443 {
444 nTick = MicroProfileGpuEnter(nToken);
445 }
~MicroProfileScopeGpuHandlerMicroProfileScopeGpuHandler446 ~MicroProfileScopeGpuHandler()
447 {
448 MicroProfileGpuLeave(nToken, nTick);
449 }
450 };
451
452
453
454 #define MICROPROFILE_MAX_TIMERS 1024
455 #define MICROPROFILE_MAX_GROUPS 48 //dont bump! no. of bits used it bitmask
456 #define MICROPROFILE_MAX_CATEGORIES 16
457 #define MICROPROFILE_MAX_GRAPHS 5
458 #define MICROPROFILE_GRAPH_HISTORY 128
459 #define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE)/sizeof(MicroProfileLogEntry))
460 #define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
461 #define MICROPROFILE_STACK_MAX 32
462 //#define MICROPROFILE_MAX_PRESETS 5
463 #define MICROPROFILE_ANIM_DELAY_PRC 0.5f
464 #define MICROPROFILE_GAP_TIME 50 //extra ms to fetch to close timers from earlier frames
465
466
467 #ifndef MICROPROFILE_MAX_THREADS
468 #define MICROPROFILE_MAX_THREADS 32
469 #endif
470
471 #ifndef MICROPROFILE_UNPACK_RED
472 #define MICROPROFILE_UNPACK_RED(c) ((c)>>16)
473 #endif
474
475 #ifndef MICROPROFILE_UNPACK_GREEN
476 #define MICROPROFILE_UNPACK_GREEN(c) ((c)>>8)
477 #endif
478
479 #ifndef MICROPROFILE_UNPACK_BLUE
480 #define MICROPROFILE_UNPACK_BLUE(c) ((c))
481 #endif
482
483 #ifndef MICROPROFILE_DEFAULT_PRESET
484 #define MICROPROFILE_DEFAULT_PRESET "Default"
485 #endif
486
487
488 #ifndef MICROPROFILE_CONTEXT_SWITCH_TRACE
489 #if defined(_WIN32)
490 #define MICROPROFILE_CONTEXT_SWITCH_TRACE 1
491 #elif defined(__APPLE__)
492 #define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 //disabled until dtrace script is working.
493 #else
494 #define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
495 #endif
496 #endif
497
498 #if MICROPROFILE_CONTEXT_SWITCH_TRACE
499 #define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (128*1024) //2mb with 16 byte entry size
500 #else
501 #define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (1)
502 #endif
503
504 #ifndef MICROPROFILE_MINIZ
505 #define MICROPROFILE_MINIZ 0
506 #endif
507
508 #ifdef _WIN32
509 #include <basetsd.h>
510 typedef UINT_PTR MpSocket;
511 #else
512 typedef int MpSocket;
513 #endif
514
515
516 #ifndef _WIN32
517 typedef pthread_t MicroProfileThread;
518 #elif defined(_MSC_VER)
519 typedef HANDLE MicroProfileThread;
520 #else
521 typedef std::thread* MicroProfileThread;
522 #endif
523
524
525
// Display mode values. Note MP_DRAW_HIDDEN equals MP_DRAW_BARS | MP_DRAW_DETAILED.
enum MicroProfileDrawMask
{
    MP_DRAW_OFF      = 0,
    MP_DRAW_BARS     = 1,
    MP_DRAW_DETAILED = 2,
    MP_DRAW_HIDDEN   = 3,
};
533
// Bit flags selecting which columns the bar view draws; one bit per column.
enum MicroProfileDrawBarsMask
{
    MP_DRAW_TIMERS            = 1 << 0,
    MP_DRAW_AVERAGE           = 1 << 1,
    MP_DRAW_MAX               = 1 << 2,
    MP_DRAW_CALL_COUNT        = 1 << 3,
    MP_DRAW_TIMERS_EXCLUSIVE  = 1 << 4,
    MP_DRAW_AVERAGE_EXCLUSIVE = 1 << 5,
    MP_DRAW_MAX_EXCLUSIVE     = 1 << 6,
    MP_DRAW_META_FIRST        = 1 << 7, // NOTE(review): presumably the first bit used for meta counters - confirm
    MP_DRAW_ALL               = 0xffffffff,
};
547
548 typedef uint64_t MicroProfileLogEntry;
549
// Per-timer accumulator: a 64-bit tick value paired with a 32-bit count.
struct MicroProfileTimer
{
    uint64_t nTicks;
    uint32_t nCount;
};
555
556 struct MicroProfileCategory
557 {
558 char pName[MICROPROFILE_NAME_MAX_LEN];
559 uint64_t nGroupMask;
560 };
561
562 struct MicroProfileGroupInfo
563 {
564 char pName[MICROPROFILE_NAME_MAX_LEN];
565 uint32_t nNameLen;
566 uint32_t nGroupIndex;
567 uint32_t nNumTimers;
568 uint32_t nMaxTimerNameLen;
569 uint32_t nColor;
570 uint32_t nCategory;
571 MicroProfileTokenType Type;
572 };
573
574 struct MicroProfileTimerInfo
575 {
576 MicroProfileToken nToken;
577 uint32_t nTimerIndex;
578 uint32_t nGroupIndex;
579 char pName[MICROPROFILE_NAME_MAX_LEN];
580 uint32_t nNameLen;
581 uint32_t nColor;
582 bool bGraph;
583 };
584
585 struct MicroProfileGraphState
586 {
587 int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
588 MicroProfileToken nToken;
589 int32_t nKey;
590 };
591
592 struct MicroProfileContextSwitch
593 {
594 ThreadIdType nThreadOut;
595 ThreadIdType nThreadIn;
596 int64_t nCpu : 8;
597 int64_t nTicks : 56;
598 };
599
600
601 struct MicroProfileFrameState
602 {
603 int64_t nFrameStartCpu;
604 int64_t nFrameStartGpu;
605 uint32_t nLogStart[MICROPROFILE_MAX_THREADS];
606 };
607
608 struct MicroProfileThreadLog
609 {
610 std::array<MicroProfileLogEntry, MICROPROFILE_BUFFER_SIZE> Log{};
611
612 std::atomic<uint32_t> nPut{0};
613 std::atomic<uint32_t> nGet{0};
614 uint32_t nActive = 0;
615 uint32_t nGpu = 0;
616 ThreadIdType nThreadId{};
617
618 std::array<uint32_t, MICROPROFILE_STACK_MAX> nStack{};
619 std::array<int64_t, MICROPROFILE_STACK_MAX> nChildTickStack{};
620 uint32_t nStackPos = 0;
621
622
623 std::array<uint8_t, MICROPROFILE_MAX_GROUPS> nGroupStackPos{};
624 std::array<int64_t, MICROPROFILE_MAX_GROUPS> nGroupTicks{};
625 std::array<int64_t, MICROPROFILE_MAX_GROUPS> nAggregateGroupTicks{};
626 enum
627 {
628 THREAD_MAX_LEN = 64,
629 };
630 char ThreadName[64]{};
631 int nFreeListNext = 0;
632
ResetMicroProfileThreadLog633 void Reset() {
634 Log.fill({});
635 nPut = 0;
636 nGet = 0;
637 nActive = 0;
638 nGpu = 0;
639 nThreadId = {};
640 nStack.fill(0);
641 nChildTickStack.fill(0);
642 nStackPos = 0;
643 nGroupStackPos.fill(0);
644 nGroupTicks.fill(0);
645 nAggregateGroupTicks.fill(0);
646 std::fill(std::begin(ThreadName), std::end(ThreadName), '\0');
647 nFreeListNext = 0;
648 }
649 };
650
651 #if MICROPROFILE_GPU_TIMERS_D3D11
// Per-frame D3D11 query bookkeeping: a query range (start + count) and the
// state of the frame's rate query.
struct MicroProfileD3D11Frame
{
    // Range of queries belonging to the frame.
    uint32_t m_nQueryStart;
    uint32_t m_nQueryCount;

    // Rate query and whether it has been started.
    uint32_t m_nRateQueryStarted;
    void* m_pRateQuery;
};
659
660 struct MicroProfileGpuTimerState
661 {
662 uint32_t bInitialized;
663 void* m_pDevice;
664 void* m_pDeviceContext;
665 void* m_pQueries[MICROPROFILE_D3D_MAX_QUERIES];
666 int64_t m_nQueryResults[MICROPROFILE_D3D_MAX_QUERIES];
667 uint32_t m_nQueryPut;
668 uint32_t m_nQueryGet;
669 uint32_t m_nQueryFrame;
670 int64_t m_nQueryFrequency;
671 MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
672 };
673 #elif MICROPROFILE_GPU_TIMERS_GL
674 struct MicroProfileGpuTimerState
675 {
676 uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
677 uint32_t GLTimerPos;
678 };
679 #else
// No GPU timer backend selected: the state is empty.
struct MicroProfileGpuTimerState
{
};
681 #endif
682
683 struct MicroProfile
684 {
685 uint32_t nTotalTimers;
686 uint32_t nGroupCount;
687 uint32_t nCategoryCount;
688 uint32_t nAggregateClear;
689 uint32_t nAggregateFlip;
690 uint32_t nAggregateFlipCount;
691 uint32_t nAggregateFrames;
692
693 uint64_t nAggregateFlipTick;
694
695 uint32_t nDisplay;
696 uint32_t nBars;
697 uint64_t nActiveGroup;
698 uint32_t nActiveBars;
699
700 uint64_t nForceGroup;
701 uint32_t nForceEnable;
702 uint32_t nForceMetaCounters;
703
704 uint64_t nForceGroupUI;
705 uint64_t nActiveGroupWanted;
706 uint32_t nAllGroupsWanted;
707 uint32_t nAllThreadsWanted;
708
709 uint32_t nOverflow;
710
711 uint64_t nGroupMask;
712 uint32_t nRunning;
713 uint32_t nToggleRunning;
714 uint32_t nMaxGroupSize;
715 uint32_t nDumpFileNextFrame;
716 uint32_t nAutoClearFrames;
717 char HtmlDumpPath[512];
718 char CsvDumpPath[512];
719
720 int64_t nPauseTicks;
721
722 float fReferenceTime;
723 float fRcpReferenceTime;
724
725 MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
726 MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS];
727 MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS];
728 uint8_t TimerToGroup[MICROPROFILE_MAX_TIMERS];
729
730 MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS];
731 uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
732 uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
733 uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];
734
735 MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS];
736 uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS];
737
738 MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS];
739 uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS];
740 uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS];
741 uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];
742
743
744 uint64_t FrameGroup[MICROPROFILE_MAX_GROUPS];
745 uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS];
746 uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS];
747
748 uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS];
749 uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS];
750
751
752 struct
753 {
754 uint64_t nCounters[MICROPROFILE_MAX_TIMERS];
755
756 uint64_t nAccum[MICROPROFILE_MAX_TIMERS];
757 uint64_t nAccumMax[MICROPROFILE_MAX_TIMERS];
758
759 uint64_t nAggregate[MICROPROFILE_MAX_TIMERS];
760 uint64_t nAggregateMax[MICROPROFILE_MAX_TIMERS];
761
762 uint64_t nSum;
763 uint64_t nSumAccum;
764 uint64_t nSumAccumMax;
765 uint64_t nSumAggregate;
766 uint64_t nSumAggregateMax;
767
768 const char* pName;
769 } MetaCounters[MICROPROFILE_META_MAX];
770
771 MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS];
772 uint32_t nGraphPut;
773
774 uint32_t nThreadActive[MICROPROFILE_MAX_THREADS];
775 MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS];
776 uint32_t nNumLogs;
777 uint32_t nMemUsage;
778 int nFreeListHead;
779
780 uint32_t nFrameCurrent;
781 uint32_t nFrameCurrentIndex;
782 uint32_t nFramePut;
783 uint64_t nFramePutIndex;
784
785 MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];
786
787 uint64_t nFlipTicks;
788 uint64_t nFlipAggregate;
789 uint64_t nFlipMax;
790 uint64_t nFlipAggregateDisplay;
791 uint64_t nFlipMaxDisplay;
792
793 MicroProfileThread ContextSwitchThread;
794 bool bContextSwitchRunning;
795 bool bContextSwitchStop;
796 bool bContextSwitchAllThreads;
797 bool bContextSwitchNoBars;
798 uint32_t nContextSwitchUsage;
799 uint32_t nContextSwitchLastPut;
800
801 int64_t nContextSwitchHoverTickIn;
802 int64_t nContextSwitchHoverTickOut;
803 uint32_t nContextSwitchHoverThread;
804 uint32_t nContextSwitchHoverThreadBefore;
805 uint32_t nContextSwitchHoverThreadAfter;
806 uint8_t nContextSwitchHoverCpu;
807 uint8_t nContextSwitchHoverCpuNext;
808
809 uint32_t nContextSwitchPut;
810 MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];
811
812
813 MpSocket ListenerSocket;
814 uint32_t nWebServerPort;
815
816 char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
817 uint32_t WebServerPut;
818
819 uint64_t nWebServerDataSent;
820
821 MicroProfileGpuTimerState GPU;
822
823
824 };
825
826 #define MP_LOG_TICK_MASK 0x0000ffffffffffff
827 #define MP_LOG_INDEX_MASK 0x3fff000000000000
828 #define MP_LOG_BEGIN_MASK 0xc000000000000000
829 #define MP_LOG_GPU_EXTRA 0x3
830 #define MP_LOG_META 0x2
831 #define MP_LOG_ENTER 0x1
832 #define MP_LOG_LEAVE 0x0
833
834
MicroProfileLogType(MicroProfileLogEntry Index)835 inline int MicroProfileLogType(MicroProfileLogEntry Index)
836 {
837 return ((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3;
838 }
839
MicroProfileLogTimerIndex(MicroProfileLogEntry Index)840 inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index)
841 {
842 return (0x3fff&(Index>>48));
843 }
844
MicroProfileMakeLogIndex(uint64_t nBegin,MicroProfileToken nToken,int64_t nTick)845 inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
846 {
847 MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick);
848 int t = MicroProfileLogType(Entry);
849 uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry);
850 MP_ASSERT(t == nBegin);
851 MP_ASSERT(nTimerIndex == (nToken&0x3fff));
852 return Entry;
853
854 }
855
MicroProfileLogTickDifference(MicroProfileLogEntry Start,MicroProfileLogEntry End)856 inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
857 {
858 uint64_t nStart = Start;
859 uint64_t nEnd = End;
860 int64_t nDifference = ((nEnd<<16) - (nStart<<16));
861 return nDifference >> 16;
862 }
863
MicroProfileLogGetTick(MicroProfileLogEntry e)864 inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
865 {
866 return MP_LOG_TICK_MASK & e;
867 }
868
MicroProfileLogSetTick(MicroProfileLogEntry e,int64_t nTick)869 inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
870 {
871 return (MP_LOG_TICK_MASK & nTick) | (e & ~MP_LOG_TICK_MASK);
872 }
873
// Returns a when a < b, otherwise b.
template<typename T>
T MicroProfileMin(T a, T b)
{
    if(a < b)
    {
        return a;
    }
    return b;
}
877
// Returns a when a > b, otherwise b.
template<typename T>
T MicroProfileMax(T a, T b)
{
    if(a > b)
    {
        return a;
    }
    return b;
}
881
// Converts a duration in milliseconds to ticks at the given tick frequency.
// The arithmetic is done in single precision and truncated to int64_t.
inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
{
    return static_cast<int64_t>(fMs * 0.001f * static_cast<float>(nTicksPerSecond));
}
886
// Returns the factor that converts a tick count to milliseconds:
// 1000 / nTicksPerSecond, computed in single precision.
inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
{
    return 1000.f / static_cast<float>(nTicksPerSecond);
}
891
MicroProfileGetGroupIndex(MicroProfileToken t)892 inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
893 {
894 return (uint16_t)MicroProfileGet()->TimerToGroup[MicroProfileGetTimerIndex(t)];
895 }
896
897
898
899 #ifdef MICROPROFILE_IMPL
900
901 #ifdef _WIN32
902 #include <windows.h>
903 #define snprintf _snprintf
904
905 #pragma warning(push)
906 #pragma warning(disable: 4244)
MicroProfileTicksPerSecondCpu()907 int64_t MicroProfileTicksPerSecondCpu()
908 {
909 static int64_t nTicksPerSecond = 0;
910 if(nTicksPerSecond == 0)
911 {
912 QueryPerformanceFrequency((LARGE_INTEGER*)&nTicksPerSecond);
913 }
914 return nTicksPerSecond;
915 }
MicroProfileGetTick()916 int64_t MicroProfileGetTick()
917 {
918 int64_t ticks;
919 QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
920 return ticks;
921 }
922
923 #endif
924
925 #if defined(MICROPROFILE_WEBSERVER) || defined(MICROPROFILE_CONTEXT_SWITCH_TRACE)
926
927
928 typedef void* (*MicroProfileThreadFunc)(void*);
929
930 #ifndef _WIN32
931 typedef pthread_t MicroProfileThread;
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)932 void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
933 {
934 pthread_attr_t Attr;
935 int r = pthread_attr_init(&Attr);
936 MP_ASSERT(r == 0);
937 pthread_create(pThread, &Attr, Func, 0);
938 }
MicroProfileThreadJoin(MicroProfileThread * pThread)939 void MicroProfileThreadJoin(MicroProfileThread* pThread)
940 {
941 int r = pthread_join(*pThread, 0);
942 MP_ASSERT(r == 0);
943 }
944 #elif defined(_MSC_VER)
945 typedef HANDLE MicroProfileThread;
ThreadTrampoline(void * pFunc)946 DWORD _stdcall ThreadTrampoline(void* pFunc)
947 {
948 MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
949
950 // The return value of F will always return a void*, however, this is for
951 // compatibility with pthreads. The underlying "address" of the pointer
952 // is always a 32-bit value, so this cast is safe to perform.
953 return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0)));
954 }
955
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)956 void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
957 {
958 *pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
959 }
MicroProfileThreadJoin(MicroProfileThread * pThread)960 void MicroProfileThreadJoin(MicroProfileThread* pThread)
961 {
962 WaitForSingleObject(*pThread, INFINITE);
963 CloseHandle(*pThread);
964 }
965 #else
966 #include <thread>
967 typedef std::thread* MicroProfileThread;
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)968 inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
969 {
970 *pThread = new std::thread(Func, nullptr);
971 }
MicroProfileThreadJoin(MicroProfileThread * pThread)972 inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
973 {
974 (*pThread)->join();
975 delete *pThread;
976 }
977 #endif
978 #endif
979
980 #if MICROPROFILE_WEBSERVER
981
982 #ifdef _WIN32
983 #define MP_INVALID_SOCKET(f) (f == INVALID_SOCKET)
984 #endif
985
986 #ifndef _WIN32
987 #include <sys/socket.h>
988 #include <netinet/in.h>
989 #include <fcntl.h>
990 #define MP_INVALID_SOCKET(f) (f < 0)
991 #endif
992
993
994 void MicroProfileWebServerStart();
995 void MicroProfileWebServerStop();
996 bool MicroProfileWebServerUpdate();
997 void MicroProfileDumpToFile();
998
999 #else
1000
1001 #define MicroProfileWebServerStart() do{}while(0)
1002 #define MicroProfileWebServerStop() do{}while(0)
1003 #define MicroProfileWebServerUpdate() false
1004 #define MicroProfileDumpToFile() do{} while(0)
1005 #endif
1006
1007
1008 #if MICROPROFILE_GPU_TIMERS_D3D11
1009 void MicroProfileGpuFlip();
1010 void MicroProfileGpuShutdown();
1011 #else
1012 #define MicroProfileGpuFlip() do{}while(0)
1013 #define MicroProfileGpuShutdown() do{}while(0)
1014 #endif
1015
1016
1017
1018 #include <stdlib.h>
1019 #include <stdio.h>
1020 #include <math.h>
1021 #include <algorithm>
1022
1023
1024 #ifndef MICROPROFILE_DEBUG
1025 #define MICROPROFILE_DEBUG 0
1026 #endif
1027
1028
1029 #define S g_MicroProfile
1030
1031 MicroProfile g_MicroProfile;
1032 MicroProfileThreadLog* g_MicroProfileGpuLog = 0;
1033 #ifdef MICROPROFILE_IOS
1034 // iOS doesn't support __thread
1035 static pthread_key_t g_MicroProfileThreadLogKey;
1036 static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
MicroProfileCreateThreadLogKey()1037 static void MicroProfileCreateThreadLogKey()
1038 {
1039 pthread_key_create(&g_MicroProfileThreadLogKey, NULL);
1040 }
1041 #else
1042 MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLog = 0;
1043 #endif
1044 static std::atomic<bool> g_bUseLock{false}; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled)
1045
1046
1047 MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", 0x3355ee);
1048 MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", 0x3355ee);
1049 MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", 0x3355ee);
1050 MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", 0x3355ee);
1051 MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch,"MicroProfile", "ContextSwitchSearch", 0xDD7300);
1052
// Returns the profiler's single recursive mutex. It is a function-local static,
// so it is constructed the first time this function is called.
inline std::recursive_mutex& MicroProfileMutex()
{
	static std::recursive_mutex s_ProfilerMutex;
	return s_ProfilerMutex;
}
MicroProfileGetMutex()1058 std::recursive_mutex& MicroProfileGetMutex()
1059 {
1060 return MicroProfileMutex();
1061 }
1062
MicroProfileGet()1063 MICROPROFILE_API MicroProfile* MicroProfileGet()
1064 {
1065 return &g_MicroProfile;
1066 }
1067
1068
1069 MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
1070
1071
MicroProfileInit()1072 void MicroProfileInit()
1073 {
1074 std::recursive_mutex& mutex = MicroProfileMutex();
1075 bool bUseLock = g_bUseLock;
1076 if(bUseLock)
1077 mutex.lock();
1078 static bool bOnce = true;
1079 if(bOnce)
1080 {
1081 S.nMemUsage += sizeof(S);
1082 bOnce = false;
1083 memset(&S, 0, sizeof(S));
1084 for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1085 {
1086 S.GroupInfo[i].pName[0] = '\0';
1087 }
1088 for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
1089 {
1090 S.CategoryInfo[i].pName[0] = '\0';
1091 S.CategoryInfo[i].nGroupMask = 0;
1092 }
1093 strcpy(&S.CategoryInfo[0].pName[0], "default");
1094 S.nCategoryCount = 1;
1095 for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
1096 {
1097 S.TimerInfo[i].pName[0] = '\0';
1098 }
1099 S.nGroupCount = 0;
1100 S.nAggregateFlipTick = MP_TICK();
1101 S.nActiveGroup = 0;
1102 S.nActiveBars = 0;
1103 S.nForceGroup = 0;
1104 S.nAllGroupsWanted = 0;
1105 S.nActiveGroupWanted = 0;
1106 S.nAllThreadsWanted = 1;
1107 S.nAggregateFlip = 0;
1108 S.nTotalTimers = 0;
1109 for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
1110 {
1111 S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
1112 }
1113 S.nRunning = 1;
1114 S.fReferenceTime = 33.33f;
1115 S.fRcpReferenceTime = 1.f / S.fReferenceTime;
1116 S.nFreeListHead = -1;
1117 int64_t nTick = MP_TICK();
1118 for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
1119 {
1120 S.Frames[i].nFrameStartCpu = nTick;
1121 S.Frames[i].nFrameStartGpu = -1;
1122 }
1123
1124 MicroProfileThreadLog* pGpu = MicroProfileCreateThreadLog("GPU");
1125 g_MicroProfileGpuLog = pGpu;
1126 MP_ASSERT(S.Pool[0] == pGpu);
1127 pGpu->nGpu = 1;
1128 pGpu->nThreadId = 0;
1129
1130 S.nWebServerDataSent = (uint64_t)-1;
1131 }
1132 if(bUseLock)
1133 mutex.unlock();
1134 }
1135
MicroProfileShutdown()1136 void MicroProfileShutdown()
1137 {
1138 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1139 MicroProfileWebServerStop();
1140 MicroProfileStopContextSwitchTrace();
1141 MicroProfileGpuShutdown();
1142 }
1143
1144 #ifdef MICROPROFILE_IOS
MicroProfileGetThreadLog()1145 inline MicroProfileThreadLog* MicroProfileGetThreadLog()
1146 {
1147 pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
1148 return (MicroProfileThreadLog*)pthread_getspecific(g_MicroProfileThreadLogKey);
1149 }
1150
MicroProfileSetThreadLog(MicroProfileThreadLog * pLog)1151 inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
1152 {
1153 pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
1154 pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
1155 }
1156 #else
MicroProfileGetThreadLog()1157 MicroProfileThreadLog* MicroProfileGetThreadLog()
1158 {
1159 return g_MicroProfileThreadLog;
1160 }
MicroProfileSetThreadLog(MicroProfileThreadLog * pLog)1161 inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
1162 {
1163 g_MicroProfileThreadLog = pLog;
1164 }
1165 #endif
1166
1167
MicroProfileCreateThreadLog(const char * pName)1168 MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName)
1169 {
1170 MicroProfileThreadLog* pLog = 0;
1171 if(S.nFreeListHead != -1)
1172 {
1173 pLog = S.Pool[S.nFreeListHead];
1174 MP_ASSERT(pLog->nPut.load() == 0);
1175 MP_ASSERT(pLog->nGet.load() == 0);
1176 S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext;
1177 pLog->Reset();
1178 }
1179 else
1180 {
1181 pLog = new MicroProfileThreadLog;
1182 S.nMemUsage += sizeof(MicroProfileThreadLog);
1183 S.Pool[S.nNumLogs++] = pLog;
1184 }
1185 int len = (int)strlen(pName);
1186 int maxlen = sizeof(pLog->ThreadName)-1;
1187 len = len < maxlen ? len : maxlen;
1188 memcpy(&pLog->ThreadName[0], pName, len);
1189 pLog->ThreadName[len] = '\0';
1190 pLog->nThreadId = MP_GETCURRENTTHREADID();
1191 pLog->nFreeListNext = -1;
1192 pLog->nActive = 1;
1193 return pLog;
1194 }
1195
MicroProfileOnThreadCreate(const char * pThreadName)1196 void MicroProfileOnThreadCreate(const char* pThreadName)
1197 {
1198 g_bUseLock = true;
1199 MicroProfileInit();
1200 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1201 MP_ASSERT(MicroProfileGetThreadLog() == 0);
1202 MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? pThreadName : MicroProfileGetThreadName());
1203 MP_ASSERT(pLog);
1204 MicroProfileSetThreadLog(pLog);
1205 }
1206
MicroProfileOnThreadExit()1207 void MicroProfileOnThreadExit()
1208 {
1209 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1210 MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
1211 if(pLog)
1212 {
1213 int32_t nLogIndex = -1;
1214 for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1215 {
1216 if(pLog == S.Pool[i])
1217 {
1218 nLogIndex = i;
1219 break;
1220 }
1221 }
1222 MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS && nLogIndex > 0);
1223 pLog->nFreeListNext = S.nFreeListHead;
1224 pLog->nActive = 0;
1225 pLog->nPut.store(0);
1226 pLog->nGet.store(0);
1227 S.nFreeListHead = nLogIndex;
1228 for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
1229 {
1230 S.Frames[i].nLogStart[nLogIndex] = 0;
1231 }
1232 pLog->nGroupStackPos.fill(0);
1233 pLog->nGroupTicks.fill(0);
1234 }
1235 }
1236
MicroProfileInitThreadLog()1237 void MicroProfileInitThreadLog()
1238 {
1239 MicroProfileOnThreadCreate(nullptr);
1240 }
1241
1242
1243 struct MicroProfileScopeLock
1244 {
1245 bool bUseLock;
1246 std::recursive_mutex& m;
MicroProfileScopeLockMicroProfileScopeLock1247 MicroProfileScopeLock(std::recursive_mutex& m) : bUseLock(g_bUseLock), m(m)
1248 {
1249 if(bUseLock)
1250 m.lock();
1251 }
~MicroProfileScopeLockMicroProfileScopeLock1252 ~MicroProfileScopeLock()
1253 {
1254 if(bUseLock)
1255 m.unlock();
1256 }
1257 };
1258
MicroProfileFindToken(const char * pGroup,const char * pName)1259 MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
1260 {
1261 MicroProfileInit();
1262 MicroProfileScopeLock L(MicroProfileMutex());
1263 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1264 {
1265 if(!MP_STRCASECMP(pName, S.TimerInfo[i].pName) && !MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[i]].pName))
1266 {
1267 return S.TimerInfo[i].nToken;
1268 }
1269 }
1270 return MICROPROFILE_INVALID_TOKEN;
1271 }
1272
MicroProfileGetGroup(const char * pGroup,MicroProfileTokenType Type)1273 uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
1274 {
1275 for(uint32_t i = 0; i < S.nGroupCount; ++i)
1276 {
1277 if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
1278 {
1279 return i;
1280 }
1281 }
1282 uint16_t nGroupIndex = 0xffff;
1283 uint32_t nLen = (uint32_t)strlen(pGroup);
1284 if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1285 nLen = MICROPROFILE_NAME_MAX_LEN-1;
1286 memcpy(&S.GroupInfo[S.nGroupCount].pName[0], pGroup, nLen);
1287 S.GroupInfo[S.nGroupCount].pName[nLen] = '\0';
1288 S.GroupInfo[S.nGroupCount].nNameLen = nLen;
1289 S.GroupInfo[S.nGroupCount].nNumTimers = 0;
1290 S.GroupInfo[S.nGroupCount].nGroupIndex = S.nGroupCount;
1291 S.GroupInfo[S.nGroupCount].Type = Type;
1292 S.GroupInfo[S.nGroupCount].nMaxTimerNameLen = 0;
1293 S.GroupInfo[S.nGroupCount].nColor = 0x88888888;
1294 S.GroupInfo[S.nGroupCount].nCategory = 0;
1295 S.CategoryInfo[0].nGroupMask |= (1ll << (uint64_t)S.nGroupCount);
1296 nGroupIndex = S.nGroupCount++;
1297 S.nGroupMask = (S.nGroupMask<<1)|1;
1298 MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS);
1299 return nGroupIndex;
1300 }
1301
MicroProfileRegisterGroup(const char * pGroup,const char * pCategory,uint32_t nColor)1302 void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
1303 {
1304 int nCategoryIndex = -1;
1305 for(uint32_t i = 0; i < S.nCategoryCount; ++i)
1306 {
1307 if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
1308 {
1309 nCategoryIndex = (int)i;
1310 break;
1311 }
1312 }
1313 if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES)
1314 {
1315 MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0');
1316 nCategoryIndex = (int)S.nCategoryCount++;
1317 uint32_t nLen = (uint32_t)strlen(pCategory);
1318 if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1319 nLen = MICROPROFILE_NAME_MAX_LEN-1;
1320 memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen);
1321 S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0';
1322 }
1323 uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu")?MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu);
1324 S.GroupInfo[nGroup].nColor = nColor;
1325 if(nCategoryIndex >= 0)
1326 {
1327 uint64_t nBit = 1ll << nGroup;
1328 uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory;
1329 S.CategoryInfo[nOldCategory].nGroupMask &= ~nBit;
1330 S.CategoryInfo[nCategoryIndex].nGroupMask |= nBit;
1331 S.GroupInfo[nGroup].nCategory = nCategoryIndex;
1332 }
1333 }
1334
MicroProfileGetToken(const char * pGroup,const char * pName,uint32_t nColor,MicroProfileTokenType Type)1335 MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type)
1336 {
1337 MicroProfileInit();
1338 MicroProfileScopeLock L(MicroProfileMutex());
1339 MicroProfileToken ret = MicroProfileFindToken(pGroup, pName);
1340 if(ret != MICROPROFILE_INVALID_TOKEN)
1341 return ret;
1342 uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
1343 uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++);
1344 uint64_t nGroupMask = 1ll << nGroupIndex;
1345 MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, nTimerIndex);
1346 S.GroupInfo[nGroupIndex].nNumTimers++;
1347 S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, (uint32_t)strlen(pName));
1348 MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); //dont mix cpu & gpu timers in the same group
1349 S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers);
1350 S.TimerInfo[nTimerIndex].nToken = nToken;
1351 uint32_t nLen = (uint32_t)strlen(pName);
1352 if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1353 nLen = MICROPROFILE_NAME_MAX_LEN-1;
1354 memcpy(&S.TimerInfo[nTimerIndex].pName, pName, nLen);
1355 S.TimerInfo[nTimerIndex].pName[nLen] = '\0';
1356 S.TimerInfo[nTimerIndex].nNameLen = nLen;
1357 S.TimerInfo[nTimerIndex].nColor = nColor&0xffffff;
1358 S.TimerInfo[nTimerIndex].nGroupIndex = nGroupIndex;
1359 S.TimerInfo[nTimerIndex].nTimerIndex = nTimerIndex;
1360 S.TimerToGroup[nTimerIndex] = nGroupIndex;
1361 return nToken;
1362 }
1363
MicroProfileGetMetaToken(const char * pName)1364 MicroProfileToken MicroProfileGetMetaToken(const char* pName)
1365 {
1366 MicroProfileInit();
1367 MicroProfileScopeLock L(MicroProfileMutex());
1368 for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1369 {
1370 if(!S.MetaCounters[i].pName)
1371 {
1372 S.MetaCounters[i].pName = pName;
1373 return i;
1374 }
1375 else if(!MP_STRCASECMP(pName, S.MetaCounters[i].pName))
1376 {
1377 return i;
1378 }
1379 }
1380 MP_ASSERT(0);//out of slots, increase MICROPROFILE_META_MAX
1381 return (MicroProfileToken)-1;
1382 }
1383
1384
MicroProfileLogPut(MicroProfileToken nToken_,uint64_t nTick,uint64_t nBegin,MicroProfileThreadLog * pLog)1385 inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog)
1386 {
1387 MP_ASSERT(pLog != 0); //this assert is hit if MicroProfileOnCreateThread is not called
1388 MP_ASSERT(pLog->nActive);
1389 uint32_t nPos = pLog->nPut.load(std::memory_order_relaxed);
1390 uint32_t nNextPos = (nPos+1) % MICROPROFILE_BUFFER_SIZE;
1391 if(nNextPos == pLog->nGet.load(std::memory_order_relaxed))
1392 {
1393 S.nOverflow = 100;
1394 }
1395 else
1396 {
1397 pLog->Log[nPos] = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
1398 pLog->nPut.store(nNextPos, std::memory_order_release);
1399 }
1400 }
1401
MicroProfileEnter(MicroProfileToken nToken_)1402 uint64_t MicroProfileEnter(MicroProfileToken nToken_)
1403 {
1404 if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup)
1405 {
1406 if(!MicroProfileGetThreadLog())
1407 {
1408 MicroProfileInitThreadLog();
1409 }
1410 uint64_t nTick = MP_TICK();
1411 MicroProfileLogPut(nToken_, nTick, MP_LOG_ENTER, MicroProfileGetThreadLog());
1412 return nTick;
1413 }
1414 return MICROPROFILE_INVALID_TICK;
1415 }
1416
MicroProfileMetaUpdate(MicroProfileToken nToken,int nCount,MicroProfileTokenType eTokenType)1417 void MicroProfileMetaUpdate(MicroProfileToken nToken, int nCount, MicroProfileTokenType eTokenType)
1418 {
1419 if((MP_DRAW_META_FIRST<<nToken) & S.nActiveBars)
1420 {
1421 MicroProfileThreadLog* pLog = MicroProfileTokenTypeCpu == eTokenType ? MicroProfileGetThreadLog() : g_MicroProfileGpuLog;
1422 if(pLog)
1423 {
1424 MP_ASSERT(nToken < MICROPROFILE_META_MAX);
1425 MicroProfileLogPut(nToken, nCount, MP_LOG_META, pLog);
1426 }
1427 }
1428 }
1429
1430
MicroProfileLeave(MicroProfileToken nToken_,uint64_t nTickStart)1431 void MicroProfileLeave(MicroProfileToken nToken_, uint64_t nTickStart)
1432 {
1433 if(MICROPROFILE_INVALID_TICK != nTickStart)
1434 {
1435 if(!MicroProfileGetThreadLog())
1436 {
1437 MicroProfileInitThreadLog();
1438 }
1439 uint64_t nTick = MP_TICK();
1440 MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
1441 MicroProfileLogPut(nToken_, nTick, MP_LOG_LEAVE, pLog);
1442 }
1443 }
1444
1445
MicroProfileGpuEnter(MicroProfileToken nToken_)1446 uint64_t MicroProfileGpuEnter(MicroProfileToken nToken_)
1447 {
1448 if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup)
1449 {
1450 uint64_t nTimer = MicroProfileGpuInsertTimeStamp();
1451 MicroProfileLogPut(nToken_, nTimer, MP_LOG_ENTER, g_MicroProfileGpuLog);
1452 MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
1453 return 1;
1454 }
1455 return 0;
1456 }
1457
MicroProfileGpuLeave(MicroProfileToken nToken_,uint64_t nTickStart)1458 void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart)
1459 {
1460 if(nTickStart)
1461 {
1462 uint64_t nTimer = MicroProfileGpuInsertTimeStamp();
1463 MicroProfileLogPut(nToken_, nTimer, MP_LOG_LEAVE, g_MicroProfileGpuLog);
1464 MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
1465 }
1466 }
1467
MicroProfileContextSwitchPut(MicroProfileContextSwitch * pContextSwitch)1468 void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
1469 {
1470 if(S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks)
1471 {
1472 uint32_t nPut = S.nContextSwitchPut;
1473 S.ContextSwitch[nPut] = *pContextSwitch;
1474 S.nContextSwitchPut = (S.nContextSwitchPut+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
1475 }
1476 }
1477
1478
MicroProfileGetRange(uint32_t nPut,uint32_t nGet,uint32_t nRange[2][2])1479 void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2])
1480 {
1481 if(nPut > nGet)
1482 {
1483 nRange[0][0] = nGet;
1484 nRange[0][1] = nPut;
1485 nRange[1][0] = nRange[1][1] = 0;
1486 }
1487 else if(nPut != nGet)
1488 {
1489 MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE);
1490 uint32_t nCountEnd = MICROPROFILE_BUFFER_SIZE - nGet;
1491 nRange[0][0] = nGet;
1492 nRange[0][1] = nGet + nCountEnd;
1493 nRange[1][0] = 0;
1494 nRange[1][1] = nPut;
1495 }
1496 }
1497
MicroProfileFlip()1498 void MicroProfileFlip()
1499 {
1500 #if 0
1501 //verify LogEntry wraps correctly
1502 MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000;
1503 for(int i = 0; i < 10000; ++i, c += 1)
1504 {
1505 MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK;
1506 MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2));
1507 }
1508 #endif
1509 MICROPROFILE_SCOPE(g_MicroProfileFlip);
1510 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1511
1512
1513 MicroProfileGpuFlip();
1514
1515 if(S.nToggleRunning)
1516 {
1517 S.nRunning = !S.nRunning;
1518 if(!S.nRunning)
1519 S.nPauseTicks = MP_TICK();
1520 S.nToggleRunning = 0;
1521 for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1522 {
1523 MicroProfileThreadLog* pLog = S.Pool[i];
1524 if(pLog)
1525 {
1526 pLog->nStackPos = 0;
1527 }
1528 }
1529 }
1530 uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0;
1531 if(S.nDumpFileNextFrame)
1532 {
1533 MicroProfileDumpToFile();
1534 S.nDumpFileNextFrame = 0;
1535 S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
1536 }
1537 if(S.nWebServerDataSent == (uint64_t)-1)
1538 {
1539 MicroProfileWebServerStart();
1540 S.nWebServerDataSent = 0;
1541 }
1542
1543 if(MicroProfileWebServerUpdate())
1544 {
1545 S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
1546 }
1547
1548 if(S.nAutoClearFrames)
1549 {
1550 nAggregateClear = 1;
1551 nAggregateFlip = 1;
1552 S.nAutoClearFrames -= 1;
1553 }
1554
1555
1556 if(S.nRunning || S.nForceEnable)
1557 {
1558 S.nFramePutIndex++;
1559 S.nFramePut = (S.nFramePut+1) % MICROPROFILE_MAX_FRAME_HISTORY;
1560 MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut);
1561 S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY;
1562 S.nFrameCurrentIndex++;
1563 uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
1564
1565 uint32_t nContextSwitchPut = S.nContextSwitchPut;
1566 if(S.nContextSwitchLastPut < nContextSwitchPut)
1567 {
1568 S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut);
1569 }
1570 else
1571 {
1572 S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut;
1573 }
1574 S.nContextSwitchLastPut = nContextSwitchPut;
1575
1576 MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut];
1577 MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
1578 MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
1579
1580 pFramePut->nFrameStartCpu = MP_TICK();
1581 pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp();
1582 if(pFrameNext->nFrameStartGpu != (uint64_t)-1)
1583 pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
1584
1585 if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1)
1586 pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1;
1587
1588 uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu;
1589 uint64_t nFrameEndCpu = pFrameNext->nFrameStartCpu;
1590
1591 {
1592 uint64_t nTick = nFrameEndCpu - nFrameStartCpu;
1593 S.nFlipTicks = nTick;
1594 S.nFlipAggregate += nTick;
1595 S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick);
1596 }
1597
1598 uint8_t* pTimerToGroup = &S.TimerToGroup[0];
1599 for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1600 {
1601 MicroProfileThreadLog* pLog = S.Pool[i];
1602 if(!pLog)
1603 {
1604 pFramePut->nLogStart[i] = 0;
1605 }
1606 else
1607 {
1608 uint32_t nPut = pLog->nPut.load(std::memory_order_acquire);
1609 pFramePut->nLogStart[i] = nPut;
1610 MP_ASSERT(nPut< MICROPROFILE_BUFFER_SIZE);
1611 //need to keep last frame around to close timers. timers more than 1 frame old is ditched.
1612 pLog->nGet.store(nPut, std::memory_order_relaxed);
1613 }
1614 }
1615
1616 if(S.nRunning)
1617 {
1618 uint64_t* pFrameGroup = &S.FrameGroup[0];
1619 {
1620 MICROPROFILE_SCOPE(g_MicroProfileClear);
1621 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1622 {
1623 S.Frame[i].nTicks = 0;
1624 S.Frame[i].nCount = 0;
1625 S.FrameExclusive[i] = 0;
1626 }
1627 for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1628 {
1629 pFrameGroup[i] = 0;
1630 }
1631 for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1632 {
1633 if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1634 {
1635 auto& Meta = S.MetaCounters[j];
1636 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1637 {
1638 Meta.nCounters[i] = 0;
1639 }
1640 }
1641 }
1642
1643 }
1644 {
1645 MICROPROFILE_SCOPE(g_MicroProfileThreadLoop);
1646 for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1647 {
1648 MicroProfileThreadLog* pLog = S.Pool[i];
1649 if(!pLog)
1650 continue;
1651
1652 uint8_t* pGroupStackPos = &pLog->nGroupStackPos[0];
1653 int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS] = {0};
1654
1655
1656 uint32_t nPut = pFrameNext->nLogStart[i];
1657 uint32_t nGet = pFrameCurrent->nLogStart[i];
1658 uint32_t nRange[2][2] = { {0, 0}, {0, 0}, };
1659 MicroProfileGetRange(nPut, nGet, nRange);
1660
1661
1662 //fetch gpu results.
1663 if(pLog->nGpu)
1664 {
1665 for(uint32_t j = 0; j < 2; ++j)
1666 {
1667 uint32_t nStart = nRange[j][0];
1668 uint32_t nEnd = nRange[j][1];
1669 for(uint32_t k = nStart; k < nEnd; ++k)
1670 {
1671 MicroProfileLogEntry L = pLog->Log[k];
1672 if(MicroProfileLogType(L) < MP_LOG_META)
1673 {
1674 pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L)));
1675 }
1676 }
1677 }
1678 }
1679
1680
1681 uint32_t* pStack = &pLog->nStack[0];
1682 int64_t* pChildTickStack = &pLog->nChildTickStack[0];
1683 uint32_t nStackPos = pLog->nStackPos;
1684
1685 for(uint32_t j = 0; j < 2; ++j)
1686 {
1687 uint32_t nStart = nRange[j][0];
1688 uint32_t nEnd = nRange[j][1];
1689 for(uint32_t k = nStart; k < nEnd; ++k)
1690 {
1691 MicroProfileLogEntry LE = pLog->Log[k];
1692 int nType = MicroProfileLogType(LE);
1693
1694 if(MP_LOG_ENTER == nType)
1695 {
1696 int nTimer = MicroProfileLogTimerIndex(LE);
1697 uint8_t nGroup = pTimerToGroup[nTimer];
1698 MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
1699 MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1700 pGroupStackPos[nGroup]++;
1701 pStack[nStackPos++] = k;
1702 pChildTickStack[nStackPos] = 0;
1703
1704 }
1705 else if(MP_LOG_META == nType)
1706 {
1707 if(nStackPos)
1708 {
1709 int64_t nMetaIndex = MicroProfileLogTimerIndex(LE);
1710 int64_t nMetaCount = MicroProfileLogGetTick(LE);
1711 MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX);
1712 int64_t nCounter = MicroProfileLogTimerIndex(pLog->Log[pStack[nStackPos-1]]);
1713 S.MetaCounters[nMetaIndex].nCounters[nCounter] += nMetaCount;
1714 }
1715 }
1716 else if(MP_LOG_LEAVE == nType)
1717 {
1718 int nTimer = MicroProfileLogTimerIndex(LE);
1719 uint8_t nGroup = pTimerToGroup[nTimer];
1720 MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1721 if(nStackPos)
1722 {
1723 int64_t nTickStart = pLog->Log[pStack[nStackPos-1]];
1724 int64_t nTicks = MicroProfileLogTickDifference(nTickStart, LE);
1725 int64_t nChildTicks = pChildTickStack[nStackPos];
1726 nStackPos--;
1727 pChildTickStack[nStackPos] += nTicks;
1728
1729 uint32_t nTimerIndex = MicroProfileLogTimerIndex(LE);
1730 S.Frame[nTimerIndex].nTicks += nTicks;
1731 S.FrameExclusive[nTimerIndex] += (nTicks-nChildTicks);
1732 S.Frame[nTimerIndex].nCount += 1;
1733
1734 MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1735 uint8_t nGroupStackPos = pGroupStackPos[nGroup];
1736 if(nGroupStackPos)
1737 {
1738 nGroupStackPos--;
1739 if(0 == nGroupStackPos)
1740 {
1741 nGroupTicks[nGroup] += nTicks;
1742 }
1743 pGroupStackPos[nGroup] = nGroupStackPos;
1744 }
1745 }
1746 }
1747 }
1748 }
1749 for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1750 {
1751 pLog->nGroupTicks[i] += nGroupTicks[i];
1752 pFrameGroup[i] += nGroupTicks[i];
1753 }
1754 pLog->nStackPos = nStackPos;
1755 }
1756 }
1757 {
1758 MICROPROFILE_SCOPE(g_MicroProfileAccumulate);
1759 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1760 {
1761 S.AccumTimers[i].nTicks += S.Frame[i].nTicks;
1762 S.AccumTimers[i].nCount += S.Frame[i].nCount;
1763 S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks);
1764 S.AccumTimersExclusive[i] += S.FrameExclusive[i];
1765 S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]);
1766 }
1767
1768 for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1769 {
1770 S.AccumGroup[i] += pFrameGroup[i];
1771 S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i]);
1772 }
1773
1774 for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1775 {
1776 if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1777 {
1778 auto& Meta = S.MetaCounters[j];
1779 uint64_t nSum = 0;;
1780 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1781 {
1782 uint64_t nCounter = Meta.nCounters[i];
1783 Meta.nAccumMax[i] = MicroProfileMax(Meta.nAccumMax[i], nCounter);
1784 Meta.nAccum[i] += nCounter;
1785 nSum += nCounter;
1786 }
1787 Meta.nSumAccum += nSum;
1788 Meta.nSumAccumMax = MicroProfileMax(Meta.nSumAccumMax, nSum);
1789 }
1790 }
1791 }
1792 for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
1793 {
1794 if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
1795 {
1796 MicroProfileToken nToken = S.Graph[i].nToken;
1797 S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks;
1798 }
1799 }
1800 S.nGraphPut = (S.nGraphPut+1) % MICROPROFILE_GRAPH_HISTORY;
1801
1802 }
1803
1804
1805 if(S.nRunning && S.nAggregateFlip <= ++S.nAggregateFlipCount)
1806 {
1807 nAggregateFlip = 1;
1808 if(S.nAggregateFlip) // if 0 accumulate indefinitely
1809 {
1810 nAggregateClear = 1;
1811 }
1812 }
1813 }
1814 if(nAggregateFlip)
1815 {
1816 memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers);
1817 memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers);
1818 memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
1819 memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers);
1820
1821 memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup));
1822 memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup));
1823
1824 for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1825 {
1826 MicroProfileThreadLog* pLog = S.Pool[i];
1827 if(!pLog)
1828 continue;
1829
1830 memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks));
1831
1832 if(nAggregateClear)
1833 {
1834 memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks));
1835 }
1836 }
1837
1838 for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1839 {
1840 if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1841 {
1842 auto& Meta = S.MetaCounters[j];
1843 memcpy(&Meta.nAggregateMax[0], &Meta.nAccumMax[0], sizeof(Meta.nAggregateMax[0]) * S.nTotalTimers);
1844 memcpy(&Meta.nAggregate[0], &Meta.nAccum[0], sizeof(Meta.nAggregate[0]) * S.nTotalTimers);
1845 Meta.nSumAggregate = Meta.nSumAccum;
1846 Meta.nSumAggregateMax = Meta.nSumAccumMax;
1847 if(nAggregateClear)
1848 {
1849 memset(&Meta.nAccumMax[0], 0, sizeof(Meta.nAccumMax[0]) * S.nTotalTimers);
1850 memset(&Meta.nAccum[0], 0, sizeof(Meta.nAccum[0]) * S.nTotalTimers);
1851 Meta.nSumAccum = 0;
1852 Meta.nSumAccumMax = 0;
1853 }
1854 }
1855 }
1856
1857
1858
1859
1860
1861 S.nAggregateFrames = S.nAggregateFlipCount;
1862 S.nFlipAggregateDisplay = S.nFlipAggregate;
1863 S.nFlipMaxDisplay = S.nFlipMax;
1864 if(nAggregateClear)
1865 {
1866 memset(&S.AccumTimers[0], 0, sizeof(S.Aggregate[0]) * S.nTotalTimers);
1867 memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers);
1868 memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
1869 memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers);
1870 memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup));
1871 memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup));
1872
1873 S.nAggregateFlipCount = 0;
1874 S.nFlipAggregate = 0;
1875 S.nFlipMax = 0;
1876
1877 S.nAggregateFlipTick = MP_TICK();
1878 }
1879 }
1880 S.nAggregateClear = 0;
1881
1882 uint64_t nNewActiveGroup = 0;
1883 if(S.nForceEnable || (S.nDisplay && S.nRunning))
1884 nNewActiveGroup = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted;
1885 nNewActiveGroup |= S.nForceGroup;
1886 nNewActiveGroup |= S.nForceGroupUI;
1887 if(S.nActiveGroup != nNewActiveGroup)
1888 S.nActiveGroup = nNewActiveGroup;
1889 uint32_t nNewActiveBars = 0;
1890 if(S.nDisplay && S.nRunning)
1891 nNewActiveBars = S.nBars;
1892 if(S.nForceMetaCounters)
1893 {
1894 for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
1895 {
1896 if(S.MetaCounters[i].pName)
1897 {
1898 nNewActiveBars |= (MP_DRAW_META_FIRST<<i);
1899 }
1900 }
1901 }
1902 if(nNewActiveBars != S.nActiveBars)
1903 S.nActiveBars = nNewActiveBars;
1904 }
1905
MicroProfileSetForceEnable(bool bEnable)1906 void MicroProfileSetForceEnable(bool bEnable)
1907 {
1908 S.nForceEnable = bEnable ? 1 : 0;
1909 }
MicroProfileGetForceEnable()1910 bool MicroProfileGetForceEnable()
1911 {
1912 return S.nForceEnable != 0;
1913 }
1914
MicroProfileSetEnableAllGroups(bool bEnableAllGroups)1915 void MicroProfileSetEnableAllGroups(bool bEnableAllGroups)
1916 {
1917 S.nAllGroupsWanted = bEnableAllGroups ? 1 : 0;
1918 }
1919
MicroProfileEnableCategory(const char * pCategory,bool bEnabled)1920 void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
1921 {
1922 int nCategoryIndex = -1;
1923 for(uint32_t i = 0; i < S.nCategoryCount; ++i)
1924 {
1925 if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
1926 {
1927 nCategoryIndex = (int)i;
1928 break;
1929 }
1930 }
1931 if(nCategoryIndex >= 0)
1932 {
1933 if(bEnabled)
1934 {
1935 S.nActiveGroupWanted |= S.CategoryInfo[nCategoryIndex].nGroupMask;
1936 }
1937 else
1938 {
1939 S.nActiveGroupWanted &= ~S.CategoryInfo[nCategoryIndex].nGroupMask;
1940 }
1941 }
1942 }
1943
1944
MicroProfileEnableCategory(const char * pCategory)1945 void MicroProfileEnableCategory(const char* pCategory)
1946 {
1947 MicroProfileEnableCategory(pCategory, true);
1948 }
MicroProfileDisableCategory(const char * pCategory)1949 void MicroProfileDisableCategory(const char* pCategory)
1950 {
1951 MicroProfileEnableCategory(pCategory, false);
1952 }
1953
MicroProfileGetEnableAllGroups()1954 bool MicroProfileGetEnableAllGroups()
1955 {
1956 return 0 != S.nAllGroupsWanted;
1957 }
1958
MicroProfileSetForceMetaCounters(bool bForce)1959 void MicroProfileSetForceMetaCounters(bool bForce)
1960 {
1961 S.nForceMetaCounters = bForce ? 1 : 0;
1962 }
1963
MicroProfileGetForceMetaCounters()1964 bool MicroProfileGetForceMetaCounters()
1965 {
1966 return 0 != S.nForceMetaCounters;
1967 }
1968
MicroProfileEnableMetaCounter(const char * pMeta)1969 void MicroProfileEnableMetaCounter(const char* pMeta)
1970 {
1971 for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1972 {
1973 if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta))
1974 {
1975 S.nBars |= (MP_DRAW_META_FIRST<<i);
1976 return;
1977 }
1978 }
1979 }
MicroProfileDisableMetaCounter(const char * pMeta)1980 void MicroProfileDisableMetaCounter(const char* pMeta)
1981 {
1982 for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1983 {
1984 if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta))
1985 {
1986 S.nBars &= ~(MP_DRAW_META_FIRST<<i);
1987 return;
1988 }
1989 }
1990 }
1991
1992
MicroProfileSetAggregateFrames(int nFrames)1993 void MicroProfileSetAggregateFrames(int nFrames)
1994 {
1995 S.nAggregateFlip = (uint32_t)nFrames;
1996 if(0 == nFrames)
1997 {
1998 S.nAggregateClear = 1;
1999 }
2000 }
2001
MicroProfileGetAggregateFrames()2002 int MicroProfileGetAggregateFrames()
2003 {
2004 return S.nAggregateFlip;
2005 }
2006
MicroProfileGetCurrentAggregateFrames()2007 int MicroProfileGetCurrentAggregateFrames()
2008 {
2009 return int(S.nAggregateFlip ? S.nAggregateFlip : S.nAggregateFlipCount);
2010 }
2011
2012
MicroProfileForceEnableGroup(const char * pGroup,MicroProfileTokenType Type)2013 void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type)
2014 {
2015 MicroProfileInit();
2016 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2017 uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
2018 S.nForceGroup |= (1ll << nGroup);
2019 }
2020
MicroProfileForceDisableGroup(const char * pGroup,MicroProfileTokenType Type)2021 void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type)
2022 {
2023 MicroProfileInit();
2024 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2025 uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
2026 S.nForceGroup &= ~(1ll << nGroup);
2027 }
2028
2029
MicroProfileCalcAllTimers(float * pTimers,float * pAverage,float * pMax,float * pCallAverage,float * pExclusive,float * pAverageExclusive,float * pMaxExclusive,float * pTotal,uint32_t nSize)2030 void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
2031 {
2032 for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i)
2033 {
2034 const uint32_t nGroupId = S.TimerInfo[i].nGroupIndex;
2035 const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
2036 uint32_t nTimer = i;
2037 uint32_t nIdx = i * 2;
2038 uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2039 uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
2040 float fToPrc = S.fRcpReferenceTime;
2041 float fMs = fToMs * (S.Frame[nTimer].nTicks);
2042 float fPrc = MicroProfileMin(fMs * fToPrc, 1.f);
2043 float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames);
2044 float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f);
2045 float fMaxMs = fToMs * (S.AggregateMax[nTimer]);
2046 float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f);
2047 float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount);
2048 float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f);
2049 float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]);
2050 float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f);
2051 float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames);
2052 float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f);
2053 float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]);
2054 float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f);
2055 float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks;
2056 pTimers[nIdx] = fMs;
2057 pTimers[nIdx+1] = fPrc;
2058 pAverage[nIdx] = fAverageMs;
2059 pAverage[nIdx+1] = fAveragePrc;
2060 pMax[nIdx] = fMaxMs;
2061 pMax[nIdx+1] = fMaxPrc;
2062 pCallAverage[nIdx] = fCallAverageMs;
2063 pCallAverage[nIdx+1] = fCallAveragePrc;
2064 pExclusive[nIdx] = fMsExclusive;
2065 pExclusive[nIdx+1] = fPrcExclusive;
2066 pAverageExclusive[nIdx] = fAverageMsExclusive;
2067 pAverageExclusive[nIdx+1] = fAveragePrcExclusive;
2068 pMaxExclusive[nIdx] = fMaxMsExclusive;
2069 pMaxExclusive[nIdx+1] = fMaxPrcExclusive;
2070 pTotal[nIdx] = fTotalMs;
2071 pTotal[nIdx+1] = 0.f;
2072 }
2073 }
2074
MicroProfileTogglePause()2075 void MicroProfileTogglePause()
2076 {
2077 S.nToggleRunning = 1;
2078 }
2079
MicroProfileGetTime(const char * pGroup,const char * pName)2080 float MicroProfileGetTime(const char* pGroup, const char* pName)
2081 {
2082 MicroProfileToken nToken = MicroProfileFindToken(pGroup, pName);
2083 if(nToken == MICROPROFILE_INVALID_TOKEN)
2084 {
2085 return 0.f;
2086 }
2087 uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken);
2088 uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken);
2089 float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
2090 return S.Frame[nTimerIndex].nTicks * fToMs;
2091 }
2092
2093
MicroProfileContextSwitchSearch(uint32_t * pContextSwitchStart,uint32_t * pContextSwitchEnd,uint64_t nBaseTicksCpu,uint64_t nBaseTicksEndCpu)2094 void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu)
2095 {
2096 MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch);
2097 uint32_t nContextSwitchPut = S.nContextSwitchPut;
2098 uint64_t nContextSwitchStart, nContextSwitchEnd;
2099 nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
2100 int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
2101 int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
2102 for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
2103 {
2104 uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i+1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
2105 MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex];
2106 if(CS.nTicks > nSearchEnd)
2107 {
2108 nContextSwitchEnd = nIndex;
2109 }
2110 if(CS.nTicks > nSearchBegin)
2111 {
2112 nContextSwitchStart = nIndex;
2113 }
2114 }
2115 *pContextSwitchStart = nContextSwitchStart;
2116 *pContextSwitchEnd = nContextSwitchEnd;
2117 }
2118
2119
2120
2121 #if MICROPROFILE_WEBSERVER
2122
2123 #define MICROPROFILE_EMBED_HTML
2124
// Embedded HTML page, split into chunks that are defined in another translation unit.
// MicroProfileDumpHtml sends the "begin" chunks before the generated capture data
// and the "end" chunks after it; for each chunk it sends (size - 1) bytes.
extern const char* g_MicroProfileHtml_begin[];
extern size_t g_MicroProfileHtml_begin_sizes[];
extern size_t g_MicroProfileHtml_begin_count;
extern const char* g_MicroProfileHtml_end[];
extern size_t g_MicroProfileHtml_end_sizes[];
extern size_t g_MicroProfileHtml_end_count;

// Output sink used by the dump functions: receives `size` bytes starting at pData,
// together with the opaque Handle the caller supplied.
typedef void MicroProfileWriteCallback(void* Handle, size_t size, const char* pData);
2133
MicroProfileWebServerPort()2134 uint32_t MicroProfileWebServerPort()
2135 {
2136 return S.nWebServerPort;
2137 }
2138
MicroProfileDumpFile(const char * pHtml,const char * pCsv)2139 void MicroProfileDumpFile(const char* pHtml, const char* pCsv)
2140 {
2141 S.nDumpFileNextFrame = 0;
2142 if(pHtml)
2143 {
2144 uint32_t nLen = strlen(pHtml);
2145 if(nLen > sizeof(S.HtmlDumpPath)-1)
2146 {
2147 return;
2148 }
2149 memcpy(S.HtmlDumpPath, pHtml, nLen+1);
2150 S.nDumpFileNextFrame |= 1;
2151 }
2152 if(pCsv)
2153 {
2154 uint32_t nLen = strlen(pCsv);
2155 if(nLen > sizeof(S.CsvDumpPath)-1)
2156 {
2157 return;
2158 }
2159 memcpy(S.CsvDumpPath, pCsv, nLen+1);
2160 S.nDumpFileNextFrame |= 2;
2161 }
2162 }
2163
MicroProfilePrintf(MicroProfileWriteCallback CB,void * Handle,const char * pFmt,...)2164 void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...)
2165 {
2166 char buffer[32*1024];
2167 va_list args;
2168 va_start (args, pFmt);
2169 #ifdef _WIN32
2170 size_t size = vsprintf_s(buffer, pFmt, args);
2171 #else
2172 size_t size = vsnprintf(buffer, sizeof(buffer)-1, pFmt, args);
2173 #endif
2174 CB(Handle, size, &buffer[0]);
2175 va_end (args);
2176 }
2177
2178 #define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
MicroProfileDumpCsv(MicroProfileWriteCallback CB,void * Handle,int nMaxFrames)2179 void MicroProfileDumpCsv(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames)
2180 {
2181 uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2182 float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2183 float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
2184
2185 printf("frames,%d\n", nAggregateFrames);
2186 printf("group,name,average,max,callaverage\n");
2187
2188 uint32_t nNumTimers = S.nTotalTimers;
2189 uint32_t nBlockSize = 2 * nNumTimers;
2190 float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
2191 float* pAverage = pTimers + nBlockSize;
2192 float* pMax = pTimers + 2 * nBlockSize;
2193 float* pCallAverage = pTimers + 3 * nBlockSize;
2194 float* pTimersExclusive = pTimers + 4 * nBlockSize;
2195 float* pAverageExclusive = pTimers + 5 * nBlockSize;
2196 float* pMaxExclusive = pTimers + 6 * nBlockSize;
2197 float* pTotal = pTimers + 7 * nBlockSize;
2198
2199 MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
2200
2201 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2202 {
2203 uint32_t nIdx = i * 2;
2204 printf("\"%s\",\"%s\",%f,%f,%f\n", S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]);
2205 }
2206
2207 printf("\n\n");
2208
2209 printf("group,average,max,total\n");
2210 for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2211 {
2212 const char* pGroupName = S.GroupInfo[j].pName;
2213 float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
2214 if(pGroupName[0] != '\0')
2215 {
2216 printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j]);
2217 }
2218 }
2219
2220 printf("\n\n");
2221 printf("group,thread,average,total\n");
2222 for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2223 {
2224 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2225 {
2226 if(S.Pool[i])
2227 {
2228 const char* pThreadName = &S.Pool[i]->ThreadName[0];
2229 // MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i);
2230 float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2231 {
2232 uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j];
2233 float fTime = nTicks / nAggregateFrames * fToMs;
2234 float fTimeTotal = nTicks * fToMs;
2235 if(fTimeTotal > 0.01f)
2236 {
2237 const char* pGroupName = S.GroupInfo[j].pName;
2238 printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal);
2239 }
2240 }
2241 }
2242 }
2243 }
2244
2245 printf("\n\n");
2246 printf("frametimecpu\n");
2247
2248 const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3;
2249 const uint32_t nStart = S.nFrameCurrent;
2250 for(uint32_t i = nCount; i > 0; i--)
2251 {
2252 uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
2253 uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2254 uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu;
2255 printf("%f,", nTicks * fToMsCPU);
2256 }
2257 printf("\n");
2258
2259 printf("\n\n");
2260 printf("frametimegpu\n");
2261
2262 for(uint32_t i = nCount; i > 0; i--)
2263 {
2264 uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
2265 uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2266 uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu;
2267 printf("%f,", nTicks * fToMsGPU);
2268 }
2269 printf("\n\n");
2270 printf("Meta\n");//only single frame snapshot
2271 printf("name,average,max,total\n");
2272 for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2273 {
2274 if(S.MetaCounters[j].pName)
2275 {
2276 printf("\"%s\",%f,%lld,%lld\n",S.MetaCounters[j].pName, S.MetaCounters[j].nSumAggregate / (float)nAggregateFrames, S.MetaCounters[j].nSumAggregateMax,S.MetaCounters[j].nSumAggregate);
2277 }
2278 }
2279 }
2280 #undef printf
2281
MicroProfileDumpHtml(MicroProfileWriteCallback CB,void * Handle,int nMaxFrames,const char * pHost)2282 void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames, const char* pHost)
2283 {
2284 uint32_t nRunning = S.nRunning;
2285 S.nRunning = 0;
2286 //stall pushing of timers
2287 uint64_t nActiveGroup = S.nActiveGroup;
2288 S.nActiveGroup = 0;
2289 S.nPauseTicks = MP_TICK();
2290
2291
2292 for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i)
2293 {
2294 CB(Handle, g_MicroProfileHtml_begin_sizes[i]-1, g_MicroProfileHtml_begin[i]);
2295 }
2296 //dump info
2297 uint64_t nTicks = MP_TICK();
2298
2299 float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2300 float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
2301 float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick);
2302 MicroProfilePrintf(CB, Handle, "var DumpHost = '%s';\n", pHost ? pHost : "");
2303 time_t CaptureTime;
2304 time(&CaptureTime);
2305 MicroProfilePrintf(CB, Handle, "var DumpUtcCaptureTime = %ld;\n", CaptureTime);
2306 MicroProfilePrintf(CB, Handle, "var AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs);
2307
2308 //categories
2309 MicroProfilePrintf(CB, Handle, "var CategoryInfo = Array(%d);\n",S.nCategoryCount);
2310 for(uint32_t i = 0; i < S.nCategoryCount; ++i)
2311 {
2312 MicroProfilePrintf(CB, Handle, "CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName);
2313 }
2314
2315 //groups
2316 MicroProfilePrintf(CB, Handle, "var GroupInfo = Array(%d);\n\n",S.nGroupCount);
2317 uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2318 float fRcpAggregateFrames = 1.f / nAggregateFrames;
2319 for(uint32_t i = 0; i < S.nGroupCount; ++i)
2320 {
2321 MP_ASSERT(i == S.GroupInfo[i].nGroupIndex);
2322 float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU;
2323 MicroProfilePrintf(CB, Handle, "GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '#%02x%02x%02x');\n",
2324 S.GroupInfo[i].nGroupIndex,
2325 S.GroupInfo[i].nGroupIndex,
2326 S.GroupInfo[i].pName,
2327 S.GroupInfo[i].nCategory,
2328 S.GroupInfo[i].nNumTimers,
2329 S.GroupInfo[i].Type == MicroProfileTokenTypeGpu?1:0,
2330 fToMs * S.AggregateGroup[i],
2331 fToMs * S.AggregateGroup[i] / nAggregateFrames,
2332 fToMs * S.AggregateGroupMax[i],
2333 MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff,
2334 MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff,
2335 MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff);
2336 }
2337 //timers
2338
2339 uint32_t nNumTimers = S.nTotalTimers;
2340 uint32_t nBlockSize = 2 * nNumTimers;
2341 float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
2342 float* pAverage = pTimers + nBlockSize;
2343 float* pMax = pTimers + 2 * nBlockSize;
2344 float* pCallAverage = pTimers + 3 * nBlockSize;
2345 float* pTimersExclusive = pTimers + 4 * nBlockSize;
2346 float* pAverageExclusive = pTimers + 5 * nBlockSize;
2347 float* pMaxExclusive = pTimers + 6 * nBlockSize;
2348 float* pTotal = pTimers + 7 * nBlockSize;
2349
2350 MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
2351
2352 MicroProfilePrintf(CB, Handle, "\nvar TimerInfo = Array(%d);\n\n", S.nTotalTimers);
2353 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2354 {
2355 uint32_t nIdx = i * 2;
2356 MP_ASSERT(i == S.TimerInfo[i].nTimerIndex);
2357 MicroProfilePrintf(CB, Handle, "var Meta%d = [", i);
2358 bool bOnce = true;
2359 for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2360 {
2361 if(S.MetaCounters[j].pName)
2362 {
2363 uint32_t lala = S.MetaCounters[j].nCounters[i];
2364 MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", lala);
2365 bOnce = false;
2366 }
2367 }
2368 MicroProfilePrintf(CB, Handle, "];\n");
2369 MicroProfilePrintf(CB, Handle, "var MetaAvg%d = [", i);
2370 bOnce = true;
2371 for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2372 {
2373 if(S.MetaCounters[j].pName)
2374 {
2375 MicroProfilePrintf(CB, Handle, bOnce ? "%f" : ",%f", fRcpAggregateFrames * S.MetaCounters[j].nAggregate[i]);
2376 bOnce = false;
2377 }
2378 }
2379 MicroProfilePrintf(CB, Handle, "];\n");
2380 MicroProfilePrintf(CB, Handle, "var MetaMax%d = [", i);
2381 bOnce = true;
2382 for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2383 {
2384 if(S.MetaCounters[j].pName)
2385 {
2386 MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", S.MetaCounters[j].nAggregateMax[i]);
2387 bOnce = false;
2388 }
2389 }
2390 MicroProfilePrintf(CB, Handle, "];\n");
2391
2392
2393 uint32_t nColor = S.TimerInfo[i].nColor;
2394 uint32_t nColorDark = (nColor >> 1) & ~0x80808080;
2395 MicroProfilePrintf(CB, Handle, "TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %d, %f, Meta%d, MetaAvg%d, MetaMax%d);\n", S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].pName, S.TimerInfo[i].nGroupIndex,
2396 MICROPROFILE_UNPACK_RED(nColor) & 0xff,
2397 MICROPROFILE_UNPACK_GREEN(nColor) & 0xff,
2398 MICROPROFILE_UNPACK_BLUE(nColor) & 0xff,
2399 MICROPROFILE_UNPACK_RED(nColorDark) & 0xff,
2400 MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff,
2401 MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff,
2402 pAverage[nIdx],
2403 pMax[nIdx],
2404 pAverageExclusive[nIdx],
2405 pMaxExclusive[nIdx],
2406 pCallAverage[nIdx],
2407 S.Aggregate[i].nCount,
2408 pTotal[nIdx],
2409 i,i,i);
2410
2411 }
2412
2413 MicroProfilePrintf(CB, Handle, "\nvar ThreadNames = [");
2414 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2415 {
2416 if(S.Pool[i])
2417 {
2418 MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName);
2419 }
2420 else
2421 {
2422 MicroProfilePrintf(CB, Handle, "'Thread %d',", i);
2423 }
2424 }
2425 MicroProfilePrintf(CB, Handle, "];\n\n");
2426
2427
2428 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2429 {
2430 if(S.Pool[i])
2431 {
2432 MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i);
2433 float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2434 for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2435 {
2436 MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j]/nAggregateFrames * fToMs);
2437 }
2438 MicroProfilePrintf(CB, Handle, "];\n");
2439 }
2440 }
2441 MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeArray = [");
2442 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2443 {
2444 if(S.Pool[i])
2445 {
2446 MicroProfilePrintf(CB, Handle, "ThreadGroupTime%d,", i);
2447 }
2448 }
2449 MicroProfilePrintf(CB, Handle, "];\n");
2450
2451
2452 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2453 {
2454 if(S.Pool[i])
2455 {
2456 MicroProfilePrintf(CB, Handle, "var ThreadGroupTimeTotal%d = [", i);
2457 float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2458 for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2459 {
2460 MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs);
2461 }
2462 MicroProfilePrintf(CB, Handle, "];\n");
2463 }
2464 }
2465 MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeTotalArray = [");
2466 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2467 {
2468 if(S.Pool[i])
2469 {
2470 MicroProfilePrintf(CB, Handle, "ThreadGroupTimeTotal%d,", i);
2471 }
2472 }
2473 MicroProfilePrintf(CB, Handle, "];");
2474
2475
2476
2477
2478 MicroProfilePrintf(CB, Handle, "\nvar ThreadIds = [");
2479 for(uint32_t i = 0; i < S.nNumLogs; ++i)
2480 {
2481 if(S.Pool[i])
2482 {
2483 ThreadIdType ThreadId = S.Pool[i]->nThreadId;
2484 if(!ThreadId)
2485 {
2486 ThreadId = (ThreadIdType)-1;
2487 }
2488 MicroProfilePrintf(CB, Handle, "%d,", ThreadId);
2489 }
2490 else
2491 {
2492 MicroProfilePrintf(CB, Handle, "-1,", i);
2493 }
2494 }
2495 MicroProfilePrintf(CB, Handle, "];\n\n");
2496
2497 MicroProfilePrintf(CB, Handle, "\nvar MetaNames = [");
2498 for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
2499 {
2500 if(S.MetaCounters[i].pName)
2501 {
2502 MicroProfilePrintf(CB, Handle, "'%s',", S.MetaCounters[i].pName);
2503 }
2504 }
2505
2506
2507 MicroProfilePrintf(CB, Handle, "];\n\n");
2508
2509
2510
2511 uint32_t nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); //leave a few to not overwrite
2512 nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
2513
2514
2515 uint32_t nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
2516 uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
2517 MP_ASSERT(nLastFrame == (S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY));
2518 MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY);
2519 MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY);
2520 const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu;
2521 const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu;
2522 int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu;
2523
2524 int64_t nTickReferenceCpu, nTickReferenceGpu;
2525 int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
2526 int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
2527 int nTickReference = 0;
2528 if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
2529 {
2530 nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
2531 nTickReference = 1;
2532 }
2533
2534
2535 #if MICROPROFILE_DEBUG
2536 printf("dumping %d frames\n", nNumFrames);
2537 printf("dumping frame %d to %d\n", nFirstFrame, nLastFrame);
2538 #endif
2539
2540
2541 uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
2542 memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers);
2543
2544 MicroProfilePrintf(CB, Handle, "var Frames = Array(%d);\n", nNumFrames);
2545 for(uint32_t i = 0; i < nNumFrames; ++i)
2546 {
2547 uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY;
2548 uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2549
2550 for(uint32_t j = 0; j < S.nNumLogs; ++j)
2551 {
2552 MicroProfileThreadLog* pLog = S.Pool[j];
2553 int64_t nStartTickBase = pLog->nGpu ? nTickStartGpu : nTickStart;
2554 uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j];
2555 uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j];
2556
2557 float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
2558 float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu);
2559 MicroProfilePrintf(CB, Handle, "var ts_%d_%d = [", i, j);
2560 if(nLogStart != nLogEnd)
2561 {
2562 uint32_t k = nLogStart;
2563 uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2564 float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
2565 int64_t nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
2566 float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
2567 MicroProfilePrintf(CB, Handle, "%f", fTime);
2568 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2569 {
2570 uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2571 float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
2572 nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
2573 float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
2574 MicroProfilePrintf(CB, Handle, ",%f", fTime);
2575 }
2576 }
2577 MicroProfilePrintf(CB, Handle, "];\n");
2578 MicroProfilePrintf(CB, Handle, "var tt_%d_%d = [", i, j);
2579 if(nLogStart != nLogEnd)
2580 {
2581 uint32_t k = nLogStart;
2582 MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogType(pLog->Log[k]));
2583 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2584 {
2585 uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2586 if(nLogType == MP_LOG_META)
2587 {
2588 //for meta, store the count + 3, which is the tick part
2589 nLogType = 3 + MicroProfileLogGetTick(pLog->Log[k]);
2590 }
2591 MicroProfilePrintf(CB, Handle, ",%d", nLogType);
2592 }
2593 }
2594 MicroProfilePrintf(CB, Handle, "];\n");
2595
2596 MicroProfilePrintf(CB, Handle, "var ti_%d_%d = [", i, j);
2597 if(nLogStart != nLogEnd)
2598 {
2599 uint32_t k = nLogStart;
2600 MicroProfilePrintf(CB, Handle, "%d", (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]));
2601 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2602 {
2603 uint32_t nTimerIndex = (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]);
2604 MicroProfilePrintf(CB, Handle, ",%d", nTimerIndex);
2605 nTimerCounter[nTimerIndex]++;
2606 }
2607 }
2608 MicroProfilePrintf(CB, Handle, "];\n");
2609
2610 }
2611
2612 MicroProfilePrintf(CB, Handle, "var ts%d = [", i);
2613 for(uint32_t j = 0; j < S.nNumLogs; ++j)
2614 {
2615 MicroProfilePrintf(CB, Handle, "ts_%d_%d,", i, j);
2616 }
2617 MicroProfilePrintf(CB, Handle, "];\n");
2618 MicroProfilePrintf(CB, Handle, "var tt%d = [", i);
2619 for(uint32_t j = 0; j < S.nNumLogs; ++j)
2620 {
2621 MicroProfilePrintf(CB, Handle, "tt_%d_%d,", i, j);
2622 }
2623 MicroProfilePrintf(CB, Handle, "];\n");
2624
2625 MicroProfilePrintf(CB, Handle, "var ti%d = [", i);
2626 for(uint32_t j = 0; j < S.nNumLogs; ++j)
2627 {
2628 MicroProfilePrintf(CB, Handle, "ti_%d_%d,", i, j);
2629 }
2630 MicroProfilePrintf(CB, Handle, "];\n");
2631
2632
2633 int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu;
2634 int64_t nFrameEnd = S.Frames[nFrameIndexNext].nFrameStartCpu;
2635
2636 float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
2637 float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs;
2638 float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs;
2639 float fFrameGpuMs = 0;
2640 float fFrameGpuEndMs = 0;
2641 if(nTickReference)
2642 {
2643 fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU;
2644 fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU;
2645 }
2646 MicroProfilePrintf(CB, Handle, "Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, ts%d, tt%d, ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i);
2647 }
2648
2649 uint32_t nContextSwitchStart = 0;
2650 uint32_t nContextSwitchEnd = 0;
2651 MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd);
2652
2653 uint32_t nWrittenBefore = S.nWebServerDataSent;
2654 MicroProfilePrintf(CB, Handle, "var CSwitchThreadInOutCpu = [");
2655 for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
2656 {
2657 MicroProfileContextSwitch CS = S.ContextSwitch[j];
2658 int nCpu = CS.nCpu;
2659 MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu);
2660 }
2661 MicroProfilePrintf(CB, Handle, "];\n");
2662 MicroProfilePrintf(CB, Handle, "var CSwitchTime = [");
2663 float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2664 for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
2665 {
2666 MicroProfileContextSwitch CS = S.ContextSwitch[j];
2667 float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu;
2668 MicroProfilePrintf(CB, Handle, "%f,", fTime);
2669 }
2670 MicroProfilePrintf(CB, Handle, "];\n");
2671 uint32_t nWrittenAfter = S.nWebServerDataSent;
2672 MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - nWrittenBefore);
2673
2674
2675 for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i)
2676 {
2677 CB(Handle, g_MicroProfileHtml_end_sizes[i]-1, g_MicroProfileHtml_end[i]);
2678 }
2679
2680 uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
2681
2682 memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount);
2683 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2684 {
2685 uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex;
2686 nGroupCounter[nGroupIndex] += nTimerCounter[i];
2687 }
2688
2689 uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
2690 uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
2691 for(uint32_t i = 0; i < S.nGroupCount; ++i)
2692 {
2693 nGroupCounterSort[i] = i;
2694 }
2695 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2696 {
2697 nTimerCounterSort[i] = i;
2698 }
2699 std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount,
2700 [nGroupCounter](const uint32_t l, const uint32_t r)
2701 {
2702 return nGroupCounter[l] > nGroupCounter[r];
2703 }
2704 );
2705
2706 std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers,
2707 [nTimerCounter](const uint32_t l, const uint32_t r)
2708 {
2709 return nTimerCounter[l] > nTimerCounter[r];
2710 }
2711 );
2712
2713 MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n");
2714 for(uint32_t i = 0; i < S.nGroupCount; ++i)
2715 {
2716 uint32_t idx = nGroupCounterSort[i];
2717 MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName);
2718 }
2719 MicroProfilePrintf(CB, Handle, "Marker Per Timer\n");
2720 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2721 {
2722 uint32_t idx = nTimerCounterSort[i];
2723 MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName);
2724 }
2725 MicroProfilePrintf(CB, Handle, "\n-->\n");
2726
2727 S.nActiveGroup = nActiveGroup;
2728 S.nRunning = nRunning;
2729
2730 #if MICROPROFILE_DEBUG
2731 int64_t nTicksEnd = MP_TICK();
2732 float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks);
2733 printf("html dump took %6.2fms\n", fMs);
2734 #endif
2735
2736
2737 }
2738
// Output callback for the dump functions: Handle is a FILE*, and the nSize bytes
// at pData are written to it as a single item. The fwrite result is not checked.
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData)
{
	FILE* pFile = (FILE*)Handle;
	fwrite(pData, nSize, 1, pFile);
}
2743
MicroProfileDumpToFile()2744 void MicroProfileDumpToFile()
2745 {
2746 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2747 if(S.nDumpFileNextFrame&1)
2748 {
2749 FILE* F = fopen(S.HtmlDumpPath, "w");
2750 if(F)
2751 {
2752 MicroProfileDumpHtml(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES, S.HtmlDumpPath);
2753 fclose(F);
2754 }
2755 }
2756 if(S.nDumpFileNextFrame&2)
2757 {
2758 FILE* F = fopen(S.CsvDumpPath, "w");
2759 if(F)
2760 {
2761 MicroProfileDumpCsv(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES);
2762 fclose(F);
2763 }
2764 }
2765 }
2766
MicroProfileFlushSocket(MpSocket Socket)2767 void MicroProfileFlushSocket(MpSocket Socket)
2768 {
2769 send(Socket, &S.WebServerBuffer[0], S.WebServerPut, 0);
2770 S.WebServerPut = 0;
2771
2772 }
2773
MicroProfileWriteSocket(void * Handle,size_t nSize,const char * pData)2774 void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData)
2775 {
2776 S.nWebServerDataSent += nSize;
2777 MpSocket Socket = *(MpSocket*)Handle;
2778 if(nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2)
2779 {
2780 MicroProfileFlushSocket(Socket);
2781 send(Socket, pData, nSize, 0);
2782
2783 }
2784 else
2785 {
2786 memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize);
2787 S.WebServerPut += nSize;
2788 if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE/2)
2789 {
2790 MicroProfileFlushSocket(Socket);
2791 }
2792 }
2793 }
2794
2795 #if MICROPROFILE_MINIZ
2796 #ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE
2797 #define MICROPROFILE_COMPRESS_BUFFER_SIZE (256<<10)
2798 #endif
2799
2800 #define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE/2)
// State for streaming deflate-compressed output to a socket.
// Set up by MicroProfileCompressedSocketStart, fed by MicroProfileCompressedWriteSocket,
// drained by MicroProfileCompressedSocketFlush and closed by MicroProfileCompressedSocketFinish.
struct MicroProfileCompressedSocketState
{
	unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK]; // compressed output buffer (Stream.next_out points into it)
	unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK];  // uncompressed input buffer (Stream.next_in points into it)
	mz_stream Stream;         // miniz deflate stream
	MpSocket Socket;          // destination socket for compressed bytes
	uint32_t nSize;           // total uncompressed bytes handed to the write callback
	uint32_t nCompressedSize; // total compressed bytes passed to send()
	uint32_t nFlushes;        // number of flushes triggered from the write loop
	uint32_t nMemmoveBytes;   // bytes moved when compacting unconsumed input to the front of DeflateIn
};
2812
MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState * pState)2813 void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState)
2814 {
2815 mz_stream& Stream = pState->Stream;
2816 unsigned char* pSendStart = &pState->DeflateOut[0];
2817 unsigned char* pSendEnd = &pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Stream.avail_out];
2818 if(pSendStart != pSendEnd)
2819 {
2820 send(pState->Socket, (const char*)pSendStart, pSendEnd - pSendStart, 0);
2821 pState->nCompressedSize += pSendEnd - pSendStart;
2822 }
2823 Stream.next_out = &pState->DeflateOut[0];
2824 Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
2825
2826 }
MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState * pState,MpSocket Socket)2827 void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket)
2828 {
2829 mz_stream& Stream = pState->Stream;
2830 memset(&Stream, 0, sizeof(Stream));
2831 Stream.next_out = &pState->DeflateOut[0];
2832 Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
2833 Stream.next_in = &pState->DeflateIn[0];
2834 Stream.avail_in = 0;
2835 mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION);
2836 pState->Socket = Socket;
2837 pState->nSize = 0;
2838 pState->nCompressedSize = 0;
2839 pState->nFlushes = 0;
2840 pState->nMemmoveBytes = 0;
2841
2842 }
MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState * pState)2843 void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState)
2844 {
2845 mz_stream& Stream = pState->Stream;
2846 MicroProfileCompressedSocketFlush(pState);
2847 int r = mz_deflate(&Stream, MZ_FINISH);
2848 MP_ASSERT(r == MZ_STREAM_END);
2849 MicroProfileCompressedSocketFlush(pState);
2850 r = mz_deflateEnd(&Stream);
2851 MP_ASSERT(r == MZ_OK);
2852 }
2853
// Output callback that feeds data through deflate before it is sent on the socket.
// Handle is a MicroProfileCompressedSocketState*. nSize bytes from pData are appended
// to the DeflateIn buffer; deflate is only invoked once the data no longer fits.
void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData)
{
	MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle;
	mz_stream& Stream = pState->Stream;
	// [next_in, next_in + avail_in) is the input not yet consumed by deflate;
	// pDeflateInEnd is therefore the position where new input gets appended.
	const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in;
	const unsigned char* pDeflateInStart = &pState->DeflateIn[0];
	const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
	pState->nSize += nSize;
	// Fast path: everything fits in the free tail of DeflateIn, so just buffer it.
	if(nSize <= pDeflateInRealEnd - pDeflateInEnd)
	{
		memcpy((void*)pDeflateInEnd, pData, nSize);
		Stream.avail_in += nSize;
		MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd);
		return;
	}
	// Slow path: copy as much as fits, run deflate, and repeat until all input is taken.
	int Flush = 0;
	while(nSize)
	{
		pDeflateInEnd = Stream.next_in + Stream.avail_in;
		if(Flush)
		{
			// The previous iteration asked for a flush: send the compressed output ...
			pState->nFlushes++;
			MicroProfileCompressedSocketFlush(pState);
			pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
			// ... and if the append position hit the end of DeflateIn, move the
			// unconsumed input (if any) to the front to make room again.
			if(pDeflateInEnd == pDeflateInRealEnd)
			{
				if(Stream.avail_in)
				{
					MP_ASSERT(pDeflateInStart != Stream.next_in);
					memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in);
					pState->nMemmoveBytes += Stream.avail_in;
				}
				Stream.next_in = pDeflateInStart;
				pDeflateInEnd = Stream.next_in + Stream.avail_in;
			}
		}
		size_t nSpace = pDeflateInRealEnd - pDeflateInEnd;
		size_t nBytes = MicroProfileMin(nSpace, nSize);
		MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd);
		memcpy((void*)pDeflateInEnd, pData, nBytes);
		Stream.avail_in += nBytes;
		nSize -= nBytes;
		pData += nBytes;
		int r = mz_deflate(&Stream, MZ_NO_FLUSH);
		// Request a flush next iteration when deflate reported a buffer error, when no
		// input could be copied this round, or when the output buffer is full.
		Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0;
		MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK);
		if(r == MZ_BUF_ERROR)
		{
			r = mz_deflate(&Stream, MZ_SYNC_FLUSH);
		}
	}
}
2906 #endif
2907
2908
2909 #ifndef MicroProfileSetNonBlocking //fcntl doesnt work on a some unix like platforms..
MicroProfileSetNonBlocking(MpSocket Socket,int NonBlocking)2910 void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking)
2911 {
2912 #ifdef _WIN32
2913 u_long nonBlocking = NonBlocking ? 1 : 0;
2914 ioctlsocket(Socket, FIONBIO, &nonBlocking);
2915 #else
2916 int Options = fcntl(Socket, F_GETFL);
2917 if(NonBlocking)
2918 {
2919 fcntl(Socket, F_SETFL, Options|O_NONBLOCK);
2920 }
2921 else
2922 {
2923 fcntl(Socket, F_SETFL, Options&(~O_NONBLOCK));
2924 }
2925 #endif
2926 }
2927 #endif
2928
MicroProfileWebServerStart()2929 void MicroProfileWebServerStart()
2930 {
2931 #ifdef _WIN32
2932 WSADATA wsa;
2933 if(WSAStartup(MAKEWORD(2, 2), &wsa))
2934 {
2935 S.ListenerSocket = -1;
2936 return;
2937 }
2938 #endif
2939
2940 S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6);
2941 MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket));
2942 MicroProfileSetNonBlocking(S.ListenerSocket, 1);
2943
2944 S.nWebServerPort = (uint32_t)-1;
2945 struct sockaddr_in Addr;
2946 Addr.sin_family = AF_INET;
2947 Addr.sin_addr.s_addr = INADDR_ANY;
2948 for(int i = 0; i < 20; ++i)
2949 {
2950 Addr.sin_port = htons(MICROPROFILE_WEBSERVER_PORT+i);
2951 if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr)))
2952 {
2953 S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT+i;
2954 break;
2955 }
2956 }
2957 listen(S.ListenerSocket, 8);
2958 }
2959
MicroProfileWebServerStop()2960 void MicroProfileWebServerStop()
2961 {
2962 #ifdef _WIN32
2963 closesocket(S.ListenerSocket);
2964 WSACleanup();
2965 #else
2966 close(S.ListenerSocket);
2967 #endif
2968 }
2969
MicroProfileParseGet(const char * pGet)2970 int MicroProfileParseGet(const char* pGet)
2971 {
2972 const char* pStart = pGet;
2973 while(*pGet != '\0')
2974 {
2975 if(*pGet < '0' || *pGet > '9')
2976 return 0;
2977 pGet++;
2978 }
2979 int nFrames = atoi(pStart);
2980 if(nFrames)
2981 {
2982 return nFrames;
2983 }
2984 else
2985 {
2986 return MICROPROFILE_WEBSERVER_MAXFRAMES;
2987 }
2988 }
MicroProfileWebServerUpdate()2989 bool MicroProfileWebServerUpdate()
2990 {
2991 MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", -1);
2992 MpSocket Connection = accept(S.ListenerSocket, 0, 0);
2993 bool bServed = false;
2994 if(!MP_INVALID_SOCKET(Connection))
2995 {
2996 std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2997 char Req[8192];
2998 MicroProfileSetNonBlocking(Connection, 0);
2999 int nReceived = recv(Connection, Req, sizeof(Req)-1, 0);
3000 if(nReceived > 0)
3001 {
3002 Req[nReceived] = '\0';
3003 #if MICROPROFILE_MINIZ
3004 #define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
3005 #else
3006 #define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
3007 #endif
3008 char* pHttp = strstr(Req, "HTTP/");
3009 char* pGet = strstr(Req, "GET /");
3010 char* pHost = strstr(Req, "Host: ");
3011 auto Terminate = [](char* pString)
3012 {
3013 char* pEnd = pString;
3014 while(*pEnd != '\0')
3015 {
3016 if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ')
3017 {
3018 *pEnd = '\0';
3019 return;
3020 }
3021 pEnd++;
3022 }
3023 };
3024 if(pHost)
3025 {
3026 pHost += sizeof("Host: ")-1;
3027 Terminate(pHost);
3028 }
3029
3030 if(pHttp && pGet)
3031 {
3032 *pHttp = '\0';
3033 pGet += sizeof("GET /")-1;
3034 Terminate(pGet);
3035 int nFrames = MicroProfileParseGet(pGet);
3036 if(nFrames)
3037 {
3038 uint64_t nTickStart = MP_TICK();
3039 send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER)-1, 0);
3040 uint64_t nDataStart = S.nWebServerDataSent;
3041 S.WebServerPut = 0;
3042 #if 0 == MICROPROFILE_MINIZ
3043 MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, nFrames, pHost);
3044 uint64_t nDataEnd = S.nWebServerDataSent;
3045 uint64_t nTickEnd = MP_TICK();
3046 uint64_t nDiff = (nTickEnd - nTickStart);
3047 float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
3048 int nKb = ((nDataEnd-nDataStart)>>10) + 1;
3049 int nCompressedKb = nKb;
3050 MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
3051 MicroProfileFlushSocket(Connection);
3052 #else
3053 MicroProfileCompressedSocketState CompressState;
3054 MicroProfileCompressedSocketStart(&CompressState, Connection);
3055 MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, nFrames, pHost);
3056 S.nWebServerDataSent += CompressState.nSize;
3057 uint64_t nDataEnd = S.nWebServerDataSent;
3058 uint64_t nTickEnd = MP_TICK();
3059 uint64_t nDiff = (nTickEnd - nTickStart);
3060 float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
3061 int nKb = ((nDataEnd-nDataStart)>>10) + 1;
3062 int nCompressedKb = ((CompressState.nCompressedSize)>>10) + 1;
3063 MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
3064 MicroProfileCompressedSocketFinish(&CompressState);
3065 MicroProfileFlushSocket(Connection);
3066 #endif
3067
3068 #if MICROPROFILE_DEBUG
3069 printf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
3070 #endif
3071 }
3072 }
3073 }
3074 #ifdef _WIN32
3075 closesocket(Connection);
3076 #else
3077 close(Connection);
3078 #endif
3079 }
3080 return bServed;
3081 }
3082 #endif
3083
3084
3085
3086
3087 #if MICROPROFILE_CONTEXT_SWITCH_TRACE
3088 //functions that need to be implemented per platform.
3089 void* MicroProfileTraceThread(void* unused);
3090 bool MicroProfileIsLocalThread(uint32_t nThreadId);
3091
3092
MicroProfileStartContextSwitchTrace()3093 void MicroProfileStartContextSwitchTrace()
3094 {
3095 if(!S.bContextSwitchRunning)
3096 {
3097 S.bContextSwitchRunning = true;
3098 S.bContextSwitchStop = false;
3099 MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread);
3100 }
3101 }
3102
MicroProfileStopContextSwitchTrace()3103 void MicroProfileStopContextSwitchTrace()
3104 {
3105 if(S.bContextSwitchRunning)
3106 {
3107 S.bContextSwitchStop = true;
3108 MicroProfileThreadJoin(&S.ContextSwitchThread);
3109 }
3110 }
3111
3112
3113 #ifdef _WIN32
3114 #define INITGUID
3115 #include <evntrace.h>
3116 #include <evntcons.h>
3117 #include <strsafe.h>
3118
3119
// Event class GUID that MicroProfileContextSwitchCallback matches against
// pEvent->Header.Guid; events with any other GUID are ignored there.
static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c };
3121
// Payload layout overlaid on pEvent->MofData for events of class type 36 in
// MicroProfileContextSwitchCallback. Only NewThreadId and OldThreadId are read there.
// NOTE(review): field order and sizes presumably mirror the ETW context-switch event
// payload - confirm against the Windows documentation before changing anything.
struct MicroProfileSCSwitch
{
	uint32_t NewThreadId; // thread being switched in
	uint32_t OldThreadId; // thread being switched out
	int8_t NewThreadPriority;
	int8_t OldThreadPriority;
	uint8_t PreviousCState;
	int8_t SpareByte;
	int8_t OldThreadWaitReason;
	int8_t OldThreadWaitMode;
	int8_t OldThreadState;
	int8_t OldThreadWaitIdealProcessor;
	uint32_t NewThreadWaitTime;
	uint32_t Reserved;
};
3137
3138
MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)3139 VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)
3140 {
3141 if (pEvent->Header.Guid == g_MicroProfileThreadClassGuid)
3142 {
3143 if (pEvent->Header.Class.Type == 36)
3144 {
3145 MicroProfileSCSwitch* pCSwitch = (MicroProfileSCSwitch*) pEvent->MofData;
3146 if ((pCSwitch->NewThreadId != 0) || (pCSwitch->OldThreadId != 0))
3147 {
3148 MicroProfileContextSwitch Switch;
3149 Switch.nThreadOut = pCSwitch->OldThreadId;
3150 Switch.nThreadIn = pCSwitch->NewThreadId;
3151 Switch.nCpu = pEvent->BufferContext.ProcessorNumber;
3152 Switch.nTicks = pEvent->Header.TimeStamp.QuadPart;
3153 MicroProfileContextSwitchPut(&Switch);
3154 }
3155 }
3156 }
3157 }
3158
MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer)3159 ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer)
3160 {
3161 return (S.bContextSwitchStop || !S.bContextSwitchRunning) ? FALSE : TRUE;
3162 }
3163
3164
// EVENT_TRACE_PROPERTIES followed by storage sized for KERNEL_LOGGER_NAME.
// Users of this struct set LoggerNameOffset to sizeof(EVENT_TRACE_PROPERTIES),
// i.e. to the offset of the trailing member.
struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES
{
	char dummy[sizeof(KERNEL_LOGGER_NAME)]; // room for the logger name after the base struct
};
3169
MicroProfileContextSwitchShutdownTrace()3170 void MicroProfileContextSwitchShutdownTrace()
3171 {
3172 TRACEHANDLE SessionHandle = 0;
3173 MicroProfileKernelTraceProperties sessionProperties;
3174
3175 ZeroMemory(&sessionProperties, sizeof(sessionProperties));
3176 sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
3177 sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
3178 sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution
3179 sessionProperties.Wnode.Guid = SystemTraceControlGuid;
3180 sessionProperties.BufferSize = 1;
3181 sessionProperties.NumberOfBuffers = 128;
3182 sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH;
3183 sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
3184 sessionProperties.MaximumFileSize = 0;
3185 sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
3186 sessionProperties.LogFileNameOffset = 0;
3187
3188 EVENT_TRACE_LOGFILE log;
3189 ZeroMemory(&log, sizeof(log));
3190 log.LoggerName = KERNEL_LOGGER_NAME;
3191 log.ProcessTraceMode = 0;
3192 TRACEHANDLE hLog = OpenTrace(&log);
3193 if (hLog)
3194 {
3195 ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP);
3196 }
3197 CloseTrace(hLog);
3198
3199
3200 }
3201
MicroProfileTraceThread(void * unused)3202 void* MicroProfileTraceThread(void* unused)
3203 {
3204
3205 MicroProfileContextSwitchShutdownTrace();
3206 ULONG status = ERROR_SUCCESS;
3207 TRACEHANDLE SessionHandle = 0;
3208 MicroProfileKernelTraceProperties sessionProperties;
3209
3210 ZeroMemory(&sessionProperties, sizeof(sessionProperties));
3211 sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
3212 sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
3213 sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution
3214 sessionProperties.Wnode.Guid = SystemTraceControlGuid;
3215 sessionProperties.BufferSize = 1;
3216 sessionProperties.NumberOfBuffers = 128;
3217 sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH|EVENT_TRACE_FLAG_PROCESS;
3218 sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
3219 sessionProperties.MaximumFileSize = 0;
3220 sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
3221 sessionProperties.LogFileNameOffset = 0;
3222
3223
3224 status = StartTrace((PTRACEHANDLE) &SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties);
3225
3226 if (ERROR_SUCCESS != status)
3227 {
3228 S.bContextSwitchRunning = false;
3229 return 0;
3230 }
3231
3232 EVENT_TRACE_LOGFILE log;
3233 ZeroMemory(&log, sizeof(log));
3234
3235 log.LoggerName = KERNEL_LOGGER_NAME;
3236 log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
3237 log.EventCallback = MicroProfileContextSwitchCallback;
3238 log.BufferCallback = MicroProfileBufferCallback;
3239
3240 TRACEHANDLE hLog = OpenTrace(&log);
3241 ProcessTrace(&hLog, 1, 0, 0);
3242 CloseTrace(hLog);
3243 MicroProfileContextSwitchShutdownTrace();
3244
3245 S.bContextSwitchRunning = false;
3246 return 0;
3247 }
3248
MicroProfileIsLocalThread(uint32_t nThreadId)3249 bool MicroProfileIsLocalThread(uint32_t nThreadId)
3250 {
3251 HANDLE h = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, nThreadId);
3252 if(h == NULL)
3253 return false;
3254 DWORD hProcess = GetProcessIdOfThread(h);
3255 CloseHandle(h);
3256 return GetCurrentProcessId() == hProcess;
3257 }
3258
3259 #elif defined(__APPLE__)
3260 #include <sys/time.h>
MicroProfileTraceThread(void * unused)3261 void* MicroProfileTraceThread(void* unused)
3262 {
3263 FILE* pFile = fopen("mypipe", "r");
3264 if(!pFile)
3265 {
3266 printf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n");
3267 S.bContextSwitchRunning = false;
3268 return 0;
3269 }
3270 printf("STARTING TRACE THREAD\n");
3271 char* pLine = 0;
3272 size_t cap = 0;
3273 size_t len = 0;
3274 struct timeval tv;
3275
3276 gettimeofday(&tv, NULL);
3277
3278 uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000;
3279 uint64_t nTickEpoch = MP_TICK();
3280 uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = {0};
3281 mach_timebase_info_data_t sTimebaseInfo;
3282 mach_timebase_info(&sTimebaseInfo);
3283 S.bContextSwitchRunning = true;
3284
3285 uint64_t nProcessed = 0;
3286 uint64_t nProcessedLast = 0;
3287 while((len = getline(&pLine, &cap, pFile))>0 && !S.bContextSwitchStop)
3288 {
3289 nProcessed += len;
3290 if(nProcessed - nProcessedLast > 10<<10)
3291 {
3292 nProcessedLast = nProcessed;
3293 printf("processed %llukb %llukb\n", (nProcessed-nProcessedLast)>>10,nProcessed >>10);
3294 }
3295
3296 char* pX = strchr(pLine, 'X');
3297 if(pX)
3298 {
3299 int cpu = atoi(pX+1);
3300 char* pX2 = strchr(pX + 1, 'X');
3301 char* pX3 = strchr(pX2 + 1, 'X');
3302 int thread = atoi(pX2+1);
3303 char* lala;
3304 int64_t timestamp = strtoll(pX3 + 1, &lala, 10);
3305 MicroProfileContextSwitch Switch;
3306
3307 //convert to ticks.
3308 uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch;
3309 uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom;
3310 uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch;
3311 if(cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
3312 {
3313 Switch.nThreadOut = nLastThread[cpu];
3314 Switch.nThreadIn = thread;
3315 nLastThread[cpu] = thread;
3316 Switch.nCpu = cpu;
3317 Switch.nTicks = nTicks;
3318 MicroProfileContextSwitchPut(&Switch);
3319 }
3320 }
3321 }
3322 printf("EXITING TRACE THREAD\n");
3323 S.bContextSwitchRunning = false;
3324 return 0;
3325 }
3326
// Thread ownership lookup is not implemented in this branch: always returns false.
bool MicroProfileIsLocalThread(uint32_t nThreadId)
{
	(void)nThreadId;
	return false;
}
3331
3332 #endif
3333 #else
3334
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: always returns false.
bool MicroProfileIsLocalThread(uint32_t nThreadId)
{
	(void)nThreadId;
	return false;
}
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: does nothing.
void MicroProfileStopContextSwitchTrace()
{
}
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: does nothing.
void MicroProfileStartContextSwitchTrace()
{
}
3338
3339 #endif
3340
3341
3342
3343
3344 #if MICROPROFILE_GPU_TIMERS_D3D11
MicroProfileGpuInsertTimeStamp()3345 uint32_t MicroProfileGpuInsertTimeStamp()
3346 {
3347 MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
3348 if(Frame.m_nRateQueryStarted)
3349 {
3350 uint32_t nCurrent = (Frame.m_nQueryStart + Frame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
3351 uint32_t nNext = (nCurrent + 1) % MICROPROFILE_D3D_MAX_QUERIES;
3352 if(nNext != S.GPU.m_nQueryGet)
3353 {
3354 Frame.m_nQueryCount++;
3355 ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[nCurrent];
3356 ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
3357 pContext->End(pQuery);
3358 S.GPU.m_nQueryPut = nNext;
3359 return nCurrent;
3360 }
3361 }
3362 return (uint32_t)-1;
3363 }
3364
MicroProfileGpuGetTimeStamp(uint32_t nIndex)3365 uint64_t MicroProfileGpuGetTimeStamp(uint32_t nIndex)
3366 {
3367 if(nIndex == (uint32_t)-1)
3368 {
3369 return (uint64_t)-1;
3370 }
3371 int64_t nResult = S.GPU.m_nQueryResults[nIndex];
3372 MP_ASSERT(nResult != -1);
3373 return nResult;
3374 }
3375
MicroProfileGpuGetData(void * pQuery,void * pData,uint32_t nDataSize)3376 bool MicroProfileGpuGetData(void* pQuery, void* pData, uint32_t nDataSize)
3377 {
3378 HRESULT hr;
3379 do
3380 {
3381 hr = ((ID3D11DeviceContext*)S.GPU.m_pDeviceContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0);
3382 }while(hr == S_FALSE);
3383 switch(hr)
3384 {
3385 case DXGI_ERROR_DEVICE_REMOVED:
3386 case DXGI_ERROR_INVALID_CALL:
3387 case E_INVALIDARG:
3388 MP_BREAK();
3389 return false;
3390
3391 }
3392 return true;
3393 }
3394
MicroProfileTicksPerSecondGpu()3395 uint64_t MicroProfileTicksPerSecondGpu()
3396 {
3397 return S.GPU.m_nQueryFrequency;
3398 }
3399
MicroProfileGpuFlip()3400 void MicroProfileGpuFlip()
3401 {
3402 MicroProfileD3D11Frame& CurrentFrame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
3403 ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
3404 if(CurrentFrame.m_nRateQueryStarted)
3405 {
3406 pContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery);
3407 }
3408 uint32_t nNextFrame = (S.GPU.m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY;
3409 MicroProfileD3D11Frame& OldFrame = S.GPU.m_QueryFrames[nNextFrame];
3410 if(OldFrame.m_nRateQueryStarted)
3411 {
3412 struct RateQueryResult
3413 {
3414 uint64_t nFrequency;
3415 BOOL bDisjoint;
3416 };
3417 RateQueryResult Result;
3418 if(MicroProfileGpuGetData(OldFrame.m_pRateQuery, &Result, sizeof(Result)))
3419 {
3420 if(S.GPU.m_nQueryFrequency != (int64_t)Result.nFrequency)
3421 {
3422 if(S.GPU.m_nQueryFrequency)
3423 {
3424 OutputDebugString("Query freq changing");
3425 }
3426 S.GPU.m_nQueryFrequency = Result.nFrequency;
3427 }
3428 uint32_t nStart = OldFrame.m_nQueryStart;
3429 uint32_t nCount = OldFrame.m_nQueryCount;
3430 for(uint32_t i = 0; i < nCount; ++i)
3431 {
3432 uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES;
3433
3434
3435
3436 if(!MicroProfileGpuGetData(S.GPU.m_pQueries[nIndex], &S.GPU.m_nQueryResults[nIndex], sizeof(uint64_t)))
3437 {
3438 S.GPU.m_nQueryResults[nIndex] = -1;
3439 }
3440 }
3441 }
3442 else
3443 {
3444 uint32_t nStart = OldFrame.m_nQueryStart;
3445 uint32_t nCount = OldFrame.m_nQueryCount;
3446 for(uint32_t i = 0; i < nCount; ++i)
3447 {
3448 uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES;
3449 S.GPU.m_nQueryResults[nIndex] = -1;
3450 }
3451 }
3452 S.GPU.m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
3453 }
3454
3455 S.GPU.m_nQueryFrame = nNextFrame;
3456 MicroProfileD3D11Frame& NextFrame = S.GPU.m_QueryFrames[nNextFrame];
3457 pContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery);
3458 NextFrame.m_nQueryStart = S.GPU.m_nQueryPut;
3459 NextFrame.m_nQueryCount = 0;
3460
3461 NextFrame.m_nRateQueryStarted = 1;
3462 }
3463
MicroProfileGpuInitD3D11(void * pDevice_,void * pDeviceContext_)3464 void MicroProfileGpuInitD3D11(void* pDevice_, void* pDeviceContext_)
3465 {
3466 ID3D11Device* pDevice = (ID3D11Device*)pDevice_;
3467 ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_;
3468 S.GPU.m_pDeviceContext = pDeviceContext_;
3469
3470 D3D11_QUERY_DESC Desc;
3471 Desc.MiscFlags = 0;
3472 Desc.Query = D3D11_QUERY_TIMESTAMP;
3473 for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i)
3474 {
3475 HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_pQueries[i]);
3476 MP_ASSERT(hr == S_OK);
3477 S.GPU.m_nQueryResults[i] = -1;
3478 }
3479 S.GPU.m_nQueryPut = 0;
3480 S.GPU.m_nQueryGet = 0;
3481 S.GPU.m_nQueryFrame = 0;
3482 S.GPU.m_nQueryFrequency = 0;
3483 Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
3484 for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
3485 {
3486 S.GPU.m_QueryFrames[i].m_nQueryStart = 0;
3487 S.GPU.m_QueryFrames[i].m_nQueryCount = 0;
3488 S.GPU.m_QueryFrames[i].m_nRateQueryStarted = 0;
3489 HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_QueryFrames[i].m_pRateQuery);
3490 MP_ASSERT(hr == S_OK);
3491 }
3492 }
3493
3494
MicroProfileGpuShutdown()3495 void MicroProfileGpuShutdown()
3496 {
3497 for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i)
3498 {
3499 ((ID3D11Query*)&S.GPU.m_pQueries[i])->Release();
3500 S.GPU.m_pQueries[i] = 0;
3501 }
3502 for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
3503 {
3504 ((ID3D11Query*)S.GPU.m_QueryFrames[i].m_pRateQuery)->Release();
3505 S.GPU.m_QueryFrames[i].m_pRateQuery = 0;
3506 }
3507 }
3508
// D3D11 build: no CPU/GPU tick reference is provided. Always returns 0 and leaves
// both output parameters untouched.
int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu)
{
	(void)pOutCPU;
	(void)pOutGpu;
	return 0;
}
3513
3514
3515 #elif MICROPROFILE_GPU_TIMERS_GL
MicroProfileGpuInitGL()3516 void MicroProfileGpuInitGL()
3517 {
3518 S.GPU.GLTimerPos = 0;
3519 glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &S.GPU.GLTimers[0]);
3520 }
3521
MicroProfileGpuInsertTimeStamp()3522 uint32_t MicroProfileGpuInsertTimeStamp()
3523 {
3524 uint32_t nIndex = (S.GPU.GLTimerPos+1)%MICROPROFILE_GL_MAX_QUERIES;
3525 glQueryCounter(S.GPU.GLTimers[nIndex], GL_TIMESTAMP);
3526 S.GPU.GLTimerPos = nIndex;
3527 return nIndex;
3528 }
MicroProfileGpuGetTimeStamp(uint32_t nKey)3529 uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey)
3530 {
3531 uint64_t result;
3532 glGetQueryObjectui64v(S.GPU.GLTimers[nKey], GL_QUERY_RESULT, &result);
3533 return result;
3534 }
3535
// OpenGL build: reports 1,000,000,000 GPU ticks per second, i.e. GPU tick values are
// treated as nanoseconds.
uint64_t MicroProfileTicksPerSecondGpu()
{
	const uint64_t nTicksPerSecond = 1000000000ll;
	return nTicksPerSecond;
}
3540
MicroProfileGetGpuTickReference(int64_t * pOutCpu,int64_t * pOutGpu)3541 int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)
3542 {
3543 int64_t nGpuTimeStamp;
3544 glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp);
3545 if(nGpuTimeStamp)
3546 {
3547 *pOutCpu = MP_TICK();
3548 *pOutGpu = nGpuTimeStamp;
3549 #if 0 //debug test if timestamp diverges
3550 static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
3551 static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
3552 static int64_t nGpuStart = 0;
3553 static int64_t nCpuStart = 0;
3554 if(!nCpuStart)
3555 {
3556 nCpuStart = *pOutCpu;
3557 nGpuStart = *pOutGpu;
3558 }
3559 static int nCountDown = 100;
3560 if(0 == nCountDown--)
3561 {
3562 int64_t nCurCpu = *pOutCpu;
3563 int64_t nCurGpu = *pOutGpu;
3564 double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu;
3565 double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu;
3566
3567 char buf[254];
3568 snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu);
3569 OutputDebugString(buf);
3570 nCountDown = 100;
3571 }
3572 #endif
3573 return 1;
3574 }
3575 return 0;
3576 }
3577
3578
3579 #endif
3580
3581 #undef S
3582
3583 #ifdef _WIN32
3584 #pragma warning(pop)
3585 #endif
3586
3587
3588
3589
3590
3591 #endif
3592 #endif
3593 #ifdef MICROPROFILE_EMBED_HTML
3594 #include "microprofile_html.h"
3595 #endif
3596