1 #pragma once
2 // This is free and unencumbered software released into the public domain.
3 // Anyone is free to copy, modify, publish, use, compile, sell, or
4 // distribute this software, either in source code form or as a compiled
5 // binary, for any purpose, commercial or non-commercial, and by any
6 // means.
7 // In jurisdictions that recognize copyright laws, the author or authors
8 // of this software dedicate any and all copyright interest in the
9 // software to the public domain. We make this dedication for the benefit
10 // of the public at large and to the detriment of our heirs and
11 // successors. We intend this dedication to be an overt act of
12 // relinquishment in perpetuity of all present and future rights to this
13 // software under copyright law.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 // For more information, please refer to <http://unlicense.org/>
22 //
23 // ***********************************************************************
24 //
25 //
26 //
27 //
28 // Howto:
29 // Call these functions from your code:
30 //  MicroProfileOnThreadCreate
31 //  MicroProfileMouseButton
32 //  MicroProfileMousePosition
33 //  MicroProfileModKey
34 //  MicroProfileFlip                <-- Call this once per frame
35 //  MicroProfileDraw                <-- Call this once per frame
36 //  MicroProfileToggleDisplayMode   <-- Bind to a key to toggle profiling
37 //  MicroProfileTogglePause         <-- Bind to a key to toggle pause
38 //
39 // Use these macros in your code in blocks you want to time:
40 //
41 //  MICROPROFILE_DECLARE
42 //  MICROPROFILE_DEFINE
43 //  MICROPROFILE_DECLARE_GPU
44 //  MICROPROFILE_DEFINE_GPU
45 //  MICROPROFILE_SCOPE
46 //  MICROPROFILE_SCOPEI
47 //  MICROPROFILE_SCOPEGPU
48 //  MICROPROFILE_SCOPEGPUI
//  MICROPROFILE_META_CPU / MICROPROFILE_META_GPU
50 //
51 //
52 //  Usage:
53 //
54 //  {
//      MICROPROFILE_SCOPEI("GroupName", "TimerName", nColorRgb);
56 //      ..Code to be timed..
57 //  }
58 //
59 //  MICROPROFILE_DECLARE / MICROPROFILE_DEFINE allows defining groups in a shared place, to ensure sorting of the timers
60 //
61 //  (in global scope)
62 //  MICROPROFILE_DEFINE(g_ProfileFisk, "Fisk", "Skalle", nSomeColorRgb);
63 //
64 //  (in some other file)
65 //  MICROPROFILE_DECLARE(g_ProfileFisk);
66 //
67 //  void foo(){
68 //      MICROPROFILE_SCOPE(g_ProfileFisk);
69 //  }
70 //
//  Once the code is instrumented, the GUI is activated by calling MicroProfileToggleDisplayMode or by clicking in the upper left corner of
//  the screen
73 //
74 // The following functions must be implemented before the profiler is usable
75 //  debug render:
76 //      void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters);
77 //      void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat);
78 //      void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor);
79 //  Gpu time stamps: (See below for d3d/opengl helper)
80 //      uint32_t MicroProfileGpuInsertTimeStamp();
81 //      uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
82 //      uint64_t MicroProfileTicksPerSecondGpu();
83 //  threading:
84 //      const char* MicroProfileGetThreadName(); Threadnames in detailed view
85 //
86 // Default implementations of Gpu timestamp functions:
//      OpenGL:
//          in .c file where MICROPROFILE_IMPL is defined:
//          #define MICROPROFILE_GPU_TIMERS_GL 1
//          call MicroProfileGpuInitGL() on startup
//      D3D11:
//          in .c file where MICROPROFILE_IMPL is defined:
//          #define MICROPROFILE_GPU_TIMERS_D3D11 1
//          call MicroProfileGpuInitD3D11() on startup. Pass Device & ImmediateContext
95 //
96 // Limitations:
97 //  GPU timestamps can only be inserted from one thread.
98 
99 
100 
101 #ifndef MICROPROFILE_ENABLED
102 #define MICROPROFILE_ENABLED 1
103 #endif
104 
105 #include <stdint.h>
106 typedef uint64_t MicroProfileToken;
107 typedef uint16_t MicroProfileGroupId;
108 
109 #if 0 == MICROPROFILE_ENABLED
110 
// No-op stand-ins used when MICROPROFILE_ENABLED is 0, so instrumented code
// compiles unchanged without the profiler.
//
// Declaration-style macros expand to nothing because they may appear at
// namespace scope. Statement-style macros and calls expand to do{}while(0)
// so they still require a trailing semicolon and behave as one statement.
// Value-returning calls expand to a constant.
#define MICROPROFILE_DECLARE(var)
#define MICROPROFILE_DEFINE(var, group, name, color)
// Parameter order matches the enabled macro: (group, category, color).
#define MICROPROFILE_REGISTER_GROUP(group, category, color)
#define MICROPROFILE_DECLARE_GPU(var)
#define MICROPROFILE_DEFINE_GPU(var, name, color)
#define MICROPROFILE_SCOPE(var) do{}while(0)
#define MICROPROFILE_SCOPEI(group, name, color) do{}while(0)
#define MICROPROFILE_SCOPEGPU(var) do{}while(0)
#define MICROPROFILE_SCOPEGPUI( name, color) do{}while(0)
#define MICROPROFILE_META_CPU(name, count) do{}while(0)
#define MICROPROFILE_META_GPU(name, count) do{}while(0)
#define MICROPROFILE_FORCEENABLECPUGROUP(s) do{} while(0)
#define MICROPROFILE_FORCEDISABLECPUGROUP(s) do{} while(0)
#define MICROPROFILE_FORCEENABLEGPUGROUP(s) do{} while(0)
#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) do{} while(0)
#define MICROPROFILE_SCOPE_TOKEN(token) do{}while(0)

#define MicroProfileGetTime(group, name) 0.f
#define MicroProfileOnThreadCreate(foo) do{}while(0)
// Counterpart of MicroProfileOnThreadCreate; without this stub a caller that
// pairs the two calls would not compile in a disabled build.
#define MicroProfileOnThreadExit() do{}while(0)
#define MicroProfileFlip() do{}while(0)
#define MicroProfileSetAggregateFrames(a) do{}while(0)
#define MicroProfileGetAggregateFrames() 0
#define MicroProfileGetCurrentAggregateFrames() 0
#define MicroProfileTogglePause() do{}while(0)
#define MicroProfileToggleAllGroups() do{} while(0)
#define MicroProfileDumpTimers() do{}while(0)
#define MicroProfileShutdown() do{}while(0)
#define MicroProfileSetForceEnable(a) do{} while(0)
#define MicroProfileGetForceEnable() false
#define MicroProfileSetEnableAllGroups(a) do{} while(0)
#define MicroProfileEnableCategory(a) do{} while(0)
#define MicroProfileDisableCategory(a) do{} while(0)
#define MicroProfileGetEnableAllGroups() false
#define MicroProfileSetForceMetaCounters(a) do{}while(0)
#define MicroProfileGetForceMetaCounters() 0
#define MicroProfileEnableMetaCounter(c) do{}while(0)
#define MicroProfileDisableMetaCounter(c) do{}while(0)
#define MicroProfileDumpFile(html,csv) do{} while(0)
#define MicroProfileWebServerPort() ((uint32_t)-1)
150 
151 #else
152 
153 #include <stdint.h>
154 #include <string.h>
155 #include <algorithm>
156 #include <array>
157 #include <atomic>
158 #include <mutex>
159 #include <thread>
160 
161 #ifndef MICROPROFILE_API
162 #define MICROPROFILE_API
163 #endif
164 
// Number of MP_TICK() units per second for CPU timers. Declared for every
// platform; the Apple and generic POSIX branches below define it inline,
// while the Windows branch only relies on this declaration.
MICROPROFILE_API int64_t MicroProfileTicksPerSecondCpu();
166 
167 
168 #if defined(__APPLE__)
169 #include <mach/mach.h>
170 #include <mach/mach_time.h>
171 #include <unistd.h>
172 #include <libkern/OSAtomic.h>
173 #include <TargetConditionals.h>
174 #if TARGET_OS_IPHONE
175 #define MICROPROFILE_IOS
176 #endif
177 
178 #define MP_TICK() mach_absolute_time()
// Returns how many mach_absolute_time() ticks elapse per second.
// The value is computed from the mach timebase on first use and cached in a
// function-local static; a cached value of 0 means "not computed yet".
inline int64_t MicroProfileTicksPerSecondCpu()
{
    static int64_t s_nCachedTicksPerSecond = 0;
    if(s_nCachedTicksPerSecond != 0)
    {
        return s_nCachedTicksPerSecond;
    }

    mach_timebase_info_data_t Timebase;
    mach_timebase_info(&Timebase);
    // ticks * numer / denom gives nanoseconds, so ticks per second is
    // 1e9 * denom / numer.
    s_nCachedTicksPerSecond = 1000000000ll * Timebase.denom / Timebase.numer;
    return s_nCachedTicksPerSecond;
}
MicroProfileGetCurrentThreadId()190 inline uint64_t MicroProfileGetCurrentThreadId()
191 {
192     uint64_t tid;
193     pthread_threadid_np(pthread_self(), &tid);
194     return tid;
195 }
196 
// Apple platform bindings for the MP_* abstraction macros.
// MP_BREAK: stop execution (used by MP_ASSERT).
#define MP_BREAK() __builtin_trap()
// MP_THREAD_LOCAL: storage-class keyword for per-thread variables.
#define MP_THREAD_LOCAL __thread
// MP_STRCASECMP: case-insensitive string compare.
#define MP_STRCASECMP strcasecmp
#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
// Thread ids are 64 bits wide on this platform.
typedef uint64_t ThreadIdType;
202 #elif defined(_WIN32)
// Windows platform bindings for the MP_* abstraction macros.
// MicroProfileGetTick is only declared here; its definition is not part of
// this section of the header.
int64_t MicroProfileGetTick();
#define MP_TICK() MicroProfileGetTick()
// MP_BREAK: stop execution (used by MP_ASSERT).
#define MP_BREAK() __debugbreak()
// MP_THREAD_LOCAL: storage-class keyword for per-thread variables.
#define MP_THREAD_LOCAL thread_local
// MP_STRCASECMP: case-insensitive string compare.
#define MP_STRCASECMP _stricmp
#define MP_GETCURRENTTHREADID() GetCurrentThreadId()
// Thread ids are 32 bits wide on this platform.
typedef uint32_t ThreadIdType;
210 
211 #elif !defined(_WIN32)
212 #include <unistd.h>
213 #include <time.h>
// Returns the CPU tick frequency for the generic POSIX branch.
// MicroProfileGetTick() reports nanoseconds here, so one second is 1e9 ticks.
inline int64_t MicroProfileTicksPerSecondCpu()
{
    const int64_t nNanosecondsPerSecond = 1000000000ll;
    return nNanosecondsPerSecond;
}
218 
// Returns the current CPU timestamp in nanoseconds (tv_sec * 1e9 + tv_nsec).
//
// Reads CLOCK_MONOTONIC rather than CLOCK_REALTIME: the realtime clock can be
// stepped forwards or backwards when the wall clock is adjusted, which would
// corrupt measured durations. Only differences between ticks are meaningful;
// the absolute value has no relation to calendar time.
inline int64_t MicroProfileGetTick()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return 1000000000ll * ts.tv_sec + ts.tv_nsec;
}
// Generic POSIX platform bindings for the MP_* abstraction macros.
#define MP_TICK() MicroProfileGetTick()
// MP_BREAK: stop execution (used by MP_ASSERT).
#define MP_BREAK() __builtin_trap()
// MP_THREAD_LOCAL: storage-class keyword for per-thread variables.
#define MP_THREAD_LOCAL __thread
// MP_STRCASECMP: case-insensitive string compare.
#define MP_STRCASECMP strcasecmp
// NOTE(review): the expansion is an unparenthesized cast of pthread_self();
// this assumes pthread_t is convertible to an integer on the target — confirm.
#define MP_GETCURRENTTHREADID() (uint64_t)pthread_self()
// Thread ids are 64 bits wide on this platform.
typedef uint64_t ThreadIdType;
231 #endif
232 
233 
// Fallback for platforms where no branch above defined a thread-id query:
// every thread reports id 0.
#ifndef MP_GETCURRENTTHREADID
#define MP_GETCURRENTTHREADID() 0
typedef uint32_t ThreadIdType;
#endif
238 
239 
// Instrumentation macros (profiler enabled).

// Traps via MP_BREAK() when the condition is false. Always active; it is not
// tied to NDEBUG.
#define MP_ASSERT(a) do{if(!(a)){MP_BREAK();} }while(0)
// Declares / defines a global CPU timer token named g_mp_<var>.
#define MICROPROFILE_DECLARE(var) extern MicroProfileToken g_mp_##var
#define MICROPROFILE_DEFINE(var, group, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu)
#define MICROPROFILE_REGISTER_GROUP(group, category, color) MicroProfileRegisterGroup(group, category, color)
// GPU variants: the timer is always placed in the group named "GPU".
#define MICROPROFILE_DECLARE_GPU(var) extern MicroProfileToken g_mp_##var
#define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu)
// Two-level paste so that __LINE__ is expanded before concatenation.
#define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b
#define MICROPROFILE_TOKEN_PASTE(a, b)  MICROPROFILE_TOKEN_PASTE0(a,b)
#define MICROPROFILE_TOKEN(var) g_mp_##var
// Scope timers: declare a local handler object named foo<line>, so two scope
// macros on the same source line in the same scope would collide.
#define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
#define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token)
// The *I ("inline") variants expand to TWO statements: a function-local static
// token followed by the handler. They therefore cannot be used as the sole
// body of an unbraced if/for/while.
#define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
#define MICROPROFILE_SCOPEGPU(var) MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
#define MICROPROFILE_SCOPEGPUI(name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken("GPU", name, color,  MicroProfileTokenTypeGpu); MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
// Meta counters: fetch a function-local static meta token once, then report
// `count` against it. Also two statements.
#define MICROPROFILE_META_CPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeCpu)
#define MICROPROFILE_META_GPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeGpu)
256 
257 
// Build-time configuration. Each setting can be overridden by defining the
// macro before this header is included; the values below are the defaults.

// When non-zero, MicroProfileGetThreadName() is a user-supplied function
// instead of a placeholder string.
#ifndef MICROPROFILE_USE_THREAD_NAME_CALLBACK
#define MICROPROFILE_USE_THREAD_NAME_CALLBACK 0
#endif

// Bytes of log storage per thread (2048<<10 = 2 MiB); divided by the log
// entry size to obtain MICROPROFILE_BUFFER_SIZE.
#ifndef MICROPROFILE_PER_THREAD_BUFFER_SIZE
#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<10)
#endif

// Length of the MicroProfile::Frames array.
#ifndef MICROPROFILE_MAX_FRAME_HISTORY
#define MICROPROFILE_MAX_FRAME_HISTORY 512
#endif

// printf-compatible function used for profiler output.
#ifndef MICROPROFILE_PRINTF
#define MICROPROFILE_PRINTF printf
#endif

// Length of the MicroProfile::MetaCounters array.
#ifndef MICROPROFILE_META_MAX
#define MICROPROFILE_META_MAX 8
#endif

#ifndef MICROPROFILE_WEBSERVER_PORT
#define MICROPROFILE_WEBSERVER_PORT 1338
#endif

// When zero, MicroProfileDumpFile / MicroProfileWebServerPort become no-op macros.
#ifndef MICROPROFILE_WEBSERVER
#define MICROPROFILE_WEBSERVER 1
#endif

#ifndef MICROPROFILE_WEBSERVER_MAXFRAMES
#define MICROPROFILE_WEBSERVER_MAXFRAMES 30
#endif

// Size in bytes of MicroProfile::WebServerBuffer (16<<10 = 16 KiB).
#ifndef MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE
#define MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE (16<<10)
#endif

// When zero, the GPU timestamp functions become constant-valued macros.
#ifndef MICROPROFILE_GPU_TIMERS
#define MICROPROFILE_GPU_TIMERS 1
#endif

// Length of the D3D11 per-frame query array (m_QueryFrames).
#ifndef MICROPROFILE_GPU_FRAME_DELAY
#define MICROPROFILE_GPU_FRAME_DELAY 3 //must be > 0
#endif


// Size of every fixed pName buffer (categories, groups, timers).
#ifndef MICROPROFILE_NAME_MAX_LEN
#define MICROPROFILE_NAME_MAX_LEN 64
#endif
306 
// Force a named group on or off for CPU / GPU timers.
#define MICROPROFILE_FORCEENABLECPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeCpu)
#define MICROPROFILE_FORCEDISABLECPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeCpu)
#define MICROPROFILE_FORCEENABLEGPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeGpu)
#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeGpu)

// Sentinel tick value: all 64 bits set.
#define MICROPROFILE_INVALID_TICK ((uint64_t)-1)
// 48 set bits: one bit per group (see MICROPROFILE_MAX_GROUPS).
#define MICROPROFILE_GROUP_MASK_ALL 0xffffffffffff


// Sentinel token value: all 64 bits set. Fully parenthesized so the cast
// cannot be split by surrounding operators (e.g. `sizeof X`), matching
// MICROPROFILE_INVALID_TICK.
#define MICROPROFILE_INVALID_TOKEN ((uint64_t)-1)
317 
// Selects whether a timer or meta counter belongs to the CPU or the GPU side.
enum MicroProfileTokenType
{
    MicroProfileTokenTypeCpu = 0,
    MicroProfileTokenTypeGpu = 1,
};
323 
// Box style passed to the user-implemented MicroProfileDrawBox().
enum MicroProfileBoxType
{
    MicroProfileBoxTypeBar = 0,
    MicroProfileBoxTypeFlat = 1,
};
329 
330 
331 
// Global profiler state; fully defined further down in this header.
struct MicroProfile;

// Lifetime.
MICROPROFILE_API void MicroProfileInit();
MICROPROFILE_API void MicroProfileShutdown();
// Token lookup / creation by group and timer name. GetToken defaults to a
// CPU timer when the token type is omitted.
MICROPROFILE_API MicroProfileToken MicroProfileFindToken(const char* sGroup, const char* sName);
MICROPROFILE_API MicroProfileToken MicroProfileGetToken(const char* sGroup, const char* sName, uint32_t nColor, MicroProfileTokenType Token = MicroProfileTokenTypeCpu);
MICROPROFILE_API MicroProfileToken MicroProfileGetMetaToken(const char* pName);
MICROPROFILE_API void MicroProfileMetaUpdate(MicroProfileToken, int nCount, MicroProfileTokenType eTokenType);
// Enter returns a value that the caller hands back to the matching Leave
// (this is how MicroProfileScopeHandler / MicroProfileScopeGpuHandler use them).
MICROPROFILE_API uint64_t MicroProfileEnter(MicroProfileToken nToken);
MICROPROFILE_API void MicroProfileLeave(MicroProfileToken nToken, uint64_t nTick);
MICROPROFILE_API uint64_t MicroProfileGpuEnter(MicroProfileToken nToken);
MICROPROFILE_API void MicroProfileGpuLeave(MicroProfileToken nToken, uint64_t nTick);
MicroProfileGetTimerIndex(MicroProfileToken t)344 inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t){ return (t&0xffff); }
MicroProfileGetGroupMask(MicroProfileToken t)345 inline uint64_t MicroProfileGetGroupMask(MicroProfileToken t){ return ((t>>16)&MICROPROFILE_GROUP_MASK_ALL);}
MicroProfileMakeToken(uint64_t nGroupMask,uint16_t nTimer)346 inline MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint16_t nTimer){ return (nGroupMask<<16) | nTimer;}
347 
// Per-frame control.
MICROPROFILE_API void MicroProfileFlip(); //! call once per frame.
MICROPROFILE_API void MicroProfileTogglePause();
// Group forcing, by group name and token type.
MICROPROFILE_API void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type);
MICROPROFILE_API void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type);
// Looks a timer up by group and name and returns a time for it.
// NOTE(review): the unit is presumably milliseconds — confirm in the implementation.
MICROPROFILE_API float MicroProfileGetTime(const char* pGroup, const char* pName);
MICROPROFILE_API void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu);
// Thread registration.
MICROPROFILE_API void MicroProfileOnThreadCreate(const char* pThreadName); //should be called from newly created threads
MICROPROFILE_API void MicroProfileOnThreadExit(); //call on exit to reuse log
MICROPROFILE_API void MicroProfileInitThreadLog();
// Enable / force switches.
MICROPROFILE_API void MicroProfileSetForceEnable(bool bForceEnable);
MICROPROFILE_API bool MicroProfileGetForceEnable();
MICROPROFILE_API void MicroProfileSetEnableAllGroups(bool bEnable);
MICROPROFILE_API void MicroProfileEnableCategory(const char* pCategory);
MICROPROFILE_API void MicroProfileDisableCategory(const char* pCategory);
MICROPROFILE_API bool MicroProfileGetEnableAllGroups();
MICROPROFILE_API void MicroProfileSetForceMetaCounters(bool bEnable);
MICROPROFILE_API bool MicroProfileGetForceMetaCounters();
MICROPROFILE_API void MicroProfileEnableMetaCounter(const char* pMet);
MICROPROFILE_API void MicroProfileDisableMetaCounter(const char* pMet);
// Aggregation window, expressed in frames.
MICROPROFILE_API void MicroProfileSetAggregateFrames(int frames);
MICROPROFILE_API int MicroProfileGetAggregateFrames();
MICROPROFILE_API int MicroProfileGetCurrentAggregateFrames();
// Access to the global profiler state and the mutex associated with it.
MICROPROFILE_API MicroProfile* MicroProfileGet();
MICROPROFILE_API void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]);
MICROPROFILE_API std::recursive_mutex& MicroProfileGetMutex();
// Context-switch tracing.
MICROPROFILE_API void MicroProfileStartContextSwitchTrace();
MICROPROFILE_API void MicroProfileStopContextSwitchTrace();
// NOTE(review): takes uint32_t although ThreadIdType is uint64_t on the Apple
// and generic POSIX branches — confirm that truncation is intended.
MICROPROFILE_API bool MicroProfileIsLocalThread(uint32_t nThreadId);
376 
377 
// Dump-to-file and web server port query. With the web server compiled out
// these become no-ops; the macro forms accept the same argument lists as the
// functions so call sites compile under either setting.
#if MICROPROFILE_WEBSERVER
MICROPROFILE_API void MicroProfileDumpFile(const char* pHtml, const char* pCsv);
MICROPROFILE_API uint32_t MicroProfileWebServerPort();
#else
#define MicroProfileDumpFile(html,csv) do{} while(0)
#define MicroProfileWebServerPort() ((uint32_t)-1)
#endif
385 
386 
387 
388 
// GPU timestamp hooks. With GPU timers enabled these are real functions
// (user-implemented, or supplied by the D3D11 / OpenGL helpers described in
// the header comment); otherwise they collapse to constants.
#if MICROPROFILE_GPU_TIMERS
MICROPROFILE_API uint32_t MicroProfileGpuInsertTimeStamp();
MICROPROFILE_API uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
MICROPROFILE_API uint64_t MicroProfileTicksPerSecondGpu();
MICROPROFILE_API int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu);
#else
#define MicroProfileGpuInsertTimeStamp() 1
#define MicroProfileGpuGetTimeStamp(a) 0
#define MicroProfileTicksPerSecondGpu() 1
#define MicroProfileGetGpuTickReference(a,b) 0
#endif
400 
// Built-in D3D11 GPU timer helper. The macro is tested with #if, so it must
// be defined to a non-zero value (not merely defined) to enable this.
#if MICROPROFILE_GPU_TIMERS_D3D11
#define MICROPROFILE_D3D_MAX_QUERIES (8<<10)
MICROPROFILE_API void MicroProfileGpuInitD3D11(void* pDevice, void* pDeviceContext);
#endif

// Built-in OpenGL GPU timer helper; same #if requirement as above.
#if MICROPROFILE_GPU_TIMERS_GL
#define MICROPROFILE_GL_MAX_QUERIES (8<<10)
MICROPROFILE_API void MicroProfileGpuInitGL();
#endif
410 
411 
412 
// Thread-name source: a user-implemented function when the callback option
// is on, otherwise a fixed placeholder string.
#if MICROPROFILE_USE_THREAD_NAME_CALLBACK
MICROPROFILE_API const char* MicroProfileGetThreadName();
#else
#define MicroProfileGetThreadName() "<implement MicroProfileGetThreadName to get threadnames>"
#endif

// Optional mapping from a thread id to a name; defaults to the empty string.
#if !defined(MICROPROFILE_THREAD_NAME_FROM_ID)
#define MICROPROFILE_THREAD_NAME_FROM_ID(a) ""
#endif
422 
423 
424 struct MicroProfileScopeHandler
425 {
426     MicroProfileToken nToken;
427     uint64_t nTick;
MicroProfileScopeHandlerMicroProfileScopeHandler428     MicroProfileScopeHandler(MicroProfileToken Token):nToken(Token)
429     {
430         nTick = MicroProfileEnter(nToken);
431     }
~MicroProfileScopeHandlerMicroProfileScopeHandler432     ~MicroProfileScopeHandler()
433     {
434         MicroProfileLeave(nToken, nTick);
435     }
436 };
437 
438 struct MicroProfileScopeGpuHandler
439 {
440     MicroProfileToken nToken;
441     uint64_t nTick;
MicroProfileScopeGpuHandlerMicroProfileScopeGpuHandler442     MicroProfileScopeGpuHandler(MicroProfileToken Token):nToken(Token)
443     {
444         nTick = MicroProfileGpuEnter(nToken);
445     }
~MicroProfileScopeGpuHandlerMicroProfileScopeGpuHandler446     ~MicroProfileScopeGpuHandler()
447     {
448         MicroProfileGpuLeave(nToken, nTick);
449     }
450 };
451 
452 
453 
// Fixed capacities of the profiler's tables.
#define MICROPROFILE_MAX_TIMERS 1024
#define MICROPROFILE_MAX_GROUPS 48 // do not increase: each group uses one bit of the 48-bit group mask (MICROPROFILE_GROUP_MASK_ALL)
#define MICROPROFILE_MAX_CATEGORIES 16
#define MICROPROFILE_MAX_GRAPHS 5
#define MICROPROFILE_GRAPH_HISTORY 128
// Number of log entries per thread: the per-thread byte budget divided by the entry size.
#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE)/sizeof(MicroProfileLogEntry))
#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
// Depth of the per-thread nStack / nChildTickStack arrays.
#define MICROPROFILE_STACK_MAX 32
//#define MICROPROFILE_MAX_PRESETS 5
#define MICROPROFILE_ANIM_DELAY_PRC 0.5f
#define MICROPROFILE_GAP_TIME 50 //extra ms to fetch to close timers from earlier frames
465 
466 
// Number of per-thread log slots (sizes the Pool / nThreadActive / nLogStart arrays).
#ifndef MICROPROFILE_MAX_THREADS
#define MICROPROFILE_MAX_THREADS 32
#endif

// Colour channel extraction. These only shift; they do not mask the result
// to 8 bits, so any bits above the selected channel remain in the value.
#ifndef MICROPROFILE_UNPACK_RED
#define MICROPROFILE_UNPACK_RED(c) ((c)>>16)
#endif

#ifndef MICROPROFILE_UNPACK_GREEN
#define MICROPROFILE_UNPACK_GREEN(c) ((c)>>8)
#endif

#ifndef MICROPROFILE_UNPACK_BLUE
#define MICROPROFILE_UNPACK_BLUE(c) ((c))
#endif

#ifndef MICROPROFILE_DEFAULT_PRESET
#define MICROPROFILE_DEFAULT_PRESET "Default"
#endif
486 
487 
// Context-switch tracing defaults to on for Windows only; it can be
// overridden by defining MICROPROFILE_CONTEXT_SWITCH_TRACE beforehand.
#ifndef MICROPROFILE_CONTEXT_SWITCH_TRACE
#if defined(_WIN32)
#define MICROPROFILE_CONTEXT_SWITCH_TRACE 1
#elif defined(__APPLE__)
#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 //disabled until dtrace script is working.
#else
#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
#endif
#endif

// Number of entries in MicroProfile::ContextSwitch. Shrinks to a single
// placeholder entry when tracing is off so the array stays well-formed.
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (128*1024) //2mb with 16 byte entry size
#else
#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (1)
#endif

// NOTE(review): presumably toggles miniz compression in the implementation —
// the macro is not used in this section of the header.
#ifndef MICROPROFILE_MINIZ
#define MICROPROFILE_MINIZ 0
#endif
507 
// Socket handle type for the web server listener (MicroProfile::ListenerSocket).
#ifdef _WIN32
#include <basetsd.h>
typedef UINT_PTR MpSocket;
#else
typedef int MpSocket;
#endif


// Thread handle type for the context-switch trace thread
// (MicroProfile::ContextSwitchThread): pthread_t off Windows, HANDLE with
// MSVC, and std::thread* for other Windows compilers.
#ifndef _WIN32
typedef pthread_t MicroProfileThread;
#elif defined(_MSC_VER)
typedef HANDLE MicroProfileThread;
#else
typedef std::thread* MicroProfileThread;
#endif
523 
524 
525 
// Display mode values. Note that MP_DRAW_HIDDEN (3) has the same bits as
// MP_DRAW_BARS | MP_DRAW_DETAILED.
enum MicroProfileDrawMask
{
    MP_DRAW_OFF         = 0,
    MP_DRAW_BARS        = 1,
    MP_DRAW_DETAILED    = 2,
    MP_DRAW_HIDDEN      = 3,
};
533 
// Bit flags for the bar display, one bit per flag. MP_DRAW_META_FIRST is the
// highest single-bit flag defined here; MP_DRAW_ALL sets all 32 bits.
enum MicroProfileDrawBarsMask
{
    MP_DRAW_TIMERS              = 1 << 0,
    MP_DRAW_AVERAGE             = 1 << 1,
    MP_DRAW_MAX                 = 1 << 2,
    MP_DRAW_CALL_COUNT          = 1 << 3,
    MP_DRAW_TIMERS_EXCLUSIVE    = 1 << 4,
    MP_DRAW_AVERAGE_EXCLUSIVE   = 1 << 5,
    MP_DRAW_MAX_EXCLUSIVE       = 1 << 6,
    MP_DRAW_META_FIRST          = 1 << 7,
    MP_DRAW_ALL                 = 0xffffffff,
};
547 
// One packed 64-bit log record; the MP_LOG_* masks and MicroProfileLog*
// helpers in this header define its bit layout.
typedef uint64_t MicroProfileLogEntry;
549 
// A tick total paired with a count.
// NOTE(review): presumably accumulated ticks and number of calls — confirm.
struct MicroProfileTimer
{
    uint64_t nTicks;
    uint32_t nCount;
};
555 
// A named category together with a 64-bit group bitmask.
// NOTE(review): the mask presumably marks the groups belonging to the category — confirm.
struct MicroProfileCategory
{
    char pName[MICROPROFILE_NAME_MAX_LEN];
    uint64_t nGroupMask;
};
561 
// Metadata describing one timer group.
struct MicroProfileGroupInfo
{
    char pName[MICROPROFILE_NAME_MAX_LEN]; // fixed-size name buffer
    uint32_t nNameLen;
    uint32_t nGroupIndex;
    uint32_t nNumTimers;
    uint32_t nMaxTimerNameLen;
    uint32_t nColor;
    uint32_t nCategory;
    MicroProfileTokenType Type; // CPU or GPU group
};
573 
// Metadata describing one timer.
struct MicroProfileTimerInfo
{
    MicroProfileToken nToken;
    uint32_t nTimerIndex;
    uint32_t nGroupIndex;
    char pName[MICROPROFILE_NAME_MAX_LEN]; // fixed-size name buffer
    uint32_t nNameLen;
    uint32_t nColor;
    bool bGraph; // NOTE(review): presumably set while the timer is shown as a graph — confirm
};
584 
// State of one graph slot: a fixed-length history of 64-bit samples plus the
// token and key it is associated with.
struct MicroProfileGraphState
{
    int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
    MicroProfileToken nToken;
    int32_t nKey;
};
591 
// One context-switch record: outgoing and incoming thread ids plus a 64-bit
// word split into an 8-bit CPU field and a 56-bit tick field.
struct MicroProfileContextSwitch
{
    ThreadIdType nThreadOut;
    ThreadIdType nThreadIn;
    int64_t nCpu : 8;
    int64_t nTicks : 56;
};
599 
600 
// Per-frame snapshot: CPU and GPU frame start values and one log position
// per thread slot.
struct MicroProfileFrameState
{
    int64_t nFrameStartCpu;
    int64_t nFrameStartGpu;
    uint32_t nLogStart[MICROPROFILE_MAX_THREADS];
};
607 
608 struct MicroProfileThreadLog
609 {
610     std::array<MicroProfileLogEntry, MICROPROFILE_BUFFER_SIZE> Log{};
611 
612     std::atomic<uint32_t>   nPut{0};
613     std::atomic<uint32_t>   nGet{0};
614     uint32_t                nActive = 0;
615     uint32_t                nGpu = 0;
616     ThreadIdType            nThreadId{};
617 
618     std::array<uint32_t, MICROPROFILE_STACK_MAX> nStack{};
619     std::array<int64_t, MICROPROFILE_STACK_MAX>  nChildTickStack{};
620     uint32_t                                     nStackPos = 0;
621 
622 
623     std::array<uint8_t, MICROPROFILE_MAX_GROUPS> nGroupStackPos{};
624     std::array<int64_t, MICROPROFILE_MAX_GROUPS> nGroupTicks{};
625     std::array<int64_t, MICROPROFILE_MAX_GROUPS> nAggregateGroupTicks{};
626     enum
627     {
628         THREAD_MAX_LEN = 64,
629     };
630     char                    ThreadName[64]{};
631     int                     nFreeListNext = 0;
632 
ResetMicroProfileThreadLog633     void Reset() {
634         Log.fill({});
635         nPut = 0;
636         nGet = 0;
637         nActive = 0;
638         nGpu = 0;
639         nThreadId = {};
640         nStack.fill(0);
641         nChildTickStack.fill(0);
642         nStackPos = 0;
643         nGroupStackPos.fill(0);
644         nGroupTicks.fill(0);
645         nAggregateGroupTicks.fill(0);
646         std::fill(std::begin(ThreadName), std::end(ThreadName), '\0');
647         nFreeListNext = 0;
648     }
649 };
650 
// Backend-specific GPU timer state, embedded in MicroProfile as `GPU`.
// Exactly one definition of MicroProfileGpuTimerState is selected: D3D11,
// OpenGL, or an empty struct when neither helper is enabled.
#if MICROPROFILE_GPU_TIMERS_D3D11
// Per-frame D3D11 query bookkeeping.
struct MicroProfileD3D11Frame
{
    uint32_t m_nQueryStart;
    uint32_t m_nQueryCount;
    uint32_t m_nRateQueryStarted;
    void* m_pRateQuery;
};

struct MicroProfileGpuTimerState
{
    uint32_t bInitialized;
    // Device objects are held as void* so this header does not need the D3D headers.
    void* m_pDevice;
    void* m_pDeviceContext;
    void* m_pQueries[MICROPROFILE_D3D_MAX_QUERIES];
    int64_t m_nQueryResults[MICROPROFILE_D3D_MAX_QUERIES];
    uint32_t m_nQueryPut;
    uint32_t m_nQueryGet;
    uint32_t m_nQueryFrame;
    int64_t m_nQueryFrequency;
    MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
};
#elif MICROPROFILE_GPU_TIMERS_GL
struct MicroProfileGpuTimerState
{
    uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
    uint32_t GLTimerPos;
};
#else
struct MicroProfileGpuTimerState{};
#endif
682 
// The complete profiler state, obtained through MicroProfileGet().
// All tables are fixed-size arrays dimensioned by the MICROPROFILE_* limits,
// so the size of this struct depends on the build configuration (including
// which GPU helper and whether context-switch tracing are enabled).
struct MicroProfile
{
    // Counts of registered timers / groups / categories and aggregation bookkeeping.
    uint32_t nTotalTimers;
    uint32_t nGroupCount;
    uint32_t nCategoryCount;
    uint32_t nAggregateClear;
    uint32_t nAggregateFlip;
    uint32_t nAggregateFlipCount;
    uint32_t nAggregateFrames;

    uint64_t nAggregateFlipTick;

    // Display settings and group bitmasks (one bit per group).
    uint32_t nDisplay;
    uint32_t nBars;
    uint64_t nActiveGroup;
    uint32_t nActiveBars;

    uint64_t nForceGroup;
    uint32_t nForceEnable;
    uint32_t nForceMetaCounters;

    uint64_t nForceGroupUI;
    uint64_t nActiveGroupWanted;
    uint32_t nAllGroupsWanted;
    uint32_t nAllThreadsWanted;

    uint32_t nOverflow;

    uint64_t nGroupMask;
    uint32_t nRunning;
    uint32_t nToggleRunning;
    uint32_t nMaxGroupSize;
    uint32_t nDumpFileNextFrame;
    uint32_t nAutoClearFrames;
    // Fixed-size path buffers for file dumps.
    char HtmlDumpPath[512];
    char CsvDumpPath[512];

    int64_t nPauseTicks;

    float fReferenceTime;
    float fRcpReferenceTime;

    // Registration tables.
    MicroProfileCategory    CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
    MicroProfileGroupInfo   GroupInfo[MICROPROFILE_MAX_GROUPS];
    MicroProfileTimerInfo   TimerInfo[MICROPROFILE_MAX_TIMERS];
    // uint8_t suffices because MICROPROFILE_MAX_GROUPS is 48.
    uint8_t                 TimerToGroup[MICROPROFILE_MAX_TIMERS];

    // Per-timer statistics, indexed by timer index.
    MicroProfileTimer       AccumTimers[MICROPROFILE_MAX_TIMERS];
    uint64_t                AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
    uint64_t                AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
    uint64_t                AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];

    MicroProfileTimer       Frame[MICROPROFILE_MAX_TIMERS];
    uint64_t                FrameExclusive[MICROPROFILE_MAX_TIMERS];

    MicroProfileTimer       Aggregate[MICROPROFILE_MAX_TIMERS];
    uint64_t                AggregateMax[MICROPROFILE_MAX_TIMERS];
    uint64_t                AggregateExclusive[MICROPROFILE_MAX_TIMERS];
    uint64_t                AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];


    // Per-group statistics, indexed by group index.
    uint64_t                FrameGroup[MICROPROFILE_MAX_GROUPS];
    uint64_t                AccumGroup[MICROPROFILE_MAX_GROUPS];
    uint64_t                AccumGroupMax[MICROPROFILE_MAX_GROUPS];

    uint64_t                AggregateGroup[MICROPROFILE_MAX_GROUPS];
    uint64_t                AggregateGroupMax[MICROPROFILE_MAX_GROUPS];


    // Meta counters: per-timer counts plus running sums, one slot per meta name.
    struct
    {
        uint64_t nCounters[MICROPROFILE_MAX_TIMERS];

        uint64_t nAccum[MICROPROFILE_MAX_TIMERS];
        uint64_t nAccumMax[MICROPROFILE_MAX_TIMERS];

        uint64_t nAggregate[MICROPROFILE_MAX_TIMERS];
        uint64_t nAggregateMax[MICROPROFILE_MAX_TIMERS];

        uint64_t nSum;
        uint64_t nSumAccum;
        uint64_t nSumAccumMax;
        uint64_t nSumAggregate;
        uint64_t nSumAggregateMax;

        // Non-owning pointer; the name string is not copied into this struct.
        const char* pName;
    } MetaCounters[MICROPROFILE_META_MAX];

    MicroProfileGraphState  Graph[MICROPROFILE_MAX_GRAPHS];
    uint32_t                nGraphPut;

    // Per-thread log slots.
    uint32_t                nThreadActive[MICROPROFILE_MAX_THREADS];
    MicroProfileThreadLog*  Pool[MICROPROFILE_MAX_THREADS];
    uint32_t                nNumLogs;
    uint32_t                nMemUsage;
    int                     nFreeListHead;

    // Frame history cursors and storage.
    uint32_t                nFrameCurrent;
    uint32_t                nFrameCurrentIndex;
    uint32_t                nFramePut;
    uint64_t                nFramePutIndex;

    MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];

    uint64_t                nFlipTicks;
    uint64_t                nFlipAggregate;
    uint64_t                nFlipMax;
    uint64_t                nFlipAggregateDisplay;
    uint64_t                nFlipMaxDisplay;

    // Context-switch tracing state.
    MicroProfileThread          ContextSwitchThread;
    bool                        bContextSwitchRunning;
    bool                        bContextSwitchStop;
    bool                        bContextSwitchAllThreads;
    bool                        bContextSwitchNoBars;
    uint32_t                    nContextSwitchUsage;
    uint32_t                    nContextSwitchLastPut;

    int64_t                     nContextSwitchHoverTickIn;
    int64_t                     nContextSwitchHoverTickOut;
    uint32_t                    nContextSwitchHoverThread;
    uint32_t                    nContextSwitchHoverThreadBefore;
    uint32_t                    nContextSwitchHoverThreadAfter;
    uint8_t                     nContextSwitchHoverCpu;
    uint8_t                     nContextSwitchHoverCpuNext;

    uint32_t                    nContextSwitchPut;
    // Holds a single entry when context-switch tracing is compiled out.
    MicroProfileContextSwitch   ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];


    // Web server state.
    MpSocket                    ListenerSocket;
    uint32_t                    nWebServerPort;

    char                        WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
    uint32_t                    WebServerPut;

    uint64_t                    nWebServerDataSent;

    // Backend-specific GPU timer state (empty struct when no helper is enabled).
    MicroProfileGpuTimerState   GPU;


};
825 
// Bit layout of a MicroProfileLogEntry:
//   bits  0-47 : tick value
//   bits 48-61 : timer index (14 bits)
//   bits 62-63 : entry type (one of the MP_LOG_* type values below)
#define MP_LOG_TICK_MASK  0x0000ffffffffffff
#define MP_LOG_INDEX_MASK 0x3fff000000000000
#define MP_LOG_BEGIN_MASK 0xc000000000000000
// Entry type values stored in the top two bits.
#define MP_LOG_GPU_EXTRA 0x3
#define MP_LOG_META 0x2
#define MP_LOG_ENTER 0x1
#define MP_LOG_LEAVE 0x0
833 
834 
MicroProfileLogType(MicroProfileLogEntry Index)835 inline int MicroProfileLogType(MicroProfileLogEntry Index)
836 {
837     return ((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3;
838 }
839 
MicroProfileLogTimerIndex(MicroProfileLogEntry Index)840 inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index)
841 {
842     return (0x3fff&(Index>>48));
843 }
844 
MicroProfileMakeLogIndex(uint64_t nBegin,MicroProfileToken nToken,int64_t nTick)845 inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
846 {
847     MicroProfileLogEntry Entry =  (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick);
848     int t = MicroProfileLogType(Entry);
849     uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry);
850     MP_ASSERT(t == nBegin);
851     MP_ASSERT(nTimerIndex == (nToken&0x3fff));
852     return Entry;
853 
854 }
855 
MicroProfileLogTickDifference(MicroProfileLogEntry Start,MicroProfileLogEntry End)856 inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
857 {
858     uint64_t nStart = Start;
859     uint64_t nEnd = End;
860     int64_t nDifference = ((nEnd<<16) - (nStart<<16));
861     return nDifference >> 16;
862 }
863 
MicroProfileLogGetTick(MicroProfileLogEntry e)864 inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
865 {
866     return MP_LOG_TICK_MASK & e;
867 }
868 
MicroProfileLogSetTick(MicroProfileLogEntry e,int64_t nTick)869 inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
870 {
871     return (MP_LOG_TICK_MASK & nTick) | (e & ~MP_LOG_TICK_MASK);
872 }
873 
// Returns a when a < b, otherwise b (so b is returned when the two compare equal).
template<typename T>
T MicroProfileMin(T a, T b)
{
    if(a < b)
        return a;
    return b;
}
877 
// Returns a when a > b, otherwise b (so b is returned when the two compare equal).
template<typename T>
T MicroProfileMax(T a, T b)
{
    if(a > b)
        return a;
    return b;
}
881 
// Converts a duration in milliseconds to ticks for a timer running at
// nTicksPerSecond. The math is done in single precision and the result is
// truncated toward zero.
inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
{
    const float fTicks = fMs*0.001f*nTicksPerSecond;
    return (int64_t)fTicks;
}
886 
// Returns the factor that converts a tick count into milliseconds for a timer
// running at nTicksPerSecond (1000 / nTicksPerSecond, in single precision).
inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
{
    const float fMsPerSecond = 1000.f;
    return fMsPerSecond / nTicksPerSecond;
}
891 
MicroProfileGetGroupIndex(MicroProfileToken t)892 inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
893 {
894     return (uint16_t)MicroProfileGet()->TimerToGroup[MicroProfileGetTimerIndex(t)];
895 }
896 
897 
898 
899 #ifdef MICROPROFILE_IMPL
900 
901 #ifdef _WIN32
902 #include <windows.h>
903 #define snprintf _snprintf
904 
905 #pragma warning(push)
906 #pragma warning(disable: 4244)
MicroProfileTicksPerSecondCpu()907 int64_t MicroProfileTicksPerSecondCpu()
908 {
909     static int64_t nTicksPerSecond = 0;
910     if(nTicksPerSecond == 0)
911     {
912         QueryPerformanceFrequency((LARGE_INTEGER*)&nTicksPerSecond);
913     }
914     return nTicksPerSecond;
915 }
MicroProfileGetTick()916 int64_t MicroProfileGetTick()
917 {
918     int64_t ticks;
919     QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
920     return ticks;
921 }
922 
923 #endif
924 
925 #if defined(MICROPROFILE_WEBSERVER) || defined(MICROPROFILE_CONTEXT_SWITCH_TRACE)
926 
927 
928 typedef void* (*MicroProfileThreadFunc)(void*);
929 
930 #ifndef _WIN32
931 typedef pthread_t MicroProfileThread;
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)932 void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
933 {
934     pthread_attr_t Attr;
935     int r  = pthread_attr_init(&Attr);
936     MP_ASSERT(r == 0);
937     pthread_create(pThread, &Attr, Func, 0);
938 }
MicroProfileThreadJoin(MicroProfileThread * pThread)939 void MicroProfileThreadJoin(MicroProfileThread* pThread)
940 {
941     int r = pthread_join(*pThread, 0);
942     MP_ASSERT(r == 0);
943 }
944 #elif defined(_MSC_VER)
945 typedef HANDLE MicroProfileThread;
ThreadTrampoline(void * pFunc)946 DWORD _stdcall ThreadTrampoline(void* pFunc)
947 {
948     MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
949 
950     // The return value of F will always return a void*, however, this is for
951     // compatibility with pthreads. The underlying "address" of the pointer
952     // is always a 32-bit value, so this cast is safe to perform.
953     return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0)));
954 }
955 
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)956 void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
957 {
958     *pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
959 }
MicroProfileThreadJoin(MicroProfileThread * pThread)960 void MicroProfileThreadJoin(MicroProfileThread* pThread)
961 {
962     WaitForSingleObject(*pThread, INFINITE);
963     CloseHandle(*pThread);
964 }
965 #else
966 #include <thread>
967 typedef std::thread* MicroProfileThread;
MicroProfileThreadStart(MicroProfileThread * pThread,MicroProfileThreadFunc Func)968 inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
969 {
970     *pThread = new std::thread(Func, nullptr);
971 }
MicroProfileThreadJoin(MicroProfileThread * pThread)972 inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
973 {
974     (*pThread)->join();
975     delete *pThread;
976 }
977 #endif
978 #endif
979 
980 #if MICROPROFILE_WEBSERVER
981 
982 #ifdef _WIN32
983 #define MP_INVALID_SOCKET(f) (f == INVALID_SOCKET)
984 #endif
985 
986 #ifndef _WIN32
987 #include <sys/socket.h>
988 #include <netinet/in.h>
989 #include <fcntl.h>
990 #define MP_INVALID_SOCKET(f) (f < 0)
991 #endif
992 
993 
994 void MicroProfileWebServerStart();
995 void MicroProfileWebServerStop();
996 bool MicroProfileWebServerUpdate();
997 void MicroProfileDumpToFile();
998 
999 #else
1000 
1001 #define MicroProfileWebServerStart() do{}while(0)
1002 #define MicroProfileWebServerStop() do{}while(0)
1003 #define MicroProfileWebServerUpdate() false
1004 #define MicroProfileDumpToFile() do{} while(0)
1005 #endif
1006 
1007 
1008 #if MICROPROFILE_GPU_TIMERS_D3D11
1009 void MicroProfileGpuFlip();
1010 void MicroProfileGpuShutdown();
1011 #else
1012 #define MicroProfileGpuFlip() do{}while(0)
1013 #define MicroProfileGpuShutdown() do{}while(0)
1014 #endif
1015 
1016 
1017 
1018 #include <stdlib.h>
1019 #include <stdio.h>
1020 #include <math.h>
1021 #include <algorithm>
1022 
1023 
1024 #ifndef MICROPROFILE_DEBUG
1025 #define MICROPROFILE_DEBUG 0
1026 #endif
1027 
1028 
1029 #define S g_MicroProfile
1030 
1031 MicroProfile g_MicroProfile;
1032 MicroProfileThreadLog*          g_MicroProfileGpuLog = 0;
1033 #ifdef MICROPROFILE_IOS
1034 // iOS doesn't support __thread
1035 static pthread_key_t g_MicroProfileThreadLogKey;
1036 static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
// Creates the pthread key (with no destructor) that holds the per-thread log
// pointer on iOS. Run through pthread_once by the thread-log accessors.
static void MicroProfileCreateThreadLogKey()
{
    pthread_key_create(&g_MicroProfileThreadLogKey, NULL);
}
1041 #else
1042 MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLog = 0;
1043 #endif
1044 static std::atomic<bool> g_bUseLock{false}; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled)
1045 
1046 
1047 MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", 0x3355ee);
1048 MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", 0x3355ee);
1049 MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", 0x3355ee);
1050 MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", 0x3355ee);
1051 MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch,"MicroProfile", "ContextSwitchSearch", 0xDD7300);
1052 
// Returns the profiler's recursive mutex. It is a function-local static, so it
// is constructed on the first call.
inline std::recursive_mutex& MicroProfileMutex()
{
    static std::recursive_mutex Mutex;
    return Mutex;
}
// Non-inline accessor for the same mutex that MicroProfileMutex() returns.
std::recursive_mutex& MicroProfileGetMutex()
{
    return MicroProfileMutex();
}
1062 
// Returns a pointer to the global profiler state (g_MicroProfile).
MICROPROFILE_API MicroProfile* MicroProfileGet()
{
    return &g_MicroProfile;
}
1067 
1068 
1069 MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
1070 
1071 
MicroProfileInit()1072 void MicroProfileInit()
1073 {
1074     std::recursive_mutex& mutex = MicroProfileMutex();
1075     bool bUseLock = g_bUseLock;
1076     if(bUseLock)
1077         mutex.lock();
1078     static bool bOnce = true;
1079     if(bOnce)
1080     {
1081         S.nMemUsage += sizeof(S);
1082         bOnce = false;
1083         memset(&S, 0, sizeof(S));
1084         for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1085         {
1086             S.GroupInfo[i].pName[0] = '\0';
1087         }
1088         for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
1089         {
1090             S.CategoryInfo[i].pName[0] = '\0';
1091             S.CategoryInfo[i].nGroupMask = 0;
1092         }
1093         strcpy(&S.CategoryInfo[0].pName[0], "default");
1094         S.nCategoryCount = 1;
1095         for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
1096         {
1097             S.TimerInfo[i].pName[0] = '\0';
1098         }
1099         S.nGroupCount = 0;
1100         S.nAggregateFlipTick = MP_TICK();
1101         S.nActiveGroup = 0;
1102         S.nActiveBars = 0;
1103         S.nForceGroup = 0;
1104         S.nAllGroupsWanted = 0;
1105         S.nActiveGroupWanted = 0;
1106         S.nAllThreadsWanted = 1;
1107         S.nAggregateFlip = 0;
1108         S.nTotalTimers = 0;
1109         for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
1110         {
1111             S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
1112         }
1113         S.nRunning = 1;
1114         S.fReferenceTime = 33.33f;
1115         S.fRcpReferenceTime = 1.f / S.fReferenceTime;
1116         S.nFreeListHead = -1;
1117         int64_t nTick = MP_TICK();
1118         for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
1119         {
1120             S.Frames[i].nFrameStartCpu = nTick;
1121             S.Frames[i].nFrameStartGpu = -1;
1122         }
1123 
1124         MicroProfileThreadLog* pGpu = MicroProfileCreateThreadLog("GPU");
1125         g_MicroProfileGpuLog = pGpu;
1126         MP_ASSERT(S.Pool[0] == pGpu);
1127         pGpu->nGpu = 1;
1128         pGpu->nThreadId = 0;
1129 
1130         S.nWebServerDataSent = (uint64_t)-1;
1131     }
1132     if(bUseLock)
1133         mutex.unlock();
1134 }
1135 
// Shuts the profiler's background facilities down while holding the profiler
// mutex: web server, context-switch trace and GPU timers, in that order.
void MicroProfileShutdown()
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
    MicroProfileWebServerStop();
    MicroProfileStopContextSwitchTrace();
    MicroProfileGpuShutdown();
}
1143 
1144 #ifdef MICROPROFILE_IOS
// iOS variant: returns the calling thread's log pointer stored under the
// pthread key; the key is created on first use via pthread_once.
inline MicroProfileThreadLog* MicroProfileGetThreadLog()
{
    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
    return (MicroProfileThreadLog*)pthread_getspecific(g_MicroProfileThreadLogKey);
}
1150 
// iOS variant: stores pLog as the calling thread's log pointer under the
// pthread key; the key is created on first use via pthread_once.
inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
{
    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
    pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
}
1156 #else
// Returns the calling thread's log pointer from the thread-local variable
// g_MicroProfileThreadLog (0 until one has been set).
MicroProfileThreadLog* MicroProfileGetThreadLog()
{
    return g_MicroProfileThreadLog;
}
// Stores pLog in the thread-local variable g_MicroProfileThreadLog.
inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
{
    g_MicroProfileThreadLog = pLog;
}
1165 #endif
1166 
1167 
MicroProfileCreateThreadLog(const char * pName)1168 MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName)
1169 {
1170     MicroProfileThreadLog* pLog = 0;
1171     if(S.nFreeListHead != -1)
1172     {
1173         pLog = S.Pool[S.nFreeListHead];
1174         MP_ASSERT(pLog->nPut.load() == 0);
1175         MP_ASSERT(pLog->nGet.load() == 0);
1176         S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext;
1177         pLog->Reset();
1178     }
1179     else
1180     {
1181         pLog = new MicroProfileThreadLog;
1182         S.nMemUsage += sizeof(MicroProfileThreadLog);
1183         S.Pool[S.nNumLogs++] = pLog;
1184     }
1185     int len = (int)strlen(pName);
1186     int maxlen = sizeof(pLog->ThreadName)-1;
1187     len = len < maxlen ? len : maxlen;
1188     memcpy(&pLog->ThreadName[0], pName, len);
1189     pLog->ThreadName[len] = '\0';
1190     pLog->nThreadId = MP_GETCURRENTTHREADID();
1191     pLog->nFreeListNext = -1;
1192     pLog->nActive = 1;
1193     return pLog;
1194 }
1195 
MicroProfileOnThreadCreate(const char * pThreadName)1196 void MicroProfileOnThreadCreate(const char* pThreadName)
1197 {
1198     g_bUseLock = true;
1199     MicroProfileInit();
1200     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1201     MP_ASSERT(MicroProfileGetThreadLog() == 0);
1202     MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? pThreadName : MicroProfileGetThreadName());
1203     MP_ASSERT(pLog);
1204     MicroProfileSetThreadLog(pLog);
1205 }
1206 
MicroProfileOnThreadExit()1207 void MicroProfileOnThreadExit()
1208 {
1209     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1210     MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
1211     if(pLog)
1212     {
1213         int32_t nLogIndex = -1;
1214         for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1215         {
1216             if(pLog == S.Pool[i])
1217             {
1218                 nLogIndex = i;
1219                 break;
1220             }
1221         }
1222         MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS && nLogIndex > 0);
1223         pLog->nFreeListNext = S.nFreeListHead;
1224         pLog->nActive = 0;
1225         pLog->nPut.store(0);
1226         pLog->nGet.store(0);
1227         S.nFreeListHead = nLogIndex;
1228         for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
1229         {
1230             S.Frames[i].nLogStart[nLogIndex] = 0;
1231         }
1232         pLog->nGroupStackPos.fill(0);
1233         pLog->nGroupTicks.fill(0);
1234     }
1235 }
1236 
// Registers the calling thread with no explicit name; equivalent to
// MicroProfileOnThreadCreate(nullptr).
void MicroProfileInitThreadLog()
{
    MicroProfileOnThreadCreate(nullptr);
}
1241 
1242 
1243 struct MicroProfileScopeLock
1244 {
1245     bool bUseLock;
1246     std::recursive_mutex& m;
MicroProfileScopeLockMicroProfileScopeLock1247     MicroProfileScopeLock(std::recursive_mutex& m) : bUseLock(g_bUseLock), m(m)
1248     {
1249         if(bUseLock)
1250             m.lock();
1251     }
~MicroProfileScopeLockMicroProfileScopeLock1252     ~MicroProfileScopeLock()
1253     {
1254         if(bUseLock)
1255             m.unlock();
1256     }
1257 };
1258 
MicroProfileFindToken(const char * pGroup,const char * pName)1259 MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
1260 {
1261     MicroProfileInit();
1262     MicroProfileScopeLock L(MicroProfileMutex());
1263     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1264     {
1265         if(!MP_STRCASECMP(pName, S.TimerInfo[i].pName) && !MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[i]].pName))
1266         {
1267             return S.TimerInfo[i].nToken;
1268         }
1269     }
1270     return MICROPROFILE_INVALID_TOKEN;
1271 }
1272 
MicroProfileGetGroup(const char * pGroup,MicroProfileTokenType Type)1273 uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
1274 {
1275     for(uint32_t i = 0; i < S.nGroupCount; ++i)
1276     {
1277         if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
1278         {
1279             return i;
1280         }
1281     }
1282     uint16_t nGroupIndex = 0xffff;
1283     uint32_t nLen = (uint32_t)strlen(pGroup);
1284     if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1285         nLen = MICROPROFILE_NAME_MAX_LEN-1;
1286     memcpy(&S.GroupInfo[S.nGroupCount].pName[0], pGroup, nLen);
1287     S.GroupInfo[S.nGroupCount].pName[nLen] = '\0';
1288     S.GroupInfo[S.nGroupCount].nNameLen = nLen;
1289     S.GroupInfo[S.nGroupCount].nNumTimers = 0;
1290     S.GroupInfo[S.nGroupCount].nGroupIndex = S.nGroupCount;
1291     S.GroupInfo[S.nGroupCount].Type = Type;
1292     S.GroupInfo[S.nGroupCount].nMaxTimerNameLen = 0;
1293     S.GroupInfo[S.nGroupCount].nColor = 0x88888888;
1294     S.GroupInfo[S.nGroupCount].nCategory = 0;
1295     S.CategoryInfo[0].nGroupMask |= (1ll << (uint64_t)S.nGroupCount);
1296     nGroupIndex = S.nGroupCount++;
1297     S.nGroupMask = (S.nGroupMask<<1)|1;
1298     MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS);
1299     return nGroupIndex;
1300 }
1301 
MicroProfileRegisterGroup(const char * pGroup,const char * pCategory,uint32_t nColor)1302 void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
1303 {
1304     int nCategoryIndex = -1;
1305     for(uint32_t i = 0; i < S.nCategoryCount; ++i)
1306     {
1307         if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
1308         {
1309             nCategoryIndex = (int)i;
1310             break;
1311         }
1312     }
1313     if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES)
1314     {
1315         MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0');
1316         nCategoryIndex = (int)S.nCategoryCount++;
1317         uint32_t nLen = (uint32_t)strlen(pCategory);
1318         if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1319             nLen = MICROPROFILE_NAME_MAX_LEN-1;
1320         memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen);
1321         S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0';
1322     }
1323     uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu")?MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu);
1324     S.GroupInfo[nGroup].nColor = nColor;
1325     if(nCategoryIndex >= 0)
1326     {
1327         uint64_t nBit = 1ll << nGroup;
1328         uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory;
1329         S.CategoryInfo[nOldCategory].nGroupMask &= ~nBit;
1330         S.CategoryInfo[nCategoryIndex].nGroupMask |= nBit;
1331         S.GroupInfo[nGroup].nCategory = nCategoryIndex;
1332     }
1333 }
1334 
MicroProfileGetToken(const char * pGroup,const char * pName,uint32_t nColor,MicroProfileTokenType Type)1335 MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type)
1336 {
1337     MicroProfileInit();
1338     MicroProfileScopeLock L(MicroProfileMutex());
1339     MicroProfileToken ret = MicroProfileFindToken(pGroup, pName);
1340     if(ret != MICROPROFILE_INVALID_TOKEN)
1341         return ret;
1342     uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
1343     uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++);
1344     uint64_t nGroupMask = 1ll << nGroupIndex;
1345     MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, nTimerIndex);
1346     S.GroupInfo[nGroupIndex].nNumTimers++;
1347     S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, (uint32_t)strlen(pName));
1348     MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); //dont mix cpu & gpu timers in the same group
1349     S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers);
1350     S.TimerInfo[nTimerIndex].nToken = nToken;
1351     uint32_t nLen = (uint32_t)strlen(pName);
1352     if(nLen > MICROPROFILE_NAME_MAX_LEN-1)
1353         nLen = MICROPROFILE_NAME_MAX_LEN-1;
1354     memcpy(&S.TimerInfo[nTimerIndex].pName, pName, nLen);
1355     S.TimerInfo[nTimerIndex].pName[nLen] = '\0';
1356     S.TimerInfo[nTimerIndex].nNameLen = nLen;
1357     S.TimerInfo[nTimerIndex].nColor = nColor&0xffffff;
1358     S.TimerInfo[nTimerIndex].nGroupIndex = nGroupIndex;
1359     S.TimerInfo[nTimerIndex].nTimerIndex = nTimerIndex;
1360     S.TimerToGroup[nTimerIndex] = nGroupIndex;
1361     return nToken;
1362 }
1363 
MicroProfileGetMetaToken(const char * pName)1364 MicroProfileToken MicroProfileGetMetaToken(const char* pName)
1365 {
1366     MicroProfileInit();
1367     MicroProfileScopeLock L(MicroProfileMutex());
1368     for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1369     {
1370         if(!S.MetaCounters[i].pName)
1371         {
1372             S.MetaCounters[i].pName = pName;
1373             return i;
1374         }
1375         else if(!MP_STRCASECMP(pName, S.MetaCounters[i].pName))
1376         {
1377             return i;
1378         }
1379     }
1380     MP_ASSERT(0);//out of slots, increase MICROPROFILE_META_MAX
1381     return (MicroProfileToken)-1;
1382 }
1383 
1384 
MicroProfileLogPut(MicroProfileToken nToken_,uint64_t nTick,uint64_t nBegin,MicroProfileThreadLog * pLog)1385 inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog)
1386 {
1387     MP_ASSERT(pLog != 0); //this assert is hit if MicroProfileOnCreateThread is not called
1388     MP_ASSERT(pLog->nActive);
1389     uint32_t nPos = pLog->nPut.load(std::memory_order_relaxed);
1390     uint32_t nNextPos = (nPos+1) % MICROPROFILE_BUFFER_SIZE;
1391     if(nNextPos == pLog->nGet.load(std::memory_order_relaxed))
1392     {
1393         S.nOverflow = 100;
1394     }
1395     else
1396     {
1397         pLog->Log[nPos] = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
1398         pLog->nPut.store(nNextPos, std::memory_order_release);
1399     }
1400 }
1401 
MicroProfileEnter(MicroProfileToken nToken_)1402 uint64_t MicroProfileEnter(MicroProfileToken nToken_)
1403 {
1404     if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup)
1405     {
1406         if(!MicroProfileGetThreadLog())
1407         {
1408             MicroProfileInitThreadLog();
1409         }
1410         uint64_t nTick = MP_TICK();
1411         MicroProfileLogPut(nToken_, nTick, MP_LOG_ENTER, MicroProfileGetThreadLog());
1412         return nTick;
1413     }
1414     return MICROPROFILE_INVALID_TICK;
1415 }
1416 
MicroProfileMetaUpdate(MicroProfileToken nToken,int nCount,MicroProfileTokenType eTokenType)1417 void MicroProfileMetaUpdate(MicroProfileToken nToken, int nCount, MicroProfileTokenType eTokenType)
1418 {
1419     if((MP_DRAW_META_FIRST<<nToken) & S.nActiveBars)
1420     {
1421         MicroProfileThreadLog* pLog = MicroProfileTokenTypeCpu == eTokenType ? MicroProfileGetThreadLog() : g_MicroProfileGpuLog;
1422         if(pLog)
1423         {
1424             MP_ASSERT(nToken < MICROPROFILE_META_MAX);
1425             MicroProfileLogPut(nToken, nCount, MP_LOG_META, pLog);
1426         }
1427     }
1428 }
1429 
1430 
MicroProfileLeave(MicroProfileToken nToken_,uint64_t nTickStart)1431 void MicroProfileLeave(MicroProfileToken nToken_, uint64_t nTickStart)
1432 {
1433     if(MICROPROFILE_INVALID_TICK != nTickStart)
1434     {
1435         if(!MicroProfileGetThreadLog())
1436         {
1437             MicroProfileInitThreadLog();
1438         }
1439         uint64_t nTick = MP_TICK();
1440         MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
1441         MicroProfileLogPut(nToken_, nTick, MP_LOG_LEAVE, pLog);
1442     }
1443 }
1444 
1445 
MicroProfileGpuEnter(MicroProfileToken nToken_)1446 uint64_t MicroProfileGpuEnter(MicroProfileToken nToken_)
1447 {
1448     if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup)
1449     {
1450         uint64_t nTimer = MicroProfileGpuInsertTimeStamp();
1451         MicroProfileLogPut(nToken_, nTimer, MP_LOG_ENTER, g_MicroProfileGpuLog);
1452         MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
1453         return 1;
1454     }
1455     return 0;
1456 }
1457 
MicroProfileGpuLeave(MicroProfileToken nToken_,uint64_t nTickStart)1458 void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart)
1459 {
1460     if(nTickStart)
1461     {
1462         uint64_t nTimer = MicroProfileGpuInsertTimeStamp();
1463         MicroProfileLogPut(nToken_, nTimer, MP_LOG_LEAVE, g_MicroProfileGpuLog);
1464         MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog);
1465     }
1466 }
1467 
MicroProfileContextSwitchPut(MicroProfileContextSwitch * pContextSwitch)1468 void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
1469 {
1470     if(S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks)
1471     {
1472         uint32_t nPut = S.nContextSwitchPut;
1473         S.ContextSwitch[nPut] = *pContextSwitch;
1474         S.nContextSwitchPut = (S.nContextSwitchPut+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
1475     }
1476 }
1477 
1478 
MicroProfileGetRange(uint32_t nPut,uint32_t nGet,uint32_t nRange[2][2])1479 void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2])
1480 {
1481     if(nPut > nGet)
1482     {
1483         nRange[0][0] = nGet;
1484         nRange[0][1] = nPut;
1485         nRange[1][0] = nRange[1][1] = 0;
1486     }
1487     else if(nPut != nGet)
1488     {
1489         MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE);
1490         uint32_t nCountEnd = MICROPROFILE_BUFFER_SIZE - nGet;
1491         nRange[0][0] = nGet;
1492         nRange[0][1] = nGet + nCountEnd;
1493         nRange[1][0] = 0;
1494         nRange[1][1] = nPut;
1495     }
1496 }
1497 
MicroProfileFlip()1498 void MicroProfileFlip()
1499 {
1500     #if 0
1501     //verify LogEntry wraps correctly
1502     MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000;
1503     for(int i = 0; i < 10000; ++i, c += 1)
1504     {
1505         MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK;
1506         MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2));
1507     }
1508     #endif
1509     MICROPROFILE_SCOPE(g_MicroProfileFlip);
1510     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
1511 
1512 
1513     MicroProfileGpuFlip();
1514 
1515     if(S.nToggleRunning)
1516     {
1517         S.nRunning = !S.nRunning;
1518         if(!S.nRunning)
1519             S.nPauseTicks = MP_TICK();
1520         S.nToggleRunning = 0;
1521         for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1522         {
1523             MicroProfileThreadLog* pLog = S.Pool[i];
1524             if(pLog)
1525             {
1526                 pLog->nStackPos = 0;
1527             }
1528         }
1529     }
1530     uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0;
1531     if(S.nDumpFileNextFrame)
1532     {
1533         MicroProfileDumpToFile();
1534         S.nDumpFileNextFrame = 0;
1535         S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
1536     }
1537     if(S.nWebServerDataSent == (uint64_t)-1)
1538     {
1539         MicroProfileWebServerStart();
1540         S.nWebServerDataSent = 0;
1541     }
1542 
1543     if(MicroProfileWebServerUpdate())
1544     {
1545         S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage
1546     }
1547 
1548     if(S.nAutoClearFrames)
1549     {
1550         nAggregateClear = 1;
1551         nAggregateFlip = 1;
1552         S.nAutoClearFrames -= 1;
1553     }
1554 
1555 
1556     if(S.nRunning || S.nForceEnable)
1557     {
1558         S.nFramePutIndex++;
1559         S.nFramePut = (S.nFramePut+1) % MICROPROFILE_MAX_FRAME_HISTORY;
1560         MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut);
1561         S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY;
1562         S.nFrameCurrentIndex++;
1563         uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY;
1564 
1565         uint32_t nContextSwitchPut = S.nContextSwitchPut;
1566         if(S.nContextSwitchLastPut < nContextSwitchPut)
1567         {
1568             S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut);
1569         }
1570         else
1571         {
1572             S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut;
1573         }
1574         S.nContextSwitchLastPut = nContextSwitchPut;
1575 
1576         MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut];
1577         MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
1578         MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
1579 
1580         pFramePut->nFrameStartCpu = MP_TICK();
1581         pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp();
1582         if(pFrameNext->nFrameStartGpu != (uint64_t)-1)
1583             pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
1584 
1585         if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1)
1586             pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1;
1587 
1588         uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu;
1589         uint64_t nFrameEndCpu = pFrameNext->nFrameStartCpu;
1590 
1591         {
1592             uint64_t nTick = nFrameEndCpu - nFrameStartCpu;
1593             S.nFlipTicks = nTick;
1594             S.nFlipAggregate += nTick;
1595             S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick);
1596         }
1597 
1598         uint8_t* pTimerToGroup = &S.TimerToGroup[0];
1599         for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1600         {
1601             MicroProfileThreadLog* pLog = S.Pool[i];
1602             if(!pLog)
1603             {
1604                 pFramePut->nLogStart[i] = 0;
1605             }
1606             else
1607             {
1608                 uint32_t nPut = pLog->nPut.load(std::memory_order_acquire);
1609                 pFramePut->nLogStart[i] = nPut;
1610                 MP_ASSERT(nPut< MICROPROFILE_BUFFER_SIZE);
1611                 //need to keep last frame around to close timers. timers more than 1 frame old is ditched.
1612                 pLog->nGet.store(nPut, std::memory_order_relaxed);
1613             }
1614         }
1615 
1616         if(S.nRunning)
1617         {
1618             uint64_t* pFrameGroup = &S.FrameGroup[0];
1619             {
1620                 MICROPROFILE_SCOPE(g_MicroProfileClear);
1621                 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1622                 {
1623                     S.Frame[i].nTicks = 0;
1624                     S.Frame[i].nCount = 0;
1625                     S.FrameExclusive[i] = 0;
1626                 }
1627                 for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1628                 {
1629                     pFrameGroup[i] = 0;
1630                 }
1631                 for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1632                 {
1633                     if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1634                     {
1635                         auto& Meta = S.MetaCounters[j];
1636                         for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1637                         {
1638                             Meta.nCounters[i] = 0;
1639                         }
1640                     }
1641                 }
1642 
1643             }
1644             {
1645                 MICROPROFILE_SCOPE(g_MicroProfileThreadLoop);
1646                 for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1647                 {
1648                     MicroProfileThreadLog* pLog = S.Pool[i];
1649                     if(!pLog)
1650                         continue;
1651 
1652                     uint8_t* pGroupStackPos = &pLog->nGroupStackPos[0];
1653                     int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS] = {0};
1654 
1655 
1656                     uint32_t nPut = pFrameNext->nLogStart[i];
1657                     uint32_t nGet = pFrameCurrent->nLogStart[i];
1658                     uint32_t nRange[2][2] = { {0, 0}, {0, 0}, };
1659                     MicroProfileGetRange(nPut, nGet, nRange);
1660 
1661 
1662                     //fetch gpu results.
1663                     if(pLog->nGpu)
1664                     {
1665                         for(uint32_t j = 0; j < 2; ++j)
1666                         {
1667                             uint32_t nStart = nRange[j][0];
1668                             uint32_t nEnd = nRange[j][1];
1669                             for(uint32_t k = nStart; k < nEnd; ++k)
1670                             {
1671                                 MicroProfileLogEntry L = pLog->Log[k];
1672                                 if(MicroProfileLogType(L) < MP_LOG_META)
1673                                 {
1674                                     pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L)));
1675                                 }
1676                             }
1677                         }
1678                     }
1679 
1680 
1681                     uint32_t* pStack = &pLog->nStack[0];
1682                     int64_t* pChildTickStack = &pLog->nChildTickStack[0];
1683                     uint32_t nStackPos = pLog->nStackPos;
1684 
1685                     for(uint32_t j = 0; j < 2; ++j)
1686                     {
1687                         uint32_t nStart = nRange[j][0];
1688                         uint32_t nEnd = nRange[j][1];
1689                         for(uint32_t k = nStart; k < nEnd; ++k)
1690                         {
1691                             MicroProfileLogEntry LE = pLog->Log[k];
1692                             int nType = MicroProfileLogType(LE);
1693 
1694                             if(MP_LOG_ENTER == nType)
1695                             {
1696                                 int nTimer = MicroProfileLogTimerIndex(LE);
1697                                 uint8_t nGroup = pTimerToGroup[nTimer];
1698                                 MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
1699                                 MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1700                                 pGroupStackPos[nGroup]++;
1701                                 pStack[nStackPos++] = k;
1702                                 pChildTickStack[nStackPos] = 0;
1703 
1704                             }
1705                             else if(MP_LOG_META == nType)
1706                             {
1707                                 if(nStackPos)
1708                                 {
1709                                     int64_t nMetaIndex = MicroProfileLogTimerIndex(LE);
1710                                     int64_t nMetaCount = MicroProfileLogGetTick(LE);
1711                                     MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX);
1712                                     int64_t nCounter = MicroProfileLogTimerIndex(pLog->Log[pStack[nStackPos-1]]);
1713                                     S.MetaCounters[nMetaIndex].nCounters[nCounter] += nMetaCount;
1714                                 }
1715                             }
1716                             else if(MP_LOG_LEAVE == nType)
1717                             {
1718                                 int nTimer = MicroProfileLogTimerIndex(LE);
1719                                 uint8_t nGroup = pTimerToGroup[nTimer];
1720                                 MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1721                                 if(nStackPos)
1722                                 {
1723                                     int64_t nTickStart = pLog->Log[pStack[nStackPos-1]];
1724                                     int64_t nTicks = MicroProfileLogTickDifference(nTickStart, LE);
1725                                     int64_t nChildTicks = pChildTickStack[nStackPos];
1726                                     nStackPos--;
1727                                     pChildTickStack[nStackPos] += nTicks;
1728 
1729                                     uint32_t nTimerIndex = MicroProfileLogTimerIndex(LE);
1730                                     S.Frame[nTimerIndex].nTicks += nTicks;
1731                                     S.FrameExclusive[nTimerIndex] += (nTicks-nChildTicks);
1732                                     S.Frame[nTimerIndex].nCount += 1;
1733 
1734                                     MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
1735                                     uint8_t nGroupStackPos = pGroupStackPos[nGroup];
1736                                     if(nGroupStackPos)
1737                                     {
1738                                         nGroupStackPos--;
1739                                         if(0 == nGroupStackPos)
1740                                         {
1741                                             nGroupTicks[nGroup] += nTicks;
1742                                         }
1743                                         pGroupStackPos[nGroup] = nGroupStackPos;
1744                                     }
1745                                 }
1746                             }
1747                         }
1748                     }
1749                     for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1750                     {
1751                         pLog->nGroupTicks[i] += nGroupTicks[i];
1752                         pFrameGroup[i] += nGroupTicks[i];
1753                     }
1754                     pLog->nStackPos = nStackPos;
1755                 }
1756             }
1757             {
1758                 MICROPROFILE_SCOPE(g_MicroProfileAccumulate);
1759                 for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1760                 {
1761                     S.AccumTimers[i].nTicks += S.Frame[i].nTicks;
1762                     S.AccumTimers[i].nCount += S.Frame[i].nCount;
1763                     S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks);
1764                     S.AccumTimersExclusive[i] += S.FrameExclusive[i];
1765                     S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]);
1766                 }
1767 
1768                 for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
1769                 {
1770                     S.AccumGroup[i] += pFrameGroup[i];
1771                     S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i]);
1772                 }
1773 
1774                 for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1775                 {
1776                     if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1777                     {
1778                         auto& Meta = S.MetaCounters[j];
1779                         uint64_t nSum = 0;;
1780                         for(uint32_t i = 0; i < S.nTotalTimers; ++i)
1781                         {
1782                             uint64_t nCounter = Meta.nCounters[i];
1783                             Meta.nAccumMax[i] = MicroProfileMax(Meta.nAccumMax[i], nCounter);
1784                             Meta.nAccum[i] += nCounter;
1785                             nSum += nCounter;
1786                         }
1787                         Meta.nSumAccum += nSum;
1788                         Meta.nSumAccumMax = MicroProfileMax(Meta.nSumAccumMax, nSum);
1789                     }
1790                 }
1791             }
1792             for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
1793             {
1794                 if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
1795                 {
1796                     MicroProfileToken nToken = S.Graph[i].nToken;
1797                     S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks;
1798                 }
1799             }
1800             S.nGraphPut = (S.nGraphPut+1) % MICROPROFILE_GRAPH_HISTORY;
1801 
1802         }
1803 
1804 
1805         if(S.nRunning && S.nAggregateFlip <= ++S.nAggregateFlipCount)
1806         {
1807             nAggregateFlip = 1;
1808             if(S.nAggregateFlip) // if 0 accumulate indefinitely
1809             {
1810                 nAggregateClear = 1;
1811             }
1812         }
1813     }
1814     if(nAggregateFlip)
1815     {
1816         memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers);
1817         memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers);
1818         memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
1819         memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers);
1820 
1821         memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup));
1822         memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup));
1823 
1824         for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
1825         {
1826             MicroProfileThreadLog* pLog = S.Pool[i];
1827             if(!pLog)
1828                 continue;
1829 
1830             memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks));
1831 
1832             if(nAggregateClear)
1833             {
1834                 memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks));
1835             }
1836         }
1837 
1838         for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j)
1839         {
1840             if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j)))
1841             {
1842                 auto& Meta = S.MetaCounters[j];
1843                 memcpy(&Meta.nAggregateMax[0], &Meta.nAccumMax[0], sizeof(Meta.nAggregateMax[0]) * S.nTotalTimers);
1844                 memcpy(&Meta.nAggregate[0], &Meta.nAccum[0], sizeof(Meta.nAggregate[0]) * S.nTotalTimers);
1845                 Meta.nSumAggregate = Meta.nSumAccum;
1846                 Meta.nSumAggregateMax = Meta.nSumAccumMax;
1847                 if(nAggregateClear)
1848                 {
1849                     memset(&Meta.nAccumMax[0], 0, sizeof(Meta.nAccumMax[0]) * S.nTotalTimers);
1850                     memset(&Meta.nAccum[0], 0, sizeof(Meta.nAccum[0]) * S.nTotalTimers);
1851                     Meta.nSumAccum = 0;
1852                     Meta.nSumAccumMax = 0;
1853                 }
1854             }
1855         }
1856 
1857 
1858 
1859 
1860 
1861         S.nAggregateFrames = S.nAggregateFlipCount;
1862         S.nFlipAggregateDisplay = S.nFlipAggregate;
1863         S.nFlipMaxDisplay = S.nFlipMax;
1864         if(nAggregateClear)
1865         {
1866             memset(&S.AccumTimers[0], 0, sizeof(S.Aggregate[0]) * S.nTotalTimers);
1867             memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers);
1868             memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
1869             memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers);
1870             memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup));
1871             memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup));
1872 
1873             S.nAggregateFlipCount = 0;
1874             S.nFlipAggregate = 0;
1875             S.nFlipMax = 0;
1876 
1877             S.nAggregateFlipTick = MP_TICK();
1878         }
1879     }
1880     S.nAggregateClear = 0;
1881 
1882     uint64_t nNewActiveGroup = 0;
1883     if(S.nForceEnable || (S.nDisplay && S.nRunning))
1884         nNewActiveGroup = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted;
1885     nNewActiveGroup |= S.nForceGroup;
1886     nNewActiveGroup |= S.nForceGroupUI;
1887     if(S.nActiveGroup != nNewActiveGroup)
1888         S.nActiveGroup = nNewActiveGroup;
1889     uint32_t nNewActiveBars = 0;
1890     if(S.nDisplay && S.nRunning)
1891         nNewActiveBars = S.nBars;
1892     if(S.nForceMetaCounters)
1893     {
1894         for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
1895         {
1896             if(S.MetaCounters[i].pName)
1897             {
1898                 nNewActiveBars |= (MP_DRAW_META_FIRST<<i);
1899             }
1900         }
1901     }
1902     if(nNewActiveBars != S.nActiveBars)
1903         S.nActiveBars = nNewActiveBars;
1904 }
1905 
MicroProfileSetForceEnable(bool bEnable)1906 void MicroProfileSetForceEnable(bool bEnable)
1907 {
1908     S.nForceEnable = bEnable ? 1 : 0;
1909 }
MicroProfileGetForceEnable()1910 bool MicroProfileGetForceEnable()
1911 {
1912     return S.nForceEnable != 0;
1913 }
1914 
MicroProfileSetEnableAllGroups(bool bEnableAllGroups)1915 void MicroProfileSetEnableAllGroups(bool bEnableAllGroups)
1916 {
1917     S.nAllGroupsWanted = bEnableAllGroups ? 1 : 0;
1918 }
1919 
MicroProfileEnableCategory(const char * pCategory,bool bEnabled)1920 void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
1921 {
1922     int nCategoryIndex = -1;
1923     for(uint32_t i = 0; i < S.nCategoryCount; ++i)
1924     {
1925         if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
1926         {
1927             nCategoryIndex = (int)i;
1928             break;
1929         }
1930     }
1931     if(nCategoryIndex >= 0)
1932     {
1933         if(bEnabled)
1934         {
1935             S.nActiveGroupWanted |= S.CategoryInfo[nCategoryIndex].nGroupMask;
1936         }
1937         else
1938         {
1939             S.nActiveGroupWanted &= ~S.CategoryInfo[nCategoryIndex].nGroupMask;
1940         }
1941     }
1942 }
1943 
1944 
MicroProfileEnableCategory(const char * pCategory)1945 void MicroProfileEnableCategory(const char* pCategory)
1946 {
1947     MicroProfileEnableCategory(pCategory, true);
1948 }
MicroProfileDisableCategory(const char * pCategory)1949 void MicroProfileDisableCategory(const char* pCategory)
1950 {
1951     MicroProfileEnableCategory(pCategory, false);
1952 }
1953 
MicroProfileGetEnableAllGroups()1954 bool MicroProfileGetEnableAllGroups()
1955 {
1956     return 0 != S.nAllGroupsWanted;
1957 }
1958 
MicroProfileSetForceMetaCounters(bool bForce)1959 void MicroProfileSetForceMetaCounters(bool bForce)
1960 {
1961     S.nForceMetaCounters = bForce ? 1 : 0;
1962 }
1963 
MicroProfileGetForceMetaCounters()1964 bool MicroProfileGetForceMetaCounters()
1965 {
1966     return 0 != S.nForceMetaCounters;
1967 }
1968 
MicroProfileEnableMetaCounter(const char * pMeta)1969 void MicroProfileEnableMetaCounter(const char* pMeta)
1970 {
1971     for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1972     {
1973         if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta))
1974         {
1975             S.nBars |= (MP_DRAW_META_FIRST<<i);
1976             return;
1977         }
1978     }
1979 }
MicroProfileDisableMetaCounter(const char * pMeta)1980 void MicroProfileDisableMetaCounter(const char* pMeta)
1981 {
1982     for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i)
1983     {
1984         if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta))
1985         {
1986             S.nBars &= ~(MP_DRAW_META_FIRST<<i);
1987             return;
1988         }
1989     }
1990 }
1991 
1992 
MicroProfileSetAggregateFrames(int nFrames)1993 void MicroProfileSetAggregateFrames(int nFrames)
1994 {
1995     S.nAggregateFlip = (uint32_t)nFrames;
1996     if(0 == nFrames)
1997     {
1998         S.nAggregateClear = 1;
1999     }
2000 }
2001 
MicroProfileGetAggregateFrames()2002 int MicroProfileGetAggregateFrames()
2003 {
2004     return S.nAggregateFlip;
2005 }
2006 
MicroProfileGetCurrentAggregateFrames()2007 int MicroProfileGetCurrentAggregateFrames()
2008 {
2009     return int(S.nAggregateFlip ? S.nAggregateFlip : S.nAggregateFlipCount);
2010 }
2011 
2012 
MicroProfileForceEnableGroup(const char * pGroup,MicroProfileTokenType Type)2013 void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type)
2014 {
2015     MicroProfileInit();
2016     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2017     uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
2018     S.nForceGroup |= (1ll << nGroup);
2019 }
2020 
MicroProfileForceDisableGroup(const char * pGroup,MicroProfileTokenType Type)2021 void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type)
2022 {
2023     MicroProfileInit();
2024     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2025     uint16_t nGroup = MicroProfileGetGroup(pGroup, Type);
2026     S.nForceGroup &= ~(1ll << nGroup);
2027 }
2028 
2029 
MicroProfileCalcAllTimers(float * pTimers,float * pAverage,float * pMax,float * pCallAverage,float * pExclusive,float * pAverageExclusive,float * pMaxExclusive,float * pTotal,uint32_t nSize)2030 void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
2031 {
2032     for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i)
2033     {
2034         const uint32_t nGroupId = S.TimerInfo[i].nGroupIndex;
2035         const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
2036         uint32_t nTimer = i;
2037         uint32_t nIdx = i * 2;
2038         uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2039         uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
2040         float fToPrc = S.fRcpReferenceTime;
2041         float fMs = fToMs * (S.Frame[nTimer].nTicks);
2042         float fPrc = MicroProfileMin(fMs * fToPrc, 1.f);
2043         float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames);
2044         float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f);
2045         float fMaxMs = fToMs * (S.AggregateMax[nTimer]);
2046         float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f);
2047         float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount);
2048         float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f);
2049         float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]);
2050         float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f);
2051         float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames);
2052         float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f);
2053         float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]);
2054         float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f);
2055         float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks;
2056         pTimers[nIdx] = fMs;
2057         pTimers[nIdx+1] = fPrc;
2058         pAverage[nIdx] = fAverageMs;
2059         pAverage[nIdx+1] = fAveragePrc;
2060         pMax[nIdx] = fMaxMs;
2061         pMax[nIdx+1] = fMaxPrc;
2062         pCallAverage[nIdx] = fCallAverageMs;
2063         pCallAverage[nIdx+1] = fCallAveragePrc;
2064         pExclusive[nIdx] = fMsExclusive;
2065         pExclusive[nIdx+1] = fPrcExclusive;
2066         pAverageExclusive[nIdx] = fAverageMsExclusive;
2067         pAverageExclusive[nIdx+1] = fAveragePrcExclusive;
2068         pMaxExclusive[nIdx] = fMaxMsExclusive;
2069         pMaxExclusive[nIdx+1] = fMaxPrcExclusive;
2070         pTotal[nIdx] = fTotalMs;
2071         pTotal[nIdx+1] = 0.f;
2072     }
2073 }
2074 
MicroProfileTogglePause()2075 void MicroProfileTogglePause()
2076 {
2077     S.nToggleRunning = 1;
2078 }
2079 
MicroProfileGetTime(const char * pGroup,const char * pName)2080 float MicroProfileGetTime(const char* pGroup, const char* pName)
2081 {
2082     MicroProfileToken nToken = MicroProfileFindToken(pGroup, pName);
2083     if(nToken == MICROPROFILE_INVALID_TOKEN)
2084     {
2085         return 0.f;
2086     }
2087     uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken);
2088     uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken);
2089     float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
2090     return S.Frame[nTimerIndex].nTicks * fToMs;
2091 }
2092 
2093 
MicroProfileContextSwitchSearch(uint32_t * pContextSwitchStart,uint32_t * pContextSwitchEnd,uint64_t nBaseTicksCpu,uint64_t nBaseTicksEndCpu)2094 void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu)
2095 {
2096     MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch);
2097     uint32_t nContextSwitchPut = S.nContextSwitchPut;
2098     uint64_t nContextSwitchStart, nContextSwitchEnd;
2099     nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
2100     int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
2101     int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
2102     for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
2103     {
2104         uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i+1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
2105         MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex];
2106         if(CS.nTicks > nSearchEnd)
2107         {
2108             nContextSwitchEnd = nIndex;
2109         }
2110         if(CS.nTicks > nSearchBegin)
2111         {
2112             nContextSwitchStart = nIndex;
2113         }
2114     }
2115     *pContextSwitchStart = nContextSwitchStart;
2116     *pContextSwitchEnd = nContextSwitchEnd;
2117 }
2118 
2119 
2120 
2121 #if MICROPROFILE_WEBSERVER
2122 
2123 #define MICROPROFILE_EMBED_HTML
2124 
// HTML template data, defined in another translation unit, stored as arrays of
// chunks with a parallel array of chunk sizes and a chunk count.
// MicroProfileDumpHtml writes g_MicroProfileHtml_begin_sizes[i]-1 bytes of each
// "begin" chunk before the dumped data.
extern const char* g_MicroProfileHtml_begin[];
extern size_t g_MicroProfileHtml_begin_sizes[];
extern size_t g_MicroProfileHtml_begin_count;
// NOTE(review): presumably the matching chunks written after the dumped data -
// their use is not visible in this part of the file; confirm.
extern const char* g_MicroProfileHtml_end[];
extern size_t g_MicroProfileHtml_end_sizes[];
extern size_t g_MicroProfileHtml_end_count;

// Output sink used by the dump functions: called with the caller-supplied
// Handle, the number of bytes available at pData, and the data pointer.
typedef void MicroProfileWriteCallback(void* Handle, size_t size, const char* pData);
2133 
MicroProfileWebServerPort()2134 uint32_t MicroProfileWebServerPort()
2135 {
2136     return S.nWebServerPort;
2137 }
2138 
MicroProfileDumpFile(const char * pHtml,const char * pCsv)2139 void MicroProfileDumpFile(const char* pHtml, const char* pCsv)
2140 {
2141     S.nDumpFileNextFrame = 0;
2142     if(pHtml)
2143     {
2144         uint32_t nLen = strlen(pHtml);
2145         if(nLen > sizeof(S.HtmlDumpPath)-1)
2146         {
2147             return;
2148         }
2149         memcpy(S.HtmlDumpPath, pHtml, nLen+1);
2150         S.nDumpFileNextFrame |= 1;
2151     }
2152     if(pCsv)
2153     {
2154         uint32_t nLen = strlen(pCsv);
2155         if(nLen > sizeof(S.CsvDumpPath)-1)
2156         {
2157             return;
2158         }
2159         memcpy(S.CsvDumpPath, pCsv, nLen+1);
2160         S.nDumpFileNextFrame |= 2;
2161     }
2162 }
2163 
MicroProfilePrintf(MicroProfileWriteCallback CB,void * Handle,const char * pFmt,...)2164 void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...)
2165 {
2166     char buffer[32*1024];
2167     va_list args;
2168     va_start (args, pFmt);
2169 #ifdef _WIN32
2170     size_t size = vsprintf_s(buffer, pFmt, args);
2171 #else
2172     size_t size = vsnprintf(buffer, sizeof(buffer)-1,  pFmt, args);
2173 #endif
2174     CB(Handle, size, &buffer[0]);
2175     va_end (args);
2176 }
2177 
2178 #define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
MicroProfileDumpCsv(MicroProfileWriteCallback CB,void * Handle,int nMaxFrames)2179 void MicroProfileDumpCsv(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames)
2180 {
2181     uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2182     float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2183     float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
2184 
2185     printf("frames,%d\n", nAggregateFrames);
2186     printf("group,name,average,max,callaverage\n");
2187 
2188     uint32_t nNumTimers = S.nTotalTimers;
2189     uint32_t nBlockSize = 2 * nNumTimers;
2190     float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
2191     float* pAverage = pTimers + nBlockSize;
2192     float* pMax = pTimers + 2 * nBlockSize;
2193     float* pCallAverage = pTimers + 3 * nBlockSize;
2194     float* pTimersExclusive = pTimers + 4 * nBlockSize;
2195     float* pAverageExclusive = pTimers + 5 * nBlockSize;
2196     float* pMaxExclusive = pTimers + 6 * nBlockSize;
2197     float* pTotal = pTimers + 7 * nBlockSize;
2198 
2199     MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
2200 
2201     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2202     {
2203         uint32_t nIdx = i * 2;
2204         printf("\"%s\",\"%s\",%f,%f,%f\n", S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]);
2205     }
2206 
2207     printf("\n\n");
2208 
2209     printf("group,average,max,total\n");
2210     for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2211     {
2212         const char* pGroupName = S.GroupInfo[j].pName;
2213         float fToMs =  S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
2214         if(pGroupName[0] != '\0')
2215         {
2216             printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j]);
2217         }
2218     }
2219 
2220     printf("\n\n");
2221     printf("group,thread,average,total\n");
2222     for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2223     {
2224         for(uint32_t i = 0; i < S.nNumLogs; ++i)
2225         {
2226             if(S.Pool[i])
2227             {
2228                 const char* pThreadName = &S.Pool[i]->ThreadName[0];
2229                 // MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i);
2230                 float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2231                 {
2232                     uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j];
2233                     float fTime = nTicks / nAggregateFrames * fToMs;
2234                     float fTimeTotal = nTicks * fToMs;
2235                     if(fTimeTotal > 0.01f)
2236                     {
2237                         const char* pGroupName = S.GroupInfo[j].pName;
2238                         printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal);
2239                     }
2240                 }
2241             }
2242         }
2243     }
2244 
2245     printf("\n\n");
2246     printf("frametimecpu\n");
2247 
2248     const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3;
2249     const uint32_t nStart = S.nFrameCurrent;
2250     for(uint32_t i = nCount; i > 0; i--)
2251     {
2252         uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
2253         uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2254         uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu;
2255         printf("%f,", nTicks * fToMsCPU);
2256     }
2257     printf("\n");
2258 
2259     printf("\n\n");
2260     printf("frametimegpu\n");
2261 
2262     for(uint32_t i = nCount; i > 0; i--)
2263     {
2264         uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
2265         uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2266         uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu;
2267         printf("%f,", nTicks * fToMsGPU);
2268     }
2269     printf("\n\n");
2270     printf("Meta\n");//only single frame snapshot
2271     printf("name,average,max,total\n");
2272     for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2273     {
2274         if(S.MetaCounters[j].pName)
2275         {
2276             printf("\"%s\",%f,%lld,%lld\n",S.MetaCounters[j].pName, S.MetaCounters[j].nSumAggregate / (float)nAggregateFrames, S.MetaCounters[j].nSumAggregateMax,S.MetaCounters[j].nSumAggregate);
2277         }
2278     }
2279 }
2280 #undef printf
2281 
MicroProfileDumpHtml(MicroProfileWriteCallback CB,void * Handle,int nMaxFrames,const char * pHost)2282 void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames, const char* pHost)
2283 {
2284     uint32_t nRunning = S.nRunning;
2285     S.nRunning = 0;
2286     //stall pushing of timers
2287     uint64_t nActiveGroup = S.nActiveGroup;
2288     S.nActiveGroup = 0;
2289     S.nPauseTicks = MP_TICK();
2290 
2291 
2292     for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i)
2293     {
2294         CB(Handle, g_MicroProfileHtml_begin_sizes[i]-1, g_MicroProfileHtml_begin[i]);
2295     }
2296     //dump info
2297     uint64_t nTicks = MP_TICK();
2298 
2299     float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2300     float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
2301     float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick);
2302     MicroProfilePrintf(CB, Handle, "var DumpHost = '%s';\n", pHost ? pHost : "");
2303     time_t CaptureTime;
2304     time(&CaptureTime);
2305     MicroProfilePrintf(CB, Handle, "var DumpUtcCaptureTime = %ld;\n", CaptureTime);
2306     MicroProfilePrintf(CB, Handle, "var AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs);
2307 
2308     //categories
2309     MicroProfilePrintf(CB, Handle, "var CategoryInfo = Array(%d);\n",S.nCategoryCount);
2310     for(uint32_t i = 0; i < S.nCategoryCount; ++i)
2311     {
2312         MicroProfilePrintf(CB, Handle, "CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName);
2313     }
2314 
2315     //groups
2316     MicroProfilePrintf(CB, Handle, "var GroupInfo = Array(%d);\n\n",S.nGroupCount);
2317     uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
2318     float fRcpAggregateFrames = 1.f / nAggregateFrames;
2319     for(uint32_t i = 0; i < S.nGroupCount; ++i)
2320     {
2321         MP_ASSERT(i == S.GroupInfo[i].nGroupIndex);
2322         float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU;
2323         MicroProfilePrintf(CB, Handle, "GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '#%02x%02x%02x');\n",
2324             S.GroupInfo[i].nGroupIndex,
2325             S.GroupInfo[i].nGroupIndex,
2326             S.GroupInfo[i].pName,
2327             S.GroupInfo[i].nCategory,
2328             S.GroupInfo[i].nNumTimers,
2329             S.GroupInfo[i].Type == MicroProfileTokenTypeGpu?1:0,
2330             fToMs * S.AggregateGroup[i],
2331             fToMs * S.AggregateGroup[i] / nAggregateFrames,
2332             fToMs * S.AggregateGroupMax[i],
2333             MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff,
2334             MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff,
2335             MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff);
2336     }
2337     //timers
2338 
2339     uint32_t nNumTimers = S.nTotalTimers;
2340     uint32_t nBlockSize = 2 * nNumTimers;
2341     float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float));
2342     float* pAverage = pTimers + nBlockSize;
2343     float* pMax = pTimers + 2 * nBlockSize;
2344     float* pCallAverage = pTimers + 3 * nBlockSize;
2345     float* pTimersExclusive = pTimers + 4 * nBlockSize;
2346     float* pAverageExclusive = pTimers + 5 * nBlockSize;
2347     float* pMaxExclusive = pTimers + 6 * nBlockSize;
2348     float* pTotal = pTimers + 7 * nBlockSize;
2349 
2350     MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
2351 
2352     MicroProfilePrintf(CB, Handle, "\nvar TimerInfo = Array(%d);\n\n", S.nTotalTimers);
2353     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2354     {
2355         uint32_t nIdx = i * 2;
2356         MP_ASSERT(i == S.TimerInfo[i].nTimerIndex);
2357         MicroProfilePrintf(CB, Handle, "var Meta%d = [", i);
2358         bool bOnce = true;
2359         for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2360         {
2361             if(S.MetaCounters[j].pName)
2362             {
2363                 uint32_t lala = S.MetaCounters[j].nCounters[i];
2364                 MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", lala);
2365                 bOnce = false;
2366             }
2367         }
2368         MicroProfilePrintf(CB, Handle, "];\n");
2369         MicroProfilePrintf(CB, Handle, "var MetaAvg%d = [", i);
2370         bOnce = true;
2371         for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2372         {
2373             if(S.MetaCounters[j].pName)
2374             {
2375                 MicroProfilePrintf(CB, Handle, bOnce ? "%f" : ",%f", fRcpAggregateFrames * S.MetaCounters[j].nAggregate[i]);
2376                 bOnce = false;
2377             }
2378         }
2379         MicroProfilePrintf(CB, Handle, "];\n");
2380         MicroProfilePrintf(CB, Handle, "var MetaMax%d = [", i);
2381         bOnce = true;
2382         for(int j = 0; j < MICROPROFILE_META_MAX; ++j)
2383         {
2384             if(S.MetaCounters[j].pName)
2385             {
2386                 MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", S.MetaCounters[j].nAggregateMax[i]);
2387                 bOnce = false;
2388             }
2389         }
2390         MicroProfilePrintf(CB, Handle, "];\n");
2391 
2392 
2393         uint32_t nColor = S.TimerInfo[i].nColor;
2394         uint32_t nColorDark = (nColor >> 1) & ~0x80808080;
2395         MicroProfilePrintf(CB, Handle, "TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %d, %f, Meta%d, MetaAvg%d, MetaMax%d);\n", S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].pName, S.TimerInfo[i].nGroupIndex,
2396             MICROPROFILE_UNPACK_RED(nColor) & 0xff,
2397             MICROPROFILE_UNPACK_GREEN(nColor) & 0xff,
2398             MICROPROFILE_UNPACK_BLUE(nColor) & 0xff,
2399             MICROPROFILE_UNPACK_RED(nColorDark) & 0xff,
2400             MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff,
2401             MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff,
2402             pAverage[nIdx],
2403             pMax[nIdx],
2404             pAverageExclusive[nIdx],
2405             pMaxExclusive[nIdx],
2406             pCallAverage[nIdx],
2407             S.Aggregate[i].nCount,
2408             pTotal[nIdx],
2409             i,i,i);
2410 
2411     }
2412 
2413     MicroProfilePrintf(CB, Handle, "\nvar ThreadNames = [");
2414     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2415     {
2416         if(S.Pool[i])
2417         {
2418             MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName);
2419         }
2420         else
2421         {
2422             MicroProfilePrintf(CB, Handle, "'Thread %d',", i);
2423         }
2424     }
2425     MicroProfilePrintf(CB, Handle, "];\n\n");
2426 
2427 
2428     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2429     {
2430         if(S.Pool[i])
2431         {
2432             MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i);
2433             float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2434             for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2435             {
2436                 MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j]/nAggregateFrames * fToMs);
2437             }
2438             MicroProfilePrintf(CB, Handle, "];\n");
2439         }
2440     }
2441     MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeArray = [");
2442     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2443     {
2444         if(S.Pool[i])
2445         {
2446             MicroProfilePrintf(CB, Handle, "ThreadGroupTime%d,", i);
2447         }
2448     }
2449     MicroProfilePrintf(CB, Handle, "];\n");
2450 
2451 
2452     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2453     {
2454         if(S.Pool[i])
2455         {
2456             MicroProfilePrintf(CB, Handle, "var ThreadGroupTimeTotal%d = [", i);
2457             float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
2458             for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
2459             {
2460                 MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs);
2461             }
2462             MicroProfilePrintf(CB, Handle, "];\n");
2463         }
2464     }
2465     MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeTotalArray = [");
2466     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2467     {
2468         if(S.Pool[i])
2469         {
2470             MicroProfilePrintf(CB, Handle, "ThreadGroupTimeTotal%d,", i);
2471         }
2472     }
2473     MicroProfilePrintf(CB, Handle, "];");
2474 
2475 
2476 
2477 
2478     MicroProfilePrintf(CB, Handle, "\nvar ThreadIds = [");
2479     for(uint32_t i = 0; i < S.nNumLogs; ++i)
2480     {
2481         if(S.Pool[i])
2482         {
2483             ThreadIdType ThreadId = S.Pool[i]->nThreadId;
2484             if(!ThreadId)
2485             {
2486                 ThreadId = (ThreadIdType)-1;
2487             }
2488             MicroProfilePrintf(CB, Handle, "%d,", ThreadId);
2489         }
2490         else
2491         {
2492             MicroProfilePrintf(CB, Handle, "-1,", i);
2493         }
2494     }
2495     MicroProfilePrintf(CB, Handle, "];\n\n");
2496 
2497     MicroProfilePrintf(CB, Handle, "\nvar MetaNames = [");
2498     for(int i = 0; i < MICROPROFILE_META_MAX; ++i)
2499     {
2500         if(S.MetaCounters[i].pName)
2501         {
2502             MicroProfilePrintf(CB, Handle, "'%s',", S.MetaCounters[i].pName);
2503         }
2504     }
2505 
2506 
2507     MicroProfilePrintf(CB, Handle, "];\n\n");
2508 
2509 
2510 
2511     uint32_t nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); //leave a few to not overwrite
2512     nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
2513 
2514 
2515     uint32_t nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
2516     uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
2517     MP_ASSERT(nLastFrame == (S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY));
2518     MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY);
2519     MP_ASSERT(nLastFrame  < MICROPROFILE_MAX_FRAME_HISTORY);
2520     const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu;
2521     const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu;
2522     int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu;
2523 
2524     int64_t nTickReferenceCpu, nTickReferenceGpu;
2525     int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
2526     int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
2527     int nTickReference = 0;
2528     if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
2529     {
2530         nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
2531         nTickReference = 1;
2532     }
2533 
2534 
2535 #if MICROPROFILE_DEBUG
2536     printf("dumping %d frames\n", nNumFrames);
2537     printf("dumping frame %d to %d\n", nFirstFrame, nLastFrame);
2538 #endif
2539 
2540 
2541     uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
2542     memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers);
2543 
2544     MicroProfilePrintf(CB, Handle, "var Frames = Array(%d);\n", nNumFrames);
2545     for(uint32_t i = 0; i < nNumFrames; ++i)
2546     {
2547         uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY;
2548         uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
2549 
2550         for(uint32_t j = 0; j < S.nNumLogs; ++j)
2551         {
2552             MicroProfileThreadLog* pLog = S.Pool[j];
2553             int64_t nStartTickBase = pLog->nGpu ? nTickStartGpu : nTickStart;
2554             uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j];
2555             uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j];
2556 
2557             float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
2558             float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu);
2559             MicroProfilePrintf(CB, Handle, "var ts_%d_%d = [", i, j);
2560             if(nLogStart != nLogEnd)
2561             {
2562                 uint32_t k = nLogStart;
2563                 uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2564                 float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
2565                 int64_t nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
2566                 float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
2567                 MicroProfilePrintf(CB, Handle, "%f", fTime);
2568                 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2569                 {
2570                     uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2571                     float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase;
2572                     nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase;
2573                     float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
2574                     MicroProfilePrintf(CB, Handle, ",%f", fTime);
2575                 }
2576             }
2577             MicroProfilePrintf(CB, Handle, "];\n");
2578             MicroProfilePrintf(CB, Handle, "var tt_%d_%d = [", i, j);
2579             if(nLogStart != nLogEnd)
2580             {
2581                 uint32_t k = nLogStart;
2582                 MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogType(pLog->Log[k]));
2583                 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2584                 {
2585                     uint32_t nLogType = MicroProfileLogType(pLog->Log[k]);
2586                     if(nLogType == MP_LOG_META)
2587                     {
2588                         //for meta, store the count + 3, which is the tick part
2589                         nLogType = 3 + MicroProfileLogGetTick(pLog->Log[k]);
2590                     }
2591                     MicroProfilePrintf(CB, Handle, ",%d", nLogType);
2592                 }
2593             }
2594             MicroProfilePrintf(CB, Handle, "];\n");
2595 
2596             MicroProfilePrintf(CB, Handle, "var ti_%d_%d = [", i, j);
2597             if(nLogStart != nLogEnd)
2598             {
2599                 uint32_t k = nLogStart;
2600                 MicroProfilePrintf(CB, Handle, "%d", (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]));
2601                 for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE)
2602                 {
2603                     uint32_t nTimerIndex = (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]);
2604                     MicroProfilePrintf(CB, Handle, ",%d", nTimerIndex);
2605                     nTimerCounter[nTimerIndex]++;
2606                 }
2607             }
2608             MicroProfilePrintf(CB, Handle, "];\n");
2609 
2610         }
2611 
2612         MicroProfilePrintf(CB, Handle, "var ts%d = [", i);
2613         for(uint32_t j = 0; j < S.nNumLogs; ++j)
2614         {
2615             MicroProfilePrintf(CB, Handle, "ts_%d_%d,", i, j);
2616         }
2617         MicroProfilePrintf(CB, Handle, "];\n");
2618         MicroProfilePrintf(CB, Handle, "var tt%d = [", i);
2619         for(uint32_t j = 0; j < S.nNumLogs; ++j)
2620         {
2621             MicroProfilePrintf(CB, Handle, "tt_%d_%d,", i, j);
2622         }
2623         MicroProfilePrintf(CB, Handle, "];\n");
2624 
2625         MicroProfilePrintf(CB, Handle, "var ti%d = [", i);
2626         for(uint32_t j = 0; j < S.nNumLogs; ++j)
2627         {
2628             MicroProfilePrintf(CB, Handle, "ti_%d_%d,", i, j);
2629         }
2630         MicroProfilePrintf(CB, Handle, "];\n");
2631 
2632 
2633         int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu;
2634         int64_t nFrameEnd = S.Frames[nFrameIndexNext].nFrameStartCpu;
2635 
2636         float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
2637         float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs;
2638         float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs;
2639         float fFrameGpuMs = 0;
2640         float fFrameGpuEndMs = 0;
2641         if(nTickReference)
2642         {
2643             fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU;
2644             fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU;
2645         }
2646         MicroProfilePrintf(CB, Handle, "Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, ts%d, tt%d, ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i);
2647     }
2648 
2649     uint32_t nContextSwitchStart = 0;
2650     uint32_t nContextSwitchEnd = 0;
2651     MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd);
2652 
2653     uint32_t nWrittenBefore = S.nWebServerDataSent;
2654     MicroProfilePrintf(CB, Handle, "var CSwitchThreadInOutCpu = [");
2655     for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
2656     {
2657         MicroProfileContextSwitch CS = S.ContextSwitch[j];
2658         int nCpu = CS.nCpu;
2659         MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu);
2660     }
2661     MicroProfilePrintf(CB, Handle, "];\n");
2662     MicroProfilePrintf(CB, Handle, "var CSwitchTime = [");
2663     float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
2664     for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
2665     {
2666         MicroProfileContextSwitch CS = S.ContextSwitch[j];
2667         float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu;
2668         MicroProfilePrintf(CB, Handle, "%f,", fTime);
2669     }
2670     MicroProfilePrintf(CB, Handle, "];\n");
2671     uint32_t nWrittenAfter = S.nWebServerDataSent;
2672     MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - nWrittenBefore);
2673 
2674 
2675     for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i)
2676     {
2677         CB(Handle, g_MicroProfileHtml_end_sizes[i]-1, g_MicroProfileHtml_end[i]);
2678     }
2679 
2680     uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
2681 
2682     memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount);
2683     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2684     {
2685         uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex;
2686         nGroupCounter[nGroupIndex] += nTimerCounter[i];
2687     }
2688 
2689     uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount);
2690     uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers);
2691     for(uint32_t i = 0; i < S.nGroupCount; ++i)
2692     {
2693         nGroupCounterSort[i] = i;
2694     }
2695     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2696     {
2697         nTimerCounterSort[i] = i;
2698     }
2699     std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount,
2700         [nGroupCounter](const uint32_t l, const uint32_t r)
2701         {
2702             return nGroupCounter[l] > nGroupCounter[r];
2703         }
2704     );
2705 
2706     std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers,
2707         [nTimerCounter](const uint32_t l, const uint32_t r)
2708         {
2709             return nTimerCounter[l] > nTimerCounter[r];
2710         }
2711     );
2712 
2713     MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n");
2714     for(uint32_t i = 0; i < S.nGroupCount; ++i)
2715     {
2716         uint32_t idx = nGroupCounterSort[i];
2717         MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName);
2718     }
2719     MicroProfilePrintf(CB, Handle, "Marker Per Timer\n");
2720     for(uint32_t i = 0; i < S.nTotalTimers; ++i)
2721     {
2722         uint32_t idx = nTimerCounterSort[i];
2723         MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName);
2724     }
2725     MicroProfilePrintf(CB, Handle, "\n-->\n");
2726 
2727     S.nActiveGroup = nActiveGroup;
2728     S.nRunning = nRunning;
2729 
2730 #if MICROPROFILE_DEBUG
2731     int64_t nTicksEnd = MP_TICK();
2732     float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks);
2733     printf("html dump took %6.2fms\n", fMs);
2734 #endif
2735 
2736 
2737 }
2738 
// Write callback for file dumps: Handle is the FILE* the dump is written to.
// The nSize bytes at pData are written as a single item; the result of
// fwrite is not inspected.
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData)
{
    FILE* pFile = (FILE*)Handle;
    fwrite(pData, nSize, 1, pFile);
}
2743 
MicroProfileDumpToFile()2744 void MicroProfileDumpToFile()
2745 {
2746     std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2747     if(S.nDumpFileNextFrame&1)
2748     {
2749         FILE* F = fopen(S.HtmlDumpPath, "w");
2750         if(F)
2751         {
2752             MicroProfileDumpHtml(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES, S.HtmlDumpPath);
2753             fclose(F);
2754         }
2755     }
2756     if(S.nDumpFileNextFrame&2)
2757     {
2758         FILE* F = fopen(S.CsvDumpPath, "w");
2759         if(F)
2760         {
2761             MicroProfileDumpCsv(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES);
2762             fclose(F);
2763         }
2764     }
2765 }
2766 
MicroProfileFlushSocket(MpSocket Socket)2767 void MicroProfileFlushSocket(MpSocket Socket)
2768 {
2769     send(Socket, &S.WebServerBuffer[0], S.WebServerPut, 0);
2770     S.WebServerPut = 0;
2771 
2772 }
2773 
MicroProfileWriteSocket(void * Handle,size_t nSize,const char * pData)2774 void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData)
2775 {
2776     S.nWebServerDataSent += nSize;
2777     MpSocket Socket = *(MpSocket*)Handle;
2778     if(nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2)
2779     {
2780         MicroProfileFlushSocket(Socket);
2781         send(Socket, pData, nSize, 0);
2782 
2783     }
2784     else
2785     {
2786         memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize);
2787         S.WebServerPut += nSize;
2788         if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE/2)
2789         {
2790             MicroProfileFlushSocket(Socket);
2791         }
2792     }
2793 }
2794 
2795 #if MICROPROFILE_MINIZ
2796 #ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE
2797 #define MICROPROFILE_COMPRESS_BUFFER_SIZE (256<<10)
2798 #endif
2799 
2800 #define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE/2)
// State for streaming deflate-compressed data to a socket. Initialised by
// MicroProfileCompressedSocketStart, fed through
// MicroProfileCompressedWriteSocket and torn down by
// MicroProfileCompressedSocketFinish.
struct MicroProfileCompressedSocketState
{
    unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK]; // compressed output staging; Stream.next_out points into this
    unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK];  // uncompressed input staging; Stream.next_in points into this
    mz_stream Stream;          // miniz deflate stream
    MpSocket Socket;           // socket the compressed bytes are sent to
    uint32_t nSize;            // total uncompressed bytes handed to the write callback
    uint32_t nCompressedSize;  // total compressed bytes passed to send()
    uint32_t nFlushes;         // number of output flushes triggered from the write loop
    uint32_t nMemmoveBytes;    // bytes moved when compacting unconsumed input to the buffer start
};
2812 
MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState * pState)2813 void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState)
2814 {
2815     mz_stream& Stream = pState->Stream;
2816     unsigned char* pSendStart = &pState->DeflateOut[0];
2817     unsigned char* pSendEnd = &pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Stream.avail_out];
2818     if(pSendStart != pSendEnd)
2819     {
2820         send(pState->Socket, (const char*)pSendStart, pSendEnd - pSendStart, 0);
2821         pState->nCompressedSize += pSendEnd - pSendStart;
2822     }
2823     Stream.next_out = &pState->DeflateOut[0];
2824     Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
2825 
2826 }
MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState * pState,MpSocket Socket)2827 void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket)
2828 {
2829     mz_stream& Stream = pState->Stream;
2830     memset(&Stream, 0, sizeof(Stream));
2831     Stream.next_out = &pState->DeflateOut[0];
2832     Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK;
2833     Stream.next_in = &pState->DeflateIn[0];
2834     Stream.avail_in = 0;
2835     mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION);
2836     pState->Socket = Socket;
2837     pState->nSize = 0;
2838     pState->nCompressedSize = 0;
2839     pState->nFlushes = 0;
2840     pState->nMemmoveBytes = 0;
2841 
2842 }
MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState * pState)2843 void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState)
2844 {
2845     mz_stream& Stream = pState->Stream;
2846     MicroProfileCompressedSocketFlush(pState);
2847     int r = mz_deflate(&Stream, MZ_FINISH);
2848     MP_ASSERT(r == MZ_STREAM_END);
2849     MicroProfileCompressedSocketFlush(pState);
2850     r = mz_deflateEnd(&Stream);
2851     MP_ASSERT(r == MZ_OK);
2852 }
2853 
// Write callback for compressed web-server output. Handle is a
// MicroProfileCompressedSocketState*.
//
// Data is staged in pState->DeflateIn. While it fits, it is only appended and
// nothing is compressed. Once a write does not fit, the loop below copies as
// much as possible, runs deflate, and - when deflate could make no progress,
// nothing could be copied, or the output window is full - sends the
// compressed output and compacts the unconsumed input to the buffer start.
void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData)
{
    MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle;
    mz_stream& Stream = pState->Stream;
    // End of the input that is staged but not yet consumed by deflate.
    const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in;
    const unsigned char* pDeflateInStart = &pState->DeflateIn[0];
    // One past the last byte of the staging buffer.
    const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
    pState->nSize += nSize;
    // Fast path: everything fits behind the staged input, so just append.
    if(nSize <= pDeflateInRealEnd - pDeflateInEnd)
    {
        memcpy((void*)pDeflateInEnd, pData, nSize);
        Stream.avail_in += nSize;
        MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd);
        return;
    }
    // Set by the previous iteration when output must be sent before continuing.
    int Flush = 0;
    while(nSize)
    {
        pDeflateInEnd = Stream.next_in + Stream.avail_in;
        if(Flush)
        {
            pState->nFlushes++;
            MicroProfileCompressedSocketFlush(pState);
            pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
            // Staged input reaches the end of the buffer: move the unconsumed
            // part to the front so new data can be appended behind it.
            if(pDeflateInEnd == pDeflateInRealEnd)
            {
                if(Stream.avail_in)
                {
                    MP_ASSERT(pDeflateInStart != Stream.next_in);
                    memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in);
                    pState->nMemmoveBytes += Stream.avail_in;
                }
                Stream.next_in = pDeflateInStart;
                pDeflateInEnd = Stream.next_in + Stream.avail_in;
            }
        }
        // Copy as much of the caller's data as currently fits (possibly 0 bytes).
        size_t nSpace = pDeflateInRealEnd - pDeflateInEnd;
        size_t nBytes = MicroProfileMin(nSpace, nSize);
        MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd);
        memcpy((void*)pDeflateInEnd, pData, nBytes);
        Stream.avail_in += nBytes;
        nSize -= nBytes;
        pData += nBytes;
        int r = mz_deflate(&Stream, MZ_NO_FLUSH);
        // Request a flush next round if deflate stalled, no input was added,
        // or the output window is exhausted.
        Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0;
        MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK);
        if(r == MZ_BUF_ERROR)
        {
            r = mz_deflate(&Stream, MZ_SYNC_FLUSH);
        }
    }
}
2906 #endif
2907 
2908 
#ifndef MicroProfileSetNonBlocking //fcntl doesn't work on some unix-like platforms..
MicroProfileSetNonBlocking(MpSocket Socket,int NonBlocking)2910 void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking)
2911 {
2912 #ifdef _WIN32
2913     u_long nonBlocking = NonBlocking ? 1 : 0;
2914     ioctlsocket(Socket, FIONBIO, &nonBlocking);
2915 #else
2916     int Options = fcntl(Socket, F_GETFL);
2917     if(NonBlocking)
2918     {
2919         fcntl(Socket, F_SETFL, Options|O_NONBLOCK);
2920     }
2921     else
2922     {
2923         fcntl(Socket, F_SETFL, Options&(~O_NONBLOCK));
2924     }
2925 #endif
2926 }
2927 #endif
2928 
MicroProfileWebServerStart()2929 void MicroProfileWebServerStart()
2930 {
2931 #ifdef _WIN32
2932     WSADATA wsa;
2933     if(WSAStartup(MAKEWORD(2, 2), &wsa))
2934     {
2935         S.ListenerSocket = -1;
2936         return;
2937     }
2938 #endif
2939 
2940     S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6);
2941     MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket));
2942     MicroProfileSetNonBlocking(S.ListenerSocket, 1);
2943 
2944     S.nWebServerPort = (uint32_t)-1;
2945     struct sockaddr_in Addr;
2946     Addr.sin_family = AF_INET;
2947     Addr.sin_addr.s_addr = INADDR_ANY;
2948     for(int i = 0; i < 20; ++i)
2949     {
2950         Addr.sin_port = htons(MICROPROFILE_WEBSERVER_PORT+i);
2951         if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr)))
2952         {
2953             S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT+i;
2954             break;
2955         }
2956     }
2957     listen(S.ListenerSocket, 8);
2958 }
2959 
MicroProfileWebServerStop()2960 void MicroProfileWebServerStop()
2961 {
2962 #ifdef _WIN32
2963     closesocket(S.ListenerSocket);
2964     WSACleanup();
2965 #else
2966     close(S.ListenerSocket);
2967 #endif
2968 }
2969 
// Parses the path part of a "GET /<path>" request into a frame count.
//
//   pGet   NUL-terminated path with the leading '/' already removed.
//
// Returns
//   0                                  if the path contains any non-digit,
//   MICROPROFILE_WEBSERVER_MAXFRAMES   if the path is empty or evaluates to 0,
//   the decimal value otherwise, saturated at the largest int.
//
// The path comes straight from the network, so the number is accumulated
// with an explicit overflow check instead of atoi(), whose behaviour is
// undefined when the value does not fit in an int.
int MicroProfileParseGet(const char* pGet)
{
    const int nIntMax = (int)(~0u >> 1);
    int nFrames = 0;
    for(const char* pCursor = pGet; *pCursor != '\0'; ++pCursor)
    {
        if(*pCursor < '0' || *pCursor > '9')
            return 0;
        const int nDigit = *pCursor - '0';
        if(nFrames > (nIntMax - nDigit) / 10)
        {
            // Next step would overflow: clamp and keep validating the rest.
            nFrames = nIntMax;
        }
        else
        {
            nFrames = nFrames * 10 + nDigit;
        }
    }
    if(nFrames)
    {
        return nFrames;
    }
    else
    {
        return MICROPROFILE_WEBSERVER_MAXFRAMES;
    }
}
MicroProfileWebServerUpdate()2989 bool MicroProfileWebServerUpdate()
2990 {
2991     MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", -1);
2992     MpSocket Connection = accept(S.ListenerSocket, 0, 0);
2993     bool bServed = false;
2994     if(!MP_INVALID_SOCKET(Connection))
2995     {
2996         std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
2997         char Req[8192];
2998         MicroProfileSetNonBlocking(Connection, 0);
2999         int nReceived = recv(Connection, Req, sizeof(Req)-1, 0);
3000         if(nReceived > 0)
3001         {
3002             Req[nReceived] = '\0';
3003 #if MICROPROFILE_MINIZ
3004 #define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
3005 #else
3006 #define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n"
3007 #endif
3008             char* pHttp = strstr(Req, "HTTP/");
3009             char* pGet = strstr(Req, "GET /");
3010             char* pHost = strstr(Req, "Host: ");
3011             auto Terminate = [](char* pString)
3012             {
3013                 char* pEnd = pString;
3014                 while(*pEnd != '\0')
3015                 {
3016                     if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ')
3017                     {
3018                         *pEnd = '\0';
3019                         return;
3020                     }
3021                     pEnd++;
3022                 }
3023             };
3024             if(pHost)
3025             {
3026                 pHost += sizeof("Host: ")-1;
3027                 Terminate(pHost);
3028             }
3029 
3030             if(pHttp && pGet)
3031             {
3032                 *pHttp = '\0';
3033                 pGet += sizeof("GET /")-1;
3034                 Terminate(pGet);
3035                 int nFrames = MicroProfileParseGet(pGet);
3036                 if(nFrames)
3037                 {
3038                     uint64_t nTickStart = MP_TICK();
3039                     send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER)-1, 0);
3040                     uint64_t nDataStart = S.nWebServerDataSent;
3041                     S.WebServerPut = 0;
3042     #if 0 == MICROPROFILE_MINIZ
3043                     MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, nFrames, pHost);
3044                     uint64_t nDataEnd = S.nWebServerDataSent;
3045                     uint64_t nTickEnd = MP_TICK();
3046                     uint64_t nDiff = (nTickEnd - nTickStart);
3047                     float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
3048                     int nKb = ((nDataEnd-nDataStart)>>10) + 1;
3049                     int nCompressedKb = nKb;
3050                     MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
3051                     MicroProfileFlushSocket(Connection);
3052     #else
3053                     MicroProfileCompressedSocketState CompressState;
3054                     MicroProfileCompressedSocketStart(&CompressState, Connection);
3055                     MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, nFrames, pHost);
3056                     S.nWebServerDataSent += CompressState.nSize;
3057                     uint64_t nDataEnd = S.nWebServerDataSent;
3058                     uint64_t nTickEnd = MP_TICK();
3059                     uint64_t nDiff = (nTickEnd - nTickStart);
3060                     float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
3061                     int nKb = ((nDataEnd-nDataStart)>>10) + 1;
3062                     int nCompressedKb = ((CompressState.nCompressedSize)>>10) + 1;
3063                     MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
3064                     MicroProfileCompressedSocketFinish(&CompressState);
3065                     MicroProfileFlushSocket(Connection);
3066     #endif
3067 
3068     #if MICROPROFILE_DEBUG
3069                     printf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
3070     #endif
3071                 }
3072             }
3073         }
3074 #ifdef _WIN32
3075         closesocket(Connection);
3076 #else
3077         close(Connection);
3078 #endif
3079     }
3080     return bServed;
3081 }
3082 #endif
3083 
3084 
3085 
3086 
3087 #if MICROPROFILE_CONTEXT_SWITCH_TRACE
3088 //functions that need to be implemented per platform.
3089 void* MicroProfileTraceThread(void* unused);
3090 bool MicroProfileIsLocalThread(uint32_t nThreadId);
3091 
3092 
MicroProfileStartContextSwitchTrace()3093 void MicroProfileStartContextSwitchTrace()
3094 {
3095     if(!S.bContextSwitchRunning)
3096     {
3097         S.bContextSwitchRunning    = true;
3098         S.bContextSwitchStop = false;
3099         MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread);
3100     }
3101 }
3102 
MicroProfileStopContextSwitchTrace()3103 void MicroProfileStopContextSwitchTrace()
3104 {
3105     if(S.bContextSwitchRunning)
3106     {
3107         S.bContextSwitchStop = true;
3108         MicroProfileThreadJoin(&S.ContextSwitchThread);
3109     }
3110 }
3111 
3112 
3113 #ifdef _WIN32
3114 #define INITGUID
3115 #include <evntrace.h>
3116 #include <evntcons.h>
3117 #include <strsafe.h>
3118 
3119 
3120 static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c };
3121 
// Payload layout that MicroProfileContextSwitchCallback overlays on
// pEvent->MofData for events of class type 36. Only NewThreadId and
// OldThreadId are read by the visible code; field order and widths must not
// change because the struct is used as a raw view of the event bytes.
// NOTE(review): presumably mirrors the ETW CSwitch event payload - confirm
// against the Windows documentation.
struct MicroProfileSCSwitch
{
    uint32_t NewThreadId;                 // becomes Switch.nThreadIn
    uint32_t OldThreadId;                 // becomes Switch.nThreadOut
    int8_t   NewThreadPriority;
    int8_t   OldThreadPriority;
    uint8_t  PreviousCState;
    int8_t   SpareByte;
    int8_t   OldThreadWaitReason;
    int8_t   OldThreadWaitMode;
    int8_t   OldThreadState;
    int8_t   OldThreadWaitIdealProcessor;
    uint32_t NewThreadWaitTime;
    uint32_t Reserved;
};
3137 
3138 
MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)3139 VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)
3140 {
3141     if (pEvent->Header.Guid == g_MicroProfileThreadClassGuid)
3142     {
3143         if (pEvent->Header.Class.Type == 36)
3144         {
3145             MicroProfileSCSwitch* pCSwitch = (MicroProfileSCSwitch*) pEvent->MofData;
3146             if ((pCSwitch->NewThreadId != 0) || (pCSwitch->OldThreadId != 0))
3147             {
3148                 MicroProfileContextSwitch Switch;
3149                 Switch.nThreadOut = pCSwitch->OldThreadId;
3150                 Switch.nThreadIn = pCSwitch->NewThreadId;
3151                 Switch.nCpu = pEvent->BufferContext.ProcessorNumber;
3152                 Switch.nTicks = pEvent->Header.TimeStamp.QuadPart;
3153                 MicroProfileContextSwitchPut(&Switch);
3154             }
3155         }
3156     }
3157 }
3158 
MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer)3159 ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer)
3160 {
3161     return (S.bContextSwitchStop || !S.bContextSwitchRunning) ? FALSE : TRUE;
3162 }
3163 
3164 
// EVENT_TRACE_PROPERTIES extended with trailing storage sized for
// KERNEL_LOGGER_NAME. Users in this file set LoggerNameOffset to
// sizeof(EVENT_TRACE_PROPERTIES), which is presumably the offset at which
// `dummy` begins (assumes no padding is inserted - TODO confirm).
struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES
{
    char dummy[sizeof(KERNEL_LOGGER_NAME)]; // never accessed by name in the visible code
};
3169 
MicroProfileContextSwitchShutdownTrace()3170 void MicroProfileContextSwitchShutdownTrace()
3171 {
3172     TRACEHANDLE SessionHandle = 0;
3173     MicroProfileKernelTraceProperties sessionProperties;
3174 
3175     ZeroMemory(&sessionProperties, sizeof(sessionProperties));
3176     sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
3177     sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
3178     sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution
3179     sessionProperties.Wnode.Guid = SystemTraceControlGuid;
3180     sessionProperties.BufferSize = 1;
3181     sessionProperties.NumberOfBuffers = 128;
3182     sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH;
3183     sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
3184     sessionProperties.MaximumFileSize = 0;
3185     sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
3186     sessionProperties.LogFileNameOffset = 0;
3187 
3188     EVENT_TRACE_LOGFILE log;
3189     ZeroMemory(&log, sizeof(log));
3190     log.LoggerName = KERNEL_LOGGER_NAME;
3191     log.ProcessTraceMode = 0;
3192     TRACEHANDLE hLog = OpenTrace(&log);
3193     if (hLog)
3194     {
3195         ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP);
3196     }
3197     CloseTrace(hLog);
3198 
3199 
3200 }
3201 
MicroProfileTraceThread(void * unused)3202 void* MicroProfileTraceThread(void* unused)
3203 {
3204 
3205     MicroProfileContextSwitchShutdownTrace();
3206     ULONG status = ERROR_SUCCESS;
3207     TRACEHANDLE SessionHandle = 0;
3208     MicroProfileKernelTraceProperties sessionProperties;
3209 
3210     ZeroMemory(&sessionProperties, sizeof(sessionProperties));
3211     sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
3212     sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
3213     sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution
3214     sessionProperties.Wnode.Guid = SystemTraceControlGuid;
3215     sessionProperties.BufferSize = 1;
3216     sessionProperties.NumberOfBuffers = 128;
3217     sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH|EVENT_TRACE_FLAG_PROCESS;
3218     sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
3219     sessionProperties.MaximumFileSize = 0;
3220     sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
3221     sessionProperties.LogFileNameOffset = 0;
3222 
3223 
3224     status = StartTrace((PTRACEHANDLE) &SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties);
3225 
3226     if (ERROR_SUCCESS != status)
3227     {
3228         S.bContextSwitchRunning = false;
3229         return 0;
3230     }
3231 
3232     EVENT_TRACE_LOGFILE log;
3233     ZeroMemory(&log, sizeof(log));
3234 
3235     log.LoggerName = KERNEL_LOGGER_NAME;
3236     log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
3237     log.EventCallback = MicroProfileContextSwitchCallback;
3238     log.BufferCallback = MicroProfileBufferCallback;
3239 
3240     TRACEHANDLE hLog = OpenTrace(&log);
3241     ProcessTrace(&hLog, 1, 0, 0);
3242     CloseTrace(hLog);
3243     MicroProfileContextSwitchShutdownTrace();
3244 
3245     S.bContextSwitchRunning = false;
3246     return 0;
3247 }
3248 
MicroProfileIsLocalThread(uint32_t nThreadId)3249 bool MicroProfileIsLocalThread(uint32_t nThreadId)
3250 {
3251     HANDLE h = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, nThreadId);
3252     if(h == NULL)
3253         return false;
3254     DWORD hProcess = GetProcessIdOfThread(h);
3255     CloseHandle(h);
3256     return GetCurrentProcessId() == hProcess;
3257 }
3258 
3259 #elif defined(__APPLE__)
3260 #include <sys/time.h>
MicroProfileTraceThread(void * unused)3261 void* MicroProfileTraceThread(void* unused)
3262 {
3263     FILE* pFile = fopen("mypipe", "r");
3264     if(!pFile)
3265     {
3266         printf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n");
3267         S.bContextSwitchRunning = false;
3268         return 0;
3269     }
3270     printf("STARTING TRACE THREAD\n");
3271     char* pLine = 0;
3272     size_t cap = 0;
3273     size_t len = 0;
3274     struct timeval tv;
3275 
3276     gettimeofday(&tv, NULL);
3277 
3278     uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000;
3279     uint64_t nTickEpoch = MP_TICK();
3280     uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = {0};
3281     mach_timebase_info_data_t sTimebaseInfo;
3282     mach_timebase_info(&sTimebaseInfo);
3283     S.bContextSwitchRunning = true;
3284 
3285     uint64_t nProcessed = 0;
3286     uint64_t nProcessedLast = 0;
3287     while((len = getline(&pLine, &cap, pFile))>0 && !S.bContextSwitchStop)
3288     {
3289         nProcessed += len;
3290         if(nProcessed - nProcessedLast > 10<<10)
3291         {
3292             nProcessedLast = nProcessed;
3293             printf("processed %llukb %llukb\n", (nProcessed-nProcessedLast)>>10,nProcessed >>10);
3294         }
3295 
3296         char* pX = strchr(pLine, 'X');
3297         if(pX)
3298         {
3299             int cpu = atoi(pX+1);
3300             char* pX2 = strchr(pX + 1, 'X');
3301             char* pX3 = strchr(pX2 + 1, 'X');
3302             int thread = atoi(pX2+1);
3303             char* lala;
3304             int64_t timestamp = strtoll(pX3 + 1, &lala, 10);
3305             MicroProfileContextSwitch Switch;
3306 
3307             //convert to ticks.
3308             uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch;
3309             uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom;
3310             uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch;
3311             if(cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
3312             {
3313                 Switch.nThreadOut = nLastThread[cpu];
3314                 Switch.nThreadIn = thread;
3315                 nLastThread[cpu] = thread;
3316                 Switch.nCpu = cpu;
3317                 Switch.nTicks = nTicks;
3318                 MicroProfileContextSwitchPut(&Switch);
3319             }
3320         }
3321     }
3322     printf("EXITING TRACE THREAD\n");
3323     S.bContextSwitchRunning = false;
3324     return 0;
3325 }
3326 
// Apple build: no lookup is performed, every thread id is reported as not
// belonging to this process.
bool MicroProfileIsLocalThread(uint32_t nThreadId)
{
    (void)nThreadId; // intentionally unused
    return false;
}
3331 
3332 #endif
3333 #else
3334 
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: always false.
bool MicroProfileIsLocalThread(uint32_t nThreadId)
{
    (void)nThreadId; // intentionally unused
    return false;
}
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: does nothing.
void MicroProfileStopContextSwitchTrace()
{
}
// Stub used when MICROPROFILE_CONTEXT_SWITCH_TRACE is disabled: does nothing.
void MicroProfileStartContextSwitchTrace()
{
}
3338 
3339 #endif
3340 
3341 
3342 
3343 
3344 #if MICROPROFILE_GPU_TIMERS_D3D11
MicroProfileGpuInsertTimeStamp()3345 uint32_t MicroProfileGpuInsertTimeStamp()
3346 {
3347     MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
3348     if(Frame.m_nRateQueryStarted)
3349     {
3350         uint32_t nCurrent = (Frame.m_nQueryStart + Frame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
3351         uint32_t nNext = (nCurrent + 1) % MICROPROFILE_D3D_MAX_QUERIES;
3352         if(nNext != S.GPU.m_nQueryGet)
3353         {
3354             Frame.m_nQueryCount++;
3355             ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[nCurrent];
3356             ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
3357             pContext->End(pQuery);
3358             S.GPU.m_nQueryPut = nNext;
3359             return nCurrent;
3360         }
3361     }
3362     return (uint32_t)-1;
3363 }
3364 
MicroProfileGpuGetTimeStamp(uint32_t nIndex)3365 uint64_t MicroProfileGpuGetTimeStamp(uint32_t nIndex)
3366 {
3367     if(nIndex == (uint32_t)-1)
3368     {
3369         return (uint64_t)-1;
3370     }
3371     int64_t nResult = S.GPU.m_nQueryResults[nIndex];
3372     MP_ASSERT(nResult != -1);
3373     return nResult;
3374 }
3375 
MicroProfileGpuGetData(void * pQuery,void * pData,uint32_t nDataSize)3376 bool MicroProfileGpuGetData(void* pQuery, void* pData, uint32_t nDataSize)
3377 {
3378     HRESULT hr;
3379     do
3380     {
3381         hr = ((ID3D11DeviceContext*)S.GPU.m_pDeviceContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0);
3382     }while(hr == S_FALSE);
3383     switch(hr)
3384     {
3385         case DXGI_ERROR_DEVICE_REMOVED:
3386         case DXGI_ERROR_INVALID_CALL:
3387         case E_INVALIDARG:
3388             MP_BREAK();
3389             return false;
3390 
3391     }
3392     return true;
3393 }
3394 
MicroProfileTicksPerSecondGpu()3395 uint64_t MicroProfileTicksPerSecondGpu()
3396 {
3397     return S.GPU.m_nQueryFrequency;
3398 }
3399 
MicroProfileGpuFlip()3400 void MicroProfileGpuFlip()
3401 {
3402     MicroProfileD3D11Frame& CurrentFrame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame];
3403     ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext;
3404     if(CurrentFrame.m_nRateQueryStarted)
3405     {
3406         pContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery);
3407     }
3408     uint32_t nNextFrame = (S.GPU.m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY;
3409     MicroProfileD3D11Frame& OldFrame = S.GPU.m_QueryFrames[nNextFrame];
3410     if(OldFrame.m_nRateQueryStarted)
3411     {
3412         struct RateQueryResult
3413         {
3414             uint64_t nFrequency;
3415             BOOL bDisjoint;
3416         };
3417         RateQueryResult Result;
3418         if(MicroProfileGpuGetData(OldFrame.m_pRateQuery, &Result, sizeof(Result)))
3419         {
3420             if(S.GPU.m_nQueryFrequency != (int64_t)Result.nFrequency)
3421             {
3422                 if(S.GPU.m_nQueryFrequency)
3423                 {
3424                     OutputDebugString("Query freq changing");
3425                 }
3426                 S.GPU.m_nQueryFrequency = Result.nFrequency;
3427             }
3428             uint32_t nStart = OldFrame.m_nQueryStart;
3429             uint32_t nCount = OldFrame.m_nQueryCount;
3430             for(uint32_t i = 0; i < nCount; ++i)
3431             {
3432                 uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES;
3433 
3434 
3435 
3436                 if(!MicroProfileGpuGetData(S.GPU.m_pQueries[nIndex], &S.GPU.m_nQueryResults[nIndex], sizeof(uint64_t)))
3437                 {
3438                     S.GPU.m_nQueryResults[nIndex] = -1;
3439                 }
3440             }
3441         }
3442         else
3443         {
3444             uint32_t nStart = OldFrame.m_nQueryStart;
3445             uint32_t nCount = OldFrame.m_nQueryCount;
3446             for(uint32_t i = 0; i < nCount; ++i)
3447             {
3448                 uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES;
3449                 S.GPU.m_nQueryResults[nIndex] = -1;
3450             }
3451         }
3452         S.GPU.m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES;
3453     }
3454 
3455     S.GPU.m_nQueryFrame = nNextFrame;
3456     MicroProfileD3D11Frame& NextFrame = S.GPU.m_QueryFrames[nNextFrame];
3457     pContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery);
3458     NextFrame.m_nQueryStart = S.GPU.m_nQueryPut;
3459     NextFrame.m_nQueryCount = 0;
3460 
3461     NextFrame.m_nRateQueryStarted = 1;
3462 }
3463 
MicroProfileGpuInitD3D11(void * pDevice_,void * pDeviceContext_)3464 void MicroProfileGpuInitD3D11(void* pDevice_, void* pDeviceContext_)
3465 {
3466     ID3D11Device* pDevice = (ID3D11Device*)pDevice_;
3467     ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_;
3468     S.GPU.m_pDeviceContext = pDeviceContext_;
3469 
3470     D3D11_QUERY_DESC Desc;
3471     Desc.MiscFlags = 0;
3472     Desc.Query = D3D11_QUERY_TIMESTAMP;
3473     for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i)
3474     {
3475         HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_pQueries[i]);
3476         MP_ASSERT(hr == S_OK);
3477         S.GPU.m_nQueryResults[i] = -1;
3478     }
3479     S.GPU.m_nQueryPut = 0;
3480     S.GPU.m_nQueryGet = 0;
3481     S.GPU.m_nQueryFrame = 0;
3482     S.GPU.m_nQueryFrequency = 0;
3483     Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
3484     for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
3485     {
3486         S.GPU.m_QueryFrames[i].m_nQueryStart = 0;
3487         S.GPU.m_QueryFrames[i].m_nQueryCount = 0;
3488         S.GPU.m_QueryFrames[i].m_nRateQueryStarted = 0;
3489         HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_QueryFrames[i].m_pRateQuery);
3490         MP_ASSERT(hr == S_OK);
3491     }
3492 }
3493 
3494 
MicroProfileGpuShutdown()3495 void MicroProfileGpuShutdown()
3496 {
3497     for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i)
3498     {
3499         ((ID3D11Query*)&S.GPU.m_pQueries[i])->Release();
3500         S.GPU.m_pQueries[i] = 0;
3501     }
3502     for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
3503     {
3504         ((ID3D11Query*)S.GPU.m_QueryFrames[i].m_pRateQuery)->Release();
3505         S.GPU.m_QueryFrames[i].m_pRateQuery = 0;
3506     }
3507 }
3508 
// D3D11 backend: no CPU/GPU tick reference is provided.
// Neither output is written; always returns 0.
int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu)
{
    (void)pOutCPU; // intentionally unused
    (void)pOutGpu; // intentionally unused
    return 0;
}
3513 
3514 
3515 #elif MICROPROFILE_GPU_TIMERS_GL
MicroProfileGpuInitGL()3516 void MicroProfileGpuInitGL()
3517 {
3518     S.GPU.GLTimerPos = 0;
3519     glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &S.GPU.GLTimers[0]);
3520 }
3521 
MicroProfileGpuInsertTimeStamp()3522 uint32_t MicroProfileGpuInsertTimeStamp()
3523 {
3524     uint32_t nIndex = (S.GPU.GLTimerPos+1)%MICROPROFILE_GL_MAX_QUERIES;
3525     glQueryCounter(S.GPU.GLTimers[nIndex], GL_TIMESTAMP);
3526     S.GPU.GLTimerPos = nIndex;
3527     return nIndex;
3528 }
MicroProfileGpuGetTimeStamp(uint32_t nKey)3529 uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey)
3530 {
3531     uint64_t result;
3532     glGetQueryObjectui64v(S.GPU.GLTimers[nKey], GL_QUERY_RESULT, &result);
3533     return result;
3534 }
3535 
// OpenGL backend: GPU ticks are reported at a fixed rate of
// 1,000,000,000 per second.
uint64_t MicroProfileTicksPerSecondGpu()
{
    const uint64_t nTicksPerSecond = 1000000000ll;
    return nTicksPerSecond;
}
3540 
MicroProfileGetGpuTickReference(int64_t * pOutCpu,int64_t * pOutGpu)3541 int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)
3542 {
3543     int64_t nGpuTimeStamp;
3544     glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp);
3545     if(nGpuTimeStamp)
3546     {
3547         *pOutCpu = MP_TICK();
3548         *pOutGpu = nGpuTimeStamp;
3549         #if 0 //debug test if timestamp diverges
3550         static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
3551         static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
3552         static int64_t nGpuStart = 0;
3553         static int64_t nCpuStart = 0;
3554         if(!nCpuStart)
3555         {
3556             nCpuStart = *pOutCpu;
3557             nGpuStart = *pOutGpu;
3558         }
3559         static int nCountDown = 100;
3560         if(0 == nCountDown--)
3561         {
3562             int64_t nCurCpu = *pOutCpu;
3563             int64_t nCurGpu = *pOutGpu;
3564             double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu;
3565             double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu;
3566 
3567             char buf[254];
3568             snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu);
3569             OutputDebugString(buf);
3570             nCountDown = 100;
3571         }
3572         #endif
3573         return 1;
3574     }
3575     return 0;
3576 }
3577 
3578 
3579 #endif
3580 
3581 #undef S
3582 
3583 #ifdef _WIN32
3584 #pragma warning(pop)
3585 #endif
3586 
3587 
3588 
3589 
3590 
3591 #endif
3592 #endif
3593 #ifdef MICROPROFILE_EMBED_HTML
3594 #include "microprofile_html.h"
3595 #endif
3596