1 // SuperTuxKart - a fun racing game with go-kart
2 // Copyright (C) 2004-2015 SuperTuxKart-Team
3 //
4 // This program is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU General Public License
6 // as published by the Free Software Foundation; either version 3
7 // of the License, or (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17
18 #include "profiler.hpp"
19
20 #include "config/user_config.hpp"
21 #include "graphics/glwrap.hpp"
22 #include "graphics/irr_driver.hpp"
23 #include "graphics/2dutils.hpp"
24 #include "guiengine/event_handler.hpp"
25 #include "guiengine/engine.hpp"
26 #include "graphics/irr_driver.hpp"
27 #include "guiengine/scalable_font.hpp"
28 #include "io/file_manager.hpp"
29 #include "utils/file_utils.hpp"
30 #include "utils/string_utils.hpp"
31 #include "utils/tls.hpp"
32 #include "utils/vs.hpp"
33
34 #include <algorithm>
35 #include <fstream>
36 #include <ostream>
37 #include <stack>
38 #include <sstream>
39
40 Profiler profiler;
41
42 // Unit is in pencentage of the screen dimensions
43 #define MARGIN_X 0.02f // left and right margin
44 #define MARGIN_Y 0.02f // top margin
45 #define LINE_HEIGHT 0.030f // height of a line representing a thread
46
47 #define MARKERS_NAMES_POS core::rect<s32>(50,100,150,600)
48 #define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,300)
49
50 // The width of the profiler corresponds to TIME_DRAWN_MS milliseconds
51 #define TIME_DRAWN_MS 30.0f
52
53 // --- Begin portable precise timer ---
54 #ifdef WIN32
55 #define WIN32_LEAN_AND_MEAN
56 #include <windows.h>
57
getTimeMilliseconds()58 double getTimeMilliseconds()
59 {
60 LARGE_INTEGER freq;
61 QueryPerformanceFrequency(&freq);
62 double perFreq = double(freq.QuadPart) / 1000.0;
63
64 LARGE_INTEGER timer;
65 QueryPerformanceCounter(&timer);
66 return double(timer.QuadPart) / perFreq;
67 } // getTimeMilliseconds
68
69 #else
70 #include <sys/time.h>
getTimeMilliseconds()71 double getTimeMilliseconds()
72 {
73 struct timeval tv;
74 gettimeofday(&tv, NULL);
75 return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0);
76 } // getTimeMilliseconds
77 #endif
78 // --- End portable precise timer ---
79
80 //-----------------------------------------------------------------------------
Profiler()81 Profiler::Profiler()
82 {
83 m_time_last_sync = getTimeMilliseconds();
84 m_time_between_sync = 0.0;
85 m_freeze_state = UNFROZEN;
86
87 // When initializing profile class during static initialization
88 // UserConfigParams::m_max_fps may not be properly initialized with default
89 // value, so we use hard-coded default value
90 m_max_frames = 20 * 120;
91 m_current_frame = 0;
92 m_has_wrapped_around = false;
93 m_threads_used = 1;
94 } // Profile
95
96 //-----------------------------------------------------------------------------
~Profiler()97 Profiler::~Profiler()
98 {
99 } // ~Profiler
100
101 thread_local int g_thread_id = -1;
102 const int MAX_THREADS = 10;
103 //-----------------------------------------------------------------------------
104 /** It is split from the constructor so that it can be avoided allocating
105 * unnecessary memory when the profiler is never used (for example in no
106 * graphics). */
init()107 void Profiler::init()
108 {
109 m_all_threads_data.resize(MAX_THREADS);
110
111 // Add this thread to the thread mapping
112 g_thread_id = 0;
113 m_gpu_times.resize(Q_LAST * m_max_frames);
114 } // init
115
116 //-----------------------------------------------------------------------------
117 /** Returns a unique index for a thread. If the calling thread is not yet in
118 * the mapping, it will assign a new unique id to this thread. */
getThreadID()119 int Profiler::getThreadID()
120 {
121 if (g_thread_id == -1)
122 {
123 if (m_threads_used >= MAX_THREADS)
124 {
125 g_thread_id = MAX_THREADS - 1;
126 return g_thread_id;
127 }
128 g_thread_id = m_threads_used.fetch_add(1);
129 }
130 return g_thread_id;
131 } // getThreadID
132
133 //-----------------------------------------------------------------------------
134 /// Push a new marker that starts now
pushCPUMarker(const char * name,const video::SColor & colour)135 void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
136 {
137 // Don't do anything when disabled or frozen
138 if (!UserConfigParams::m_profiler_enabled ||
139 m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
140 return;
141
142 // We need to look before getting the thread id (since this might
143 // be a new thread which changes the structure).
144 m_lock.lock();
145 int thread_id = getThreadID();
146
147 ThreadData &td = m_all_threads_data[thread_id];
148 AllEventData::iterator i = td.m_all_event_data.find(name);
149 double start = getTimeMilliseconds() - m_time_last_sync;
150 if (i != td.m_all_event_data.end())
151 {
152 i->second.setStart(m_current_frame, start, (int)td.m_event_stack.size());
153 }
154 else
155 {
156 EventData ed(colour, m_max_frames);
157 ed.setStart(m_current_frame, start, (int)td.m_event_stack.size());
158 td.m_all_event_data[name] = ed;
159 // Ordered headings is used to determine the order in which the
160 // bar graph is drawn. Outer profiling events will be added first,
161 // so they will be drawn first, which gives the proper nested
162 // displayed of events.
163 td.m_ordered_headings.push_back(name);
164 }
165 td.m_event_stack.push_back(name);
166 m_lock.unlock();
167 } // pushCPUMarker
168
169 //-----------------------------------------------------------------------------
170 /// Stop the last pushed marker
popCPUMarker()171 void Profiler::popCPUMarker()
172 {
173 // Don't do anything when disabled or frozen
174 if( !UserConfigParams::m_profiler_enabled ||
175 m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
176 return;
177 double now = getTimeMilliseconds();
178
179 m_lock.lock();
180 int thread_id = getThreadID();
181 ThreadData &td = m_all_threads_data[thread_id];
182
183 // When the profiler gets enabled (which happens in the middle of the
184 // main loop), there can be some pops without matching pushes (for one
185 // frame) - ignore those events.
186 if (td.m_event_stack.size() == 0)
187 {
188 m_lock.unlock();
189 return;
190 }
191
192 assert(td.m_event_stack.size() > 0);
193
194 const std::string &name = td.m_event_stack.back();
195 td.m_all_event_data[name].setEnd(m_current_frame, now - m_time_last_sync);
196
197 td.m_event_stack.pop_back();
198 m_lock.unlock();
199 } // popCPUMarker
200
201 //-----------------------------------------------------------------------------
202 /** Switches the profiler either on or off.
203 */
toggleStatus()204 void Profiler::toggleStatus()
205 {
206 UserConfigParams::m_profiler_enabled = !UserConfigParams::m_profiler_enabled;
207 // If the profiler would immediately enabled, calls that have started but
208 // not finished would not be registered correctly. So set the state to
209 // waiting, so the unfreeze started at the next sync frame (which is
210 // outside of the main loop, i.e. all profiling events inside of the main
211 // loop will work as expected.
212 if (m_freeze_state == UNFROZEN)
213 m_freeze_state = WAITING_FOR_UNFREEZE;
214 } // toggleStatus
215
216 //-----------------------------------------------------------------------------
217 /** Saves all data for the current frame, and starts the next frame in the
218 * circular buffer. Any events that are currently active (e.g. in a separate
219 * thread) will be split in two parts: the beginning (till now) in the current
220 * frame, the rest will be added to the next frame.
221 */
synchronizeFrame()222 void Profiler::synchronizeFrame()
223 {
224 // Don't do anything when frozen
225 if(!UserConfigParams::m_profiler_enabled || m_freeze_state == FROZEN)
226 return;
227
228 // Avoid using several times getTimeMilliseconds(),
229 // which would yield different results
230 double now = getTimeMilliseconds();
231
232 m_lock.lock();
233 // Set index to next frame
234 int next_frame = m_current_frame+1;
235 if (next_frame >= m_max_frames)
236 {
237 next_frame = 0;
238 m_has_wrapped_around = true;
239 }
240
241 // First finish all markers that are currently in progress, and add
242 // a new start marker for the next frame. So e.g. if a thread is busy in
243 // one event while the main thread syncs the frame, this event will get
244 // split into two parts in two consecutive frames
245 for (int i = 0; i < m_threads_used; i++)
246 {
247 ThreadData &td = m_all_threads_data[i];
248 for(unsigned int j=0; j<td.m_event_stack.size(); j++)
249 {
250 EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
251 ed.setEnd(m_current_frame, now-m_time_last_sync);
252 ed.setStart(next_frame, 0, j);
253 } // for j in event stack
254 } // for i in threads
255
256 if (m_has_wrapped_around)
257 {
258 // The new entries for the circular buffer need to be cleared
259 // to make sure the new values are not accumulated on top of
260 // the data from a previous frame.
261 for (int i = 0; i < m_threads_used; i++)
262 {
263 ThreadData &td = m_all_threads_data[i];
264 AllEventData &aed = td.m_all_event_data;
265 AllEventData::iterator k;
266 for (k = aed.begin(); k != aed.end(); ++k)
267 k->second.getMarker(next_frame).clear();
268 }
269 } // is has wrapped around
270
271 m_current_frame = next_frame;
272
273 // Remember the date of last synchronization
274 m_time_between_sync = now - m_time_last_sync;
275 m_time_last_sync = now;
276
277 // Freeze/unfreeze as needed
278 if(m_freeze_state == WAITING_FOR_FREEZE)
279 m_freeze_state = FROZEN;
280 else if(m_freeze_state == WAITING_FOR_UNFREEZE)
281 m_freeze_state = UNFROZEN;
282
283 m_lock.unlock();
284 } // synchronizeFrame
285
286 //-----------------------------------------------------------------------------
287 /// Draw the markers
draw()288 void Profiler::draw()
289 {
290 #ifndef SERVER_ONLY
291 PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
292 video::IVideoDriver* driver = irr_driver->getVideoDriver();
293
294 // Current frame points to the frame in which currently data is
295 // being accumulated. Draw the previous (i.e. complete) frame.
296 m_lock.lock();
297 int indx = m_current_frame - 1;
298 if (indx < 0) indx = m_max_frames - 1;
299 m_lock.unlock();
300
301 drawBackground();
302
303 // Force to show the pointer
304 irr_driver->showPointer();
305
306 // Compute some values for drawing (unit: pixels, but we keep floats
307 // for reducing errors accumulation)
308 core::dimension2d<u32> screen_size = driver->getScreenSize();
309 const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
310 const double x_offset = MARGIN_X*screen_size.Width;
311 const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
312 const double line_height = LINE_HEIGHT*screen_size.Height;
313
314
315 // Compute start end end time for this frame
316 double start = 99999.0f;
317 double end = -1.0f;
318
319 // Use this thread to compute start and end time. All other
320 // threads might have 'unfinished' events, or multiple identical events
321 // in this frame (i.e. start time would be incorrect).
322 int thread_id = getThreadID();
323 AllEventData &aed = m_all_threads_data[thread_id].m_all_event_data;
324 AllEventData::iterator j;
325 for (j = aed.begin(); j != aed.end(); ++j)
326 {
327 const Marker &marker = j->second.getMarker(indx);
328 start = std::min(start, marker.getStart());
329 end = std::max(end, marker.getEnd());
330 } // for j in events
331
332
333 const double duration = end - start;
334 const double factor = profiler_width / duration;
335
336 // Get the mouse pos
337 core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
338
339 std::stack<AllEventData::iterator> hovered_markers;
340 for (int i = 0; i < m_threads_used; i++)
341 {
342 ThreadData &td = m_all_threads_data[i];
343 AllEventData &aed = td.m_all_event_data;
344
345 // Thread 1 has 'proper' start and end events (assuming that each
346 // event is at most called once). But all other threads might have
347 // multiple start and end events, so the recorder start time is only
348 // of the last event and so can not be used to draw the bar graph
349 double start_xpos = 0;
350 for(int k=0; k<(int)td.m_ordered_headings.size(); k++)
351 {
352 AllEventData::iterator j = aed.find(td.m_ordered_headings[k]);
353 const Marker &marker = j->second.getMarker(indx);
354 if (i == thread_id)
355 start_xpos = factor*marker.getStart();
356 core::rect<s32> pos((s32)(x_offset + start_xpos),
357 (s32)(y_offset + i*line_height),
358 (s32)(x_offset + start_xpos
359 + factor*marker.getDuration()),
360 (s32)(y_offset + (i + 1)*line_height) );
361 if (i != 0)
362 start_xpos += factor*marker.getDuration();
363
364 // Reduce vertically the size of the markers according to their layer
365 pos.UpperLeftCorner.Y += 2 * (int)marker.getLayer();
366 pos.LowerRightCorner.Y -= 2 * (int)marker.getLayer();
367
368 GL32_draw2DRectangle(j->second.getColour(), pos);
369 // If the mouse cursor is over the marker, get its information
370 if (pos.isPointInside(mouse_pos))
371 {
372 hovered_markers.push(j);
373 }
374
375 } // for j in AllEventdata
376 } // for i in threads
377
378
379 // GPU profiler
380 QueryPerf hovered_gpu_marker = Q_LAST;
381 long hovered_gpu_marker_elapsed = 0;
382 int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
383 float total = 0;
384 for (unsigned i = 0; i < Q_LAST; i++)
385 {
386 #ifndef SERVER_ONLY
387 int n = irr_driver->getGPUTimer(i).elapsedTimeus();
388 m_gpu_times[indx*Q_LAST + i] = n;
389 total += n;
390 #endif
391 }
392
393 static video::SColor colors[] = {
394 video::SColor(255, 255, 0, 0),
395 video::SColor(255, 0, 255, 0),
396 video::SColor(255, 0, 0, 255),
397 video::SColor(255, 255, 255, 0),
398 video::SColor(255, 255, 0, 255),
399 video::SColor(255, 0, 255, 255)
400 };
401
402 if (hovered_markers.empty())
403 {
404 float curr_val = 0;
405 for (unsigned i = 0; i < Q_LAST; i++)
406 {
407 //Log::info("GPU Perf", "Phase %d : %d us\n", i,
408 // irr_driver->getGPUTimer(i).elapsedTimeus());
409
410 float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
411 core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
412 (s32)(y_offset + gpu_y),
413 (s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
414 (s32)(y_offset + gpu_y + line_height));
415
416 curr_val += elapsed;
417 GL32_draw2DRectangle(colors[i % 6], pos);
418
419 if (pos.isPointInside(mouse_pos))
420 {
421 hovered_gpu_marker = (QueryPerf)i;
422 hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
423 }
424
425 }
426
427 }
428
429 // Draw the end of the frame
430 {
431 s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
432 s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
433 s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
434 * screen_size.Height );
435
436 GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
437 core::rect<s32>(x_sync, y_up_sync,
438 x_sync + 1, y_down_sync));
439 }
440
441 // Draw the hovered markers' names
442 gui::ScalableFont* font = GUIEngine::getFont();
443 if (font)
444 {
445 core::stringw text;
446 while(!hovered_markers.empty())
447 {
448 AllEventData::iterator j = hovered_markers.top();
449 const Marker &marker = j->second.getMarker(indx);
450 std::ostringstream oss;
451 oss.precision(4);
452 oss << j->first << " [" << (marker.getDuration()) << " ms / ";
453 oss.precision(3);
454 oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
455 text += oss.str().c_str();
456 hovered_markers.pop();
457 }
458 font->drawQuick(text, MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00));
459
460 if (hovered_gpu_marker != Q_LAST)
461 {
462 std::ostringstream oss;
463 oss << irr_driver->getGPUQueryPhaseName(hovered_gpu_marker) << " : "
464 << hovered_gpu_marker_elapsed << " us";
465 font->drawQuick(oss.str().c_str(), GPU_MARKERS_NAMES_POS,
466 video::SColor(0xFF, 0xFF, 0x00, 0x00));
467 }
468 }
469
470 PROFILER_POP_CPU_MARKER();
471 #endif
472 } // draw
473
474 //-----------------------------------------------------------------------------
475 /// Handle freeze/unfreeze
onClick(const core::vector2di & mouse_pos)476 void Profiler::onClick(const core::vector2di& mouse_pos)
477 {
478 video::IVideoDriver* driver = irr_driver->getVideoDriver();
479 const core::dimension2d<u32>& screen_size = driver->getScreenSize();
480
481 core::rect<s32>background_rect(
482 (int)(MARGIN_X * screen_size.Width),
483 (int)(MARGIN_Y * screen_size.Height),
484 (int)((1.0-MARGIN_X) * screen_size.Width),
485 (int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) );
486
487 if(!background_rect.isPointInside(mouse_pos))
488 return;
489
490 switch(m_freeze_state)
491 {
492 case UNFROZEN:
493 m_freeze_state = WAITING_FOR_FREEZE;
494 break;
495
496 case FROZEN:
497 m_freeze_state = WAITING_FOR_UNFREEZE;
498 break;
499
500 case WAITING_FOR_FREEZE:
501 case WAITING_FOR_UNFREEZE:
502 // the user should not be that quick, and we prefer avoiding to introduce
503 // bugs by unfrozing it while it has not frozen yet.
504 // Same the other way around.
505 break;
506 }
507 } // onClick
508
509 //-----------------------------------------------------------------------------
510 /// Helper to draw a white background
drawBackground()511 void Profiler::drawBackground()
512 {
513 #ifndef SERVER_ONLY
514 video::IVideoDriver* driver = irr_driver->getVideoDriver();
515 const core::dimension2d<u32>& screen_size = driver->getScreenSize();
516
517 core::rect<s32>background_rect(
518 (int)(MARGIN_X * screen_size.Width),
519 (int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height),
520 (int)((1.0-MARGIN_X) * screen_size.Width),
521 (int)((MARGIN_Y + 0.25f) * screen_size.Height));
522
523 video::SColor color(0x88, 0xFF, 0xFF, 0xFF);
524 GL32_draw2DRectangle(color, background_rect);
525 #endif
526 } // drawBackground
527
528 //-----------------------------------------------------------------------------
529 /** Saves the collected profile data to a file. Filename is based on the
530 * stdout name (with -profile appended).
531 */
writeToFile()532 void Profiler::writeToFile()
533 {
534 m_lock.lock();
535 std::string base_name =
536 file_manager->getUserConfigFile(file_manager->getStdoutName());
537 // First CPU data
538 for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
539 {
540 std::ofstream f(FileUtils::getPortableWritingPath(
541 base_name + ".profile-cpu-" + StringUtils::toString(thread_id)));
542 ThreadData &td = m_all_threads_data[thread_id];
543 f << "# ";
544 for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
545 f << "\"" << td.m_ordered_headings[i] << "(" << i+1 <<")\" ";
546 f << std::endl;
547 int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
548 if (start > m_max_frames) start -= m_max_frames;
549 while (start != m_current_frame)
550 {
551 for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
552 {
553 const EventData &ed = td.m_all_event_data[td.m_ordered_headings[i]];
554 f << int(ed.getMarker(start).getDuration()*1000) << " ";
555 } // for i i new_headings
556 f << std::endl;
557 start = (start + 1) % m_max_frames;
558 } // while start != m_current_frame
559 f.close();
560 } // for all thread_ids
561
562 std::ofstream f_gpu(FileUtils::getPortableWritingPath(base_name + ".profile-gpu"));
563 f_gpu << "# ";
564
565 for (unsigned i = 0; i < Q_LAST; i++)
566 {
567 f_gpu << "\"" << irr_driver->getGPUQueryPhaseName(i) << "(" << i+1 << ")\" ";
568 } // for i < Q_LAST
569 f_gpu << std::endl;
570
571 int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
572 if (start > m_max_frames) start -= m_max_frames;
573 while (start != m_current_frame)
574 {
575 for (unsigned i = 0; i < Q_LAST; i++)
576 {
577 f_gpu << m_gpu_times[start*Q_LAST + i] << " ";
578 } // for i < Q_LAST
579 f_gpu << std::endl;
580 start = (start + 1) % m_max_frames;
581 }
582 f_gpu.close();
583 m_lock.unlock();
584
585 } // writeFile
586