1 //  SuperTuxKart - a fun racing game with go-kart
2 //  Copyright (C) 2004-2015 SuperTuxKart-Team
3 //
4 //  This program is free software; you can redistribute it and/or
5 //  modify it under the terms of the GNU General Public License
6 //  as published by the Free Software Foundation; either version 3
7 //  of the License, or (at your option) any later version.
8 //
9 //  This program is distributed in the hope that it will be useful,
10 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 //  GNU General Public License for more details.
13 //
14 //  You should have received a copy of the GNU General Public License
15 //  along with this program; if not, write to the Free Software
16 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17 
18 #include "profiler.hpp"
19 
20 #include "config/user_config.hpp"
21 #include "graphics/glwrap.hpp"
22 #include "graphics/irr_driver.hpp"
23 #include "graphics/2dutils.hpp"
24 #include "guiengine/event_handler.hpp"
25 #include "guiengine/engine.hpp"
26 #include "graphics/irr_driver.hpp"
27 #include "guiengine/scalable_font.hpp"
28 #include "io/file_manager.hpp"
29 #include "utils/file_utils.hpp"
30 #include "utils/string_utils.hpp"
31 #include "utils/tls.hpp"
32 #include "utils/vs.hpp"
33 
34 #include <algorithm>
35 #include <fstream>
36 #include <ostream>
37 #include <stack>
38 #include <sstream>
39 
40 Profiler profiler;
41 
42 // Unit is in pencentage of the screen dimensions
43 #define MARGIN_X    0.02f    // left and right margin
44 #define MARGIN_Y    0.02f    // top margin
45 #define LINE_HEIGHT 0.030f   // height of a line representing a thread
46 
47 #define MARKERS_NAMES_POS     core::rect<s32>(50,100,150,600)
48 #define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,300)
49 
50 // The width of the profiler corresponds to TIME_DRAWN_MS milliseconds
51 #define TIME_DRAWN_MS 30.0f
52 
53 // --- Begin portable precise timer ---
54 #ifdef WIN32
55     #define WIN32_LEAN_AND_MEAN
56     #include <windows.h>
57 
getTimeMilliseconds()58     double getTimeMilliseconds()
59     {
60         LARGE_INTEGER freq;
61         QueryPerformanceFrequency(&freq);
62         double perFreq = double(freq.QuadPart) / 1000.0;
63 
64         LARGE_INTEGER timer;
65         QueryPerformanceCounter(&timer);
66         return double(timer.QuadPart) / perFreq;
67     }   // getTimeMilliseconds
68 
69 #else
70     #include <sys/time.h>
getTimeMilliseconds()71     double getTimeMilliseconds()
72     {
73         struct timeval tv;
74         gettimeofday(&tv, NULL);
75         return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0);
76     }   // getTimeMilliseconds
77 #endif
78 // --- End portable precise timer ---
79 
80 //-----------------------------------------------------------------------------
Profiler()81 Profiler::Profiler()
82 {
83     m_time_last_sync      = getTimeMilliseconds();
84     m_time_between_sync   = 0.0;
85     m_freeze_state        = UNFROZEN;
86 
87     // When initializing profile class during static initialization
88     // UserConfigParams::m_max_fps may not be properly initialized with default
89     // value, so we use hard-coded default value
90     m_max_frames          = 20 * 120;
91     m_current_frame       = 0;
92     m_has_wrapped_around  = false;
93     m_threads_used = 1;
94 }   // Profile
95 
96 //-----------------------------------------------------------------------------
~Profiler()97 Profiler::~Profiler()
98 {
99 }   // ~Profiler
100 
101 thread_local int g_thread_id = -1;
102 const int MAX_THREADS = 10;
103 //-----------------------------------------------------------------------------
104 /** It is split from the constructor so that it can be avoided allocating
105  *  unnecessary memory when the profiler is never used (for example in no
106  *  graphics). */
init()107 void Profiler::init()
108 {
109     m_all_threads_data.resize(MAX_THREADS);
110 
111     // Add this thread to the thread mapping
112     g_thread_id = 0;
113     m_gpu_times.resize(Q_LAST * m_max_frames);
114 }   // init
115 
116 //-----------------------------------------------------------------------------
117 /** Returns a unique index for a thread. If the calling thread is not yet in
118  *  the mapping, it will assign a new unique id to this thread. */
getThreadID()119 int Profiler::getThreadID()
120 {
121     if (g_thread_id == -1)
122     {
123         if (m_threads_used >= MAX_THREADS)
124         {
125             g_thread_id = MAX_THREADS - 1;
126             return g_thread_id;
127         }
128         g_thread_id = m_threads_used.fetch_add(1);
129     }
130     return g_thread_id;
131 }   // getThreadID
132 
133 //-----------------------------------------------------------------------------
134 /// Push a new marker that starts now
pushCPUMarker(const char * name,const video::SColor & colour)135 void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
136 {
137     // Don't do anything when disabled or frozen
138     if (!UserConfigParams::m_profiler_enabled ||
139          m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
140         return;
141 
142     // We need to look before getting the thread id (since this might
143     // be a new thread which changes the structure).
144     m_lock.lock();
145     int thread_id = getThreadID();
146 
147     ThreadData &td = m_all_threads_data[thread_id];
148     AllEventData::iterator i = td.m_all_event_data.find(name);
149     double  start = getTimeMilliseconds() - m_time_last_sync;
150     if (i != td.m_all_event_data.end())
151     {
152         i->second.setStart(m_current_frame, start, (int)td.m_event_stack.size());
153     }
154     else
155     {
156         EventData ed(colour, m_max_frames);
157         ed.setStart(m_current_frame, start, (int)td.m_event_stack.size());
158         td.m_all_event_data[name] = ed;
159         // Ordered headings is used to determine the order in which the
160         // bar graph is drawn. Outer profiling events will be added first,
161         // so they will be drawn first, which gives the proper nested
162         // displayed of events.
163         td.m_ordered_headings.push_back(name);
164     }
165     td.m_event_stack.push_back(name);
166     m_lock.unlock();
167 }   // pushCPUMarker
168 
169 //-----------------------------------------------------------------------------
170 /// Stop the last pushed marker
popCPUMarker()171 void Profiler::popCPUMarker()
172 {
173     // Don't do anything when disabled or frozen
174     if( !UserConfigParams::m_profiler_enabled ||
175         m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
176         return;
177     double now = getTimeMilliseconds();
178 
179     m_lock.lock();
180     int thread_id = getThreadID();
181     ThreadData &td = m_all_threads_data[thread_id];
182 
183     // When the profiler gets enabled (which happens in the middle of the
184     // main loop), there can be some pops without matching pushes (for one
185     // frame) - ignore those events.
186     if (td.m_event_stack.size() == 0)
187     {
188         m_lock.unlock();
189         return;
190     }
191 
192     assert(td.m_event_stack.size() > 0);
193 
194     const std::string &name = td.m_event_stack.back();
195     td.m_all_event_data[name].setEnd(m_current_frame, now - m_time_last_sync);
196 
197     td.m_event_stack.pop_back();
198     m_lock.unlock();
199 }   // popCPUMarker
200 
201 //-----------------------------------------------------------------------------
202 /** Switches the profiler either on or off.
203  */
toggleStatus()204 void Profiler::toggleStatus()
205 {
206     UserConfigParams::m_profiler_enabled = !UserConfigParams::m_profiler_enabled;
207     // If the profiler would immediately enabled, calls that have started but
208     // not finished would not be registered correctly. So set the state to
209     // waiting, so the unfreeze started at the next sync frame (which is
210     // outside of the main loop, i.e. all profiling events inside of the main
211     // loop will work as expected.
212     if (m_freeze_state == UNFROZEN)
213         m_freeze_state = WAITING_FOR_UNFREEZE;
214 }   // toggleStatus
215 
216 //-----------------------------------------------------------------------------
217 /** Saves all data for the current frame, and starts the next frame in the
218  *  circular buffer. Any events that are currently active (e.g. in a separate
219  *  thread) will be split in two parts: the beginning (till now) in the current
220  *  frame, the rest will be added to the next frame.
221  */
synchronizeFrame()222 void Profiler::synchronizeFrame()
223 {
224     // Don't do anything when frozen
225     if(!UserConfigParams::m_profiler_enabled || m_freeze_state == FROZEN)
226         return;
227 
228     // Avoid using several times getTimeMilliseconds(),
229     // which would yield different results
230     double now = getTimeMilliseconds();
231 
232     m_lock.lock();
233     // Set index to next frame
234     int next_frame = m_current_frame+1;
235     if (next_frame >= m_max_frames)
236     {
237         next_frame = 0;
238         m_has_wrapped_around = true;
239     }
240 
241     // First finish all markers that are currently in progress, and add
242     // a new start marker for the next frame. So e.g. if a thread is busy in
243     // one event while the main thread syncs the frame, this event will get
244     // split into two parts in two consecutive frames
245     for (int i = 0; i < m_threads_used; i++)
246     {
247         ThreadData &td = m_all_threads_data[i];
248         for(unsigned int j=0; j<td.m_event_stack.size(); j++)
249         {
250             EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
251             ed.setEnd(m_current_frame, now-m_time_last_sync);
252             ed.setStart(next_frame, 0, j);
253         }   // for j in event stack
254     }   // for i in threads
255 
256     if (m_has_wrapped_around)
257     {
258         // The new entries for the circular buffer need to be cleared
259         // to make sure the new values are not accumulated on top of
260         // the data from a previous frame.
261         for (int i = 0; i < m_threads_used; i++)
262         {
263             ThreadData &td = m_all_threads_data[i];
264             AllEventData &aed = td.m_all_event_data;
265             AllEventData::iterator k;
266             for (k = aed.begin(); k != aed.end(); ++k)
267                 k->second.getMarker(next_frame).clear();
268         }
269     }   // is has wrapped around
270 
271     m_current_frame = next_frame;
272 
273     // Remember the date of last synchronization
274     m_time_between_sync = now - m_time_last_sync;
275     m_time_last_sync    = now;
276 
277     // Freeze/unfreeze as needed
278     if(m_freeze_state == WAITING_FOR_FREEZE)
279         m_freeze_state = FROZEN;
280     else if(m_freeze_state == WAITING_FOR_UNFREEZE)
281         m_freeze_state = UNFROZEN;
282 
283     m_lock.unlock();
284 }   // synchronizeFrame
285 
286 //-----------------------------------------------------------------------------
287 /// Draw the markers
draw()288 void Profiler::draw()
289 {
290 #ifndef SERVER_ONLY
291     PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
292     video::IVideoDriver*    driver = irr_driver->getVideoDriver();
293 
294     // Current frame points to the frame in which currently data is
295     // being accumulated. Draw the previous (i.e. complete) frame.
296     m_lock.lock();
297     int indx = m_current_frame - 1;
298     if (indx < 0) indx = m_max_frames - 1;
299     m_lock.unlock();
300 
301     drawBackground();
302 
303     // Force to show the pointer
304     irr_driver->showPointer();
305 
306     // Compute some values for drawing (unit: pixels, but we keep floats
307     // for reducing errors accumulation)
308     core::dimension2d<u32> screen_size = driver->getScreenSize();
309     const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
310     const double x_offset       = MARGIN_X*screen_size.Width;
311     const double y_offset       = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
312     const double line_height    = LINE_HEIGHT*screen_size.Height;
313 
314 
315     // Compute start end end time for this frame
316     double start = 99999.0f;
317     double end   = -1.0f;
318 
319     // Use this thread to compute start and end time. All other
320     // threads might have 'unfinished' events, or multiple identical events
321     // in this frame (i.e. start time would be incorrect).
322     int thread_id = getThreadID();
323     AllEventData &aed = m_all_threads_data[thread_id].m_all_event_data;
324     AllEventData::iterator j;
325     for (j = aed.begin(); j != aed.end(); ++j)
326     {
327         const Marker &marker = j->second.getMarker(indx);
328         start = std::min(start, marker.getStart());
329         end = std::max(end, marker.getEnd());
330     }   // for j in events
331 
332 
333     const double duration = end - start;
334     const double factor = profiler_width / duration;
335 
336     // Get the mouse pos
337     core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
338 
339     std::stack<AllEventData::iterator> hovered_markers;
340     for (int i = 0; i < m_threads_used; i++)
341     {
342         ThreadData &td = m_all_threads_data[i];
343         AllEventData &aed = td.m_all_event_data;
344 
345         // Thread 1 has 'proper' start and end events (assuming that each
346         // event is at most called once). But all other threads might have
347         // multiple start and end events, so the recorder start time is only
348         // of the last event and so can not be used to draw the bar graph
349         double start_xpos = 0;
350         for(int k=0; k<(int)td.m_ordered_headings.size(); k++)
351         {
352             AllEventData::iterator j = aed.find(td.m_ordered_headings[k]);
353             const Marker &marker = j->second.getMarker(indx);
354             if (i == thread_id)
355                 start_xpos = factor*marker.getStart();
356             core::rect<s32> pos((s32)(x_offset + start_xpos),
357                                 (s32)(y_offset + i*line_height),
358                                 (s32)(x_offset + start_xpos
359                                                + factor*marker.getDuration()),
360                                 (s32)(y_offset + (i + 1)*line_height)        );
361             if (i != 0)
362                 start_xpos += factor*marker.getDuration();
363 
364             // Reduce vertically the size of the markers according to their layer
365             pos.UpperLeftCorner.Y  += 2 * (int)marker.getLayer();
366             pos.LowerRightCorner.Y -= 2 * (int)marker.getLayer();
367 
368             GL32_draw2DRectangle(j->second.getColour(), pos);
369             // If the mouse cursor is over the marker, get its information
370             if (pos.isPointInside(mouse_pos))
371             {
372                 hovered_markers.push(j);
373             }
374 
375         }   // for j in AllEventdata
376     }   // for i in threads
377 
378 
379     // GPU profiler
380     QueryPerf hovered_gpu_marker = Q_LAST;
381     long hovered_gpu_marker_elapsed = 0;
382     int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
383     float total = 0;
384     for (unsigned i = 0; i < Q_LAST; i++)
385     {
386 #ifndef SERVER_ONLY
387         int n = irr_driver->getGPUTimer(i).elapsedTimeus();
388         m_gpu_times[indx*Q_LAST + i] = n;
389         total += n;
390 #endif
391     }
392 
393     static video::SColor colors[] = {
394         video::SColor(255, 255, 0, 0),
395         video::SColor(255, 0, 255, 0),
396         video::SColor(255, 0, 0, 255),
397         video::SColor(255, 255, 255, 0),
398         video::SColor(255, 255, 0, 255),
399         video::SColor(255, 0, 255, 255)
400     };
401 
402     if (hovered_markers.empty())
403     {
404         float curr_val = 0;
405         for (unsigned i = 0; i < Q_LAST; i++)
406         {
407             //Log::info("GPU Perf", "Phase %d : %d us\n", i,
408             //           irr_driver->getGPUTimer(i).elapsedTimeus());
409 
410             float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
411             core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
412                 (s32)(y_offset + gpu_y),
413                 (s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
414                 (s32)(y_offset + gpu_y + line_height));
415 
416             curr_val += elapsed;
417             GL32_draw2DRectangle(colors[i % 6], pos);
418 
419             if (pos.isPointInside(mouse_pos))
420             {
421                 hovered_gpu_marker = (QueryPerf)i;
422                 hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
423             }
424 
425         }
426 
427     }
428 
429     // Draw the end of the frame
430     {
431         s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
432         s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
433         s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
434                                 * screen_size.Height                         );
435 
436         GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
437                              core::rect<s32>(x_sync, y_up_sync,
438                                              x_sync + 1, y_down_sync));
439     }
440 
441     // Draw the hovered markers' names
442     gui::ScalableFont* font = GUIEngine::getFont();
443     if (font)
444     {
445         core::stringw text;
446         while(!hovered_markers.empty())
447         {
448             AllEventData::iterator j = hovered_markers.top();
449             const Marker &marker = j->second.getMarker(indx);
450             std::ostringstream oss;
451             oss.precision(4);
452             oss << j->first << " [" << (marker.getDuration()) << " ms / ";
453             oss.precision(3);
454             oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
455             text += oss.str().c_str();
456             hovered_markers.pop();
457         }
458         font->drawQuick(text, MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00));
459 
460         if (hovered_gpu_marker != Q_LAST)
461         {
462             std::ostringstream oss;
463             oss << irr_driver->getGPUQueryPhaseName(hovered_gpu_marker) << " : "
464                 << hovered_gpu_marker_elapsed << " us";
465             font->drawQuick(oss.str().c_str(), GPU_MARKERS_NAMES_POS,
466                        video::SColor(0xFF, 0xFF, 0x00, 0x00));
467         }
468     }
469 
470     PROFILER_POP_CPU_MARKER();
471 #endif
472 }   // draw
473 
474 //-----------------------------------------------------------------------------
475 /// Handle freeze/unfreeze
onClick(const core::vector2di & mouse_pos)476 void Profiler::onClick(const core::vector2di& mouse_pos)
477 {
478     video::IVideoDriver*            driver = irr_driver->getVideoDriver();
479     const core::dimension2d<u32>&   screen_size = driver->getScreenSize();
480 
481     core::rect<s32>background_rect(
482         (int)(MARGIN_X                      * screen_size.Width),
483         (int)(MARGIN_Y                      * screen_size.Height),
484         (int)((1.0-MARGIN_X)                * screen_size.Width),
485         (int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height)   );
486 
487     if(!background_rect.isPointInside(mouse_pos))
488         return;
489 
490     switch(m_freeze_state)
491     {
492     case UNFROZEN:
493         m_freeze_state = WAITING_FOR_FREEZE;
494         break;
495 
496     case FROZEN:
497         m_freeze_state = WAITING_FOR_UNFREEZE;
498         break;
499 
500     case WAITING_FOR_FREEZE:
501     case WAITING_FOR_UNFREEZE:
502         // the user should not be that quick, and we prefer avoiding to introduce
503         // bugs by unfrozing it while it has not frozen yet.
504         // Same the other way around.
505         break;
506     }
507 }   // onClick
508 
509 //-----------------------------------------------------------------------------
510 /// Helper to draw a white background
drawBackground()511 void Profiler::drawBackground()
512 {
513 #ifndef SERVER_ONLY
514     video::IVideoDriver*            driver = irr_driver->getVideoDriver();
515     const core::dimension2d<u32>&   screen_size = driver->getScreenSize();
516 
517     core::rect<s32>background_rect(
518         (int)(MARGIN_X                      * screen_size.Width),
519         (int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height),
520         (int)((1.0-MARGIN_X)                * screen_size.Width),
521         (int)((MARGIN_Y + 0.25f)             * screen_size.Height));
522 
523     video::SColor   color(0x88, 0xFF, 0xFF, 0xFF);
524     GL32_draw2DRectangle(color, background_rect);
525 #endif
526 }   // drawBackground
527 
528 //-----------------------------------------------------------------------------
529 /** Saves the collected profile data to a file. Filename is based on the
530  *  stdout name (with -profile appended).
531  */
writeToFile()532 void Profiler::writeToFile()
533 {
534     m_lock.lock();
535     std::string base_name =
536                file_manager->getUserConfigFile(file_manager->getStdoutName());
537     // First CPU data
538     for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
539     {
540         std::ofstream f(FileUtils::getPortableWritingPath(
541             base_name + ".profile-cpu-" + StringUtils::toString(thread_id)));
542         ThreadData &td = m_all_threads_data[thread_id];
543         f << "#  ";
544         for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
545             f << "\"" << td.m_ordered_headings[i] << "(" << i+1 <<")\"   ";
546         f << std::endl;
547         int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
548         if (start > m_max_frames) start -= m_max_frames;
549         while (start != m_current_frame)
550         {
551             for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
552             {
553                 const EventData &ed = td.m_all_event_data[td.m_ordered_headings[i]];
554                 f << int(ed.getMarker(start).getDuration()*1000) << " ";
555             }   // for i i new_headings
556             f << std::endl;
557             start = (start + 1) % m_max_frames;
558         }   // while start != m_current_frame
559         f.close();
560     }   // for all thread_ids
561 
562     std::ofstream f_gpu(FileUtils::getPortableWritingPath(base_name + ".profile-gpu"));
563     f_gpu << "# ";
564 
565     for (unsigned i = 0; i < Q_LAST; i++)
566     {
567         f_gpu << "\"" << irr_driver->getGPUQueryPhaseName(i) << "(" << i+1 << ")\"   ";
568     }   // for i < Q_LAST
569     f_gpu << std::endl;
570 
571     int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
572     if (start > m_max_frames) start -= m_max_frames;
573     while (start != m_current_frame)
574     {
575         for (unsigned i = 0; i < Q_LAST; i++)
576         {
577             f_gpu << m_gpu_times[start*Q_LAST + i] << "   ";
578         }   // for i < Q_LAST
579         f_gpu << std::endl;
580         start = (start + 1) % m_max_frames;
581     }
582     f_gpu.close();
583     m_lock.unlock();
584 
585 }   // writeFile
586