1 /*
2  Copyright (c) 2013 yvt
3 
4  This file is part of OpenSpades.
5 
6  OpenSpades is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OpenSpades is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OpenSpades.  If not, see <http://www.gnu.org/licenses/>.
18 
19  */
20 
21 #include <algorithm>
22 #include <chrono>
23 #include <cstdarg>
24 #include <cstdio>
25 #include <cstdlib>
26 #include <ctime>
27 #include <functional>
28 #include <list>
29 #include <unordered_map>
30 #include <cstring>
31 
32 #include "GLProfiler.h"
33 
34 #include "GLRenderer.h"
35 #include "GLSettings.h"
36 #include "IGLDevice.h"
37 #include <Core/Debug.h>
38 #include <Core/Settings.h>
39 #include <Core/TMPUtils.h>
40 
41 namespace chrono = std::chrono;
42 
43 namespace spades {
44 	namespace draw {
45 
46 		namespace {
47 			chrono::high_resolution_clock::time_point startTimePoint;
48 
ResetTimes()49 			void ResetTimes() { startTimePoint = chrono::high_resolution_clock::now(); }
50 
GetWallClockTime()51 			double GetWallClockTime() {
52 				return chrono::duration_cast<chrono::microseconds>(
53 				         chrono::high_resolution_clock::now() - startTimePoint)
54 				         .count() /
55 				       1000000.0;
56 			}
57 		}
58 
59 		struct GLProfiler::Measurement {
60 			double totalWallClockTime = 0.0;
61 			double totalGPUTime = 0.0;
62 			int totalNumFrames = 0;
63 		};
64 
65 		struct GLProfiler::Phase {
66 			const std::string name;
67 			std::string description;
68 
69 			// can't use vector here; a reference to a vector's element can be invalidated
70 			std::list<Phase> subphases;
71 
72 			std::list<Phase>::iterator nextSubphaseIterator;
73 
74 			double startWallClockTime;
75 			double endWallClockTime;
76 			stmp::optional<std::pair<std::size_t, std::size_t>> queryObjectIndices;
77 
78 			Measurement measurementLatest;
79 			bool measured = false;
80 
81 			stmp::optional<Measurement> measurementSaved;
82 
Phasespades::draw::GLProfiler::Phase83 			Phase(const std::string &name) : name{name}, nextSubphaseIterator{subphases.begin()} {}
84 		};
85 
GLProfiler(GLRenderer & renderer)86 		GLProfiler::GLProfiler(GLRenderer &renderer)
87 		    : m_settings{renderer.GetSettings()},
88 		      m_renderer{renderer},
89 		      m_device{*renderer.GetGLDevice()},
90 		      m_active{false},
91 		      m_lastSaveTime{0.0},
92 		      m_shouldSaveThisFrame{false},
93 		      m_waitingTimerQueryResult{false} {
94 			SPADES_MARK_FUNCTION();
95 
96 			m_font = m_renderer.RegisterImage("Gfx/Fonts/Debug.png");
97 			m_white = m_renderer.RegisterImage("Gfx/White.tga");
98 		}
99 
~GLProfiler()100 		GLProfiler::~GLProfiler() {
101 			SPADES_MARK_FUNCTION();
102 			for (IGLDevice::UInteger timerQueryObject : m_timerQueryObjects) {
103 				m_device.DeleteQuery(timerQueryObject);
104 			}
105 		}
106 
BeginFrame()107 		void GLProfiler::BeginFrame() {
108 			SPADES_MARK_FUNCTION();
109 
110 			if (!m_settings.r_debugTiming) {
111 				// Clear history
112 				m_root.reset();
113 				m_waitingTimerQueryResult = false;
114 				return;
115 			}
116 
117 			if (m_waitingTimerQueryResult) {
118 				FinalizeMeasurement();
119 
120 				// Still waiting?
121 				if (m_waitingTimerQueryResult) {
122 					return;
123 				}
124 			}
125 
126 			SPAssert(m_stack.empty());
127 			SPAssert(!m_active);
128 
129 			m_active = true;
130 
131 			if (m_settings.r_debugTimingAverage) {
132 				m_shouldSaveThisFrame = m_stopwatch.GetTime() > m_lastSaveTime + 1.0;
133 			} else {
134 				m_shouldSaveThisFrame = true;
135 			}
136 
137 			ResetTimes();
138 
139 			if (m_settings.r_debugTimingGPUTime) {
140 				m_currentTimerQueryObjectIndex = 0;
141 				if (m_timerQueryObjects.empty()) {
142 					m_timerQueryObjects.push_back(m_device.GenQuery());
143 				}
144 				m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[0]);
145 			}
146 
147 			if (m_root) {
148 				struct Traverser {
149 					GLProfiler &self;
150 					Traverser(GLProfiler &self) : self{self} {}
151 					void Traverse(Phase &phase) {
152 						phase.measured = false;
153 						phase.queryObjectIndices.reset();
154 						phase.nextSubphaseIterator = phase.subphases.begin();
155 						for (Phase &subphase : phase.subphases) {
156 							Traverse(subphase);
157 						}
158 					}
159 				};
160 				Traverser{*this}.Traverse(*m_root);
161 			}
162 
163 			if (!m_root) {
164 				m_root.reset(new Phase{"Frame"});
165 				m_root->description = "Frame";
166 			}
167 
168 			BeginPhaseInner(*m_root);
169 
170 			m_stack.emplace_back(*m_root);
171 		}
172 
EndFrame()173 		void GLProfiler::EndFrame() {
174 			SPADES_MARK_FUNCTION();
175 
176 			if (!m_active) {
177 				return;
178 			}
179 
180 			SPAssert(m_stack.size() == 1);
181 			SPAssert(&GetCurrentPhase() == m_root.get());
182 
183 			EndPhaseInner();
184 			SPAssert(m_stack.empty());
185 
186 			m_active = false;
187 
188 			FinalizeMeasurement();
189 		}
190 
FinalizeMeasurement()191 		void GLProfiler::FinalizeMeasurement() {
192 			SPADES_MARK_FUNCTION();
193 			Phase &root = *m_root;
194 
195 			// Fill gap
196 			if (m_settings.r_debugTimingFillGap) {
197 				struct Traverser {
198 					GLProfiler &self;
199 					Traverser(GLProfiler &self) : self{self} {}
200 					void Traverse(Phase &phase, stmp::optional<std::pair<Phase&, bool>> base) {
201 						if (!phase.queryObjectIndices) {
202 							return;
203 						}
204 						if (base) {
205 							auto baseIndices = *(*base).first.queryObjectIndices;
206 							(*phase.queryObjectIndices).second = (*base).second ? baseIndices.second : baseIndices.first;
207 						}
208 						auto it = phase.subphases.begin();
209 						while (it != phase.subphases.end() && !it->queryObjectIndices) {
210 							++it;
211 						}
212 						while (it != phase.subphases.end()) {
213 							auto it2 = it; ++it2;
214 							while (it2 != phase.subphases.end() && !it2->queryObjectIndices) {
215 								++it2;
216 							}
217 							if (it2 == phase.subphases.end()) {
218 								Traverse(*it, std::pair<Phase&, bool>{phase, true});
219 							} else {
220 								Traverse(*it, std::pair<Phase&, bool>{*it2, false});
221 							}
222 							it = it2;
223 						}
224 					}
225 				};
226 				Traverser{*this}.Traverse(root, {});
227 				// TODO: fill gap for wall clock times
228 			}
229 
230 			// Collect GPU time information
231 			if (m_settings.r_debugTimingGPUTime) {
232 				if (!m_waitingTimerQueryResult) {
233 					m_device.EndQuery(IGLDevice::TimeElapsed);
234 				}
235 
236 				m_waitingTimerQueryResult = false;
237 
238 				// are results available?
239 				for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) {
240 					if (!m_device.GetQueryObjectUInteger(m_timerQueryObjects[i],
241 					                                     IGLDevice::QueryResultAvailable)) {
242 						m_waitingTimerQueryResult = true;
243 						return;
244 					}
245 				}
246 
247 				double t = 0;
248 				m_timerQueryTimes.resize(m_currentTimerQueryObjectIndex + 2);
249 				m_timerQueryTimes[0] = 0.0;
250 				for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) {
251 					auto nanoseconds = m_device.GetQueryObjectUInteger64(m_timerQueryObjects.at(i),
252 					                                                     IGLDevice::QueryResult);
253 					t += static_cast<double>(nanoseconds) / 1.0e+9;
254 					m_timerQueryTimes[i + 1] = t;
255 				}
256 				struct Traverser {
257 					GLProfiler &self;
258 					Traverser(GLProfiler &self) : self{self} {}
259 					void Traverse(Phase &phase) {
260 						if (phase.queryObjectIndices) {
261 							auto indices = *phase.queryObjectIndices;
262 							double time1 = self.m_timerQueryTimes.at(indices.first);
263 							double time2 = self.m_timerQueryTimes.at(indices.second);
264 							phase.measurementLatest.totalGPUTime += time2 - time1;
265 						}
266 
267 						for (Phase &subphase : phase.subphases) {
268 							Traverse(subphase);
269 						}
270 					}
271 				};
272 				Traverser{*this}.Traverse(root);
273 			} else {
274 				m_waitingTimerQueryResult = false;
275 			}
276 
277 			if (m_shouldSaveThisFrame) {
278 				struct Traverser {
279 					GLProfiler &self;
280 					Traverser(GLProfiler &self) : self{self} {}
281 					void Traverse(Phase &phase) {
282 						phase.measurementSaved = phase.measurementLatest;
283 						phase.measurementLatest = Measurement{};
284 						for (Phase &subphase : phase.subphases) {
285 							Traverse(subphase);
286 						}
287 					}
288 				};
289 				Traverser{*this}.Traverse(root);
290 
291 				m_lastSaveTime = m_stopwatch.GetTime();
292 
293 				// Output the result to the system log
294 				if (m_settings.r_debugTimingOutputLog) {
295 					LogResult(root);
296 				}
297 			}
298 		}
299 
NewTimerQuery()300 		void GLProfiler::NewTimerQuery() {
301 			SPADES_MARK_FUNCTION_DEBUG();
302 
303 			m_device.EndQuery(IGLDevice::TimeElapsed);
304 			++m_currentTimerQueryObjectIndex;
305 
306 			if (m_currentTimerQueryObjectIndex >= m_timerQueryObjects.size()) {
307 				m_timerQueryObjects.push_back(m_device.GenQuery());
308 			}
309 			m_device.BeginQuery(IGLDevice::TimeElapsed,
310 			                    m_timerQueryObjects[m_currentTimerQueryObjectIndex]);
311 		}
312 
BeginPhase(const std::string & name,const std::string & description)313 		void GLProfiler::BeginPhase(const std::string &name, const std::string &description) {
314 			SPADES_MARK_FUNCTION_DEBUG();
315 
316 			SPAssert(m_active);
317 
318 			Phase &current = GetCurrentPhase();
319 
320 			auto it = current.nextSubphaseIterator;
321 
322 			for (; it != current.subphases.end(); ++it) {
323 				if (it->name == name) {
324 					break;
325 				}
326 			}
327 
328 			if (it == current.subphases.end()) {
329 				// Create a new subphase
330 				it = current.subphases.emplace(current.nextSubphaseIterator, name);
331 			}
332 
333 			current.nextSubphaseIterator = it;
334 			++current.nextSubphaseIterator;
335 
336 			Phase &next = *it;
337 			m_stack.emplace_back(next);
338 
339 			if (next.measured) {
340 				SPRaise("Cannot measure the timing of phase '%s' twice", name.c_str());
341 			}
342 
343 			next.measured = true;
344 			next.description = description;
345 
346 			BeginPhaseInner(next);
347 		}
348 
BeginPhaseInner(Phase & phase)349 		void GLProfiler::BeginPhaseInner(Phase &phase) {
350 			SPADES_MARK_FUNCTION_DEBUG();
351 
352 			if (m_settings.r_debugTimingFlush) {
353 				m_device.Flush();
354 			}
355 
356 			phase.startWallClockTime = GetWallClockTime();
357 
358 			if (m_settings.r_debugTimingGPUTime) {
359 				NewTimerQuery();
360 				phase.queryObjectIndices = std::pair<std::size_t, std::size_t>{};
361 				(*phase.queryObjectIndices).first = m_currentTimerQueryObjectIndex;
362 				(*phase.queryObjectIndices).second = static_cast<std::size_t>(-1);
363 			}
364 		}
365 
EndPhase()366 		void GLProfiler::EndPhase() {
367 			SPADES_MARK_FUNCTION_DEBUG();
368 
369 			SPAssert(m_active);
370 			SPAssert(m_stack.size() > 1);
371 
372 			EndPhaseInner();
373 		}
374 
EndPhaseInner()375 		void GLProfiler::EndPhaseInner() {
376 			SPADES_MARK_FUNCTION_DEBUG();
377 
378 			SPAssert(!m_stack.empty());
379 
380 			Phase &current = GetCurrentPhase();
381 
382 			if (m_settings.r_debugTimingFlush) {
383 				m_device.Flush();
384 			}
385 
386 			double wallClockTime = GetWallClockTime();
387 
388 			current.endWallClockTime = wallClockTime;
389 
390 			current.measurementLatest.totalWallClockTime +=
391 			  wallClockTime - current.startWallClockTime;
392 			current.measurementLatest.totalNumFrames += 1;
393 
394 			if (m_settings.r_debugTimingGPUTime) {
395 				SPAssert(current.queryObjectIndices);
396 				NewTimerQuery();
397 				(*current.queryObjectIndices).second = m_currentTimerQueryObjectIndex;
398 			}
399 
400 			m_stack.pop_back();
401 		}
402 
LogResult(Phase & root)403 		void GLProfiler::LogResult(Phase &root) {
404 			if (!root.measurementSaved) {
405 				// No results yet
406 				return;
407 			}
408 
409 			double factor = 1.0 / (*root.measurementSaved).totalNumFrames;
410 			SPLog("---- Start of GLProfiler Result ----");
411 			SPLog("(%d sampled frame(s). Showing the per-frame timing)",
412 			      (*root.measurementSaved).totalNumFrames);
413 
414 			if (m_settings.r_debugTimingGPUTime) {
415 				SPLog("(GPU time / wall clock time)");
416 			} else {
417 				SPLog("(wall clock time)");
418 			}
419 
420 			struct Traverser {
421 				GLProfiler &self;
422 				double factor;
423 				char buf[512];
424 
425 				Traverser(GLProfiler &self, double factor) : self{self}, factor{factor} {}
426 				void Traverse(Phase &phase, int level) {
427 					if (!phase.measurementSaved) {
428 						// No results yet
429 						return;
430 					}
431 					Measurement &result = *phase.measurementSaved;
432 
433 					int indent = level * 2;
434 					for (int i = 0; i < indent; i++)
435 						buf[i] = ' ';
436 					buf[511] = 0;
437 					if (self.m_settings.r_debugTimingGPUTime) {
438 						std::snprintf(buf + indent, 511 - indent, "%s - %.3fms / %.3fms",
439 						              phase.description.c_str(),
440 						              result.totalGPUTime * 1000. * factor,
441 						              result.totalWallClockTime * 1000. * factor);
442 					} else {
443 						std::snprintf(buf + indent, 511 - indent, "%s - %.3fms",
444 						              phase.description.c_str(),
445 						              result.totalWallClockTime * 1000. * factor);
446 					}
447 					SPLog("%s", buf);
448 
449 					for (Phase &subphase : phase.subphases) {
450 						Traverse(subphase, level + 1);
451 					}
452 				}
453 			};
454 			Traverser{*this, factor}.Traverse(root, 0);
455 
456 			SPLog("---- End of GLProfiler Result ----");
457 		}
458 
DrawResult()459 		void GLProfiler::DrawResult() {
460 			if (m_root) {
461 				DrawResult(*m_root);
462 			}
463 		}
464 
DrawResult(Phase & root)465 		void GLProfiler::DrawResult(Phase &root) {
466 			if (!root.measurementSaved) {
467 				// No results yet
468 				return;
469 			}
470 
471 			double factor = 1.0 / (*root.measurementSaved).totalNumFrames;
472 
473 			struct ResultRenderer {
474 				GLProfiler &self;
475 				GLRenderer &renderer;
476 				bool gpu;
477 				double factor;
478 				char buf[512];
479 				Vector2 cursor{0.0f, 0.0f};
480 				int column = 0;
481 
482 				ResultRenderer(GLProfiler &self, double factor)
483 				    : self{self},
484 				      renderer{self.m_renderer},
485 				      gpu{self.m_settings.r_debugTimingGPUTime},
486 				      factor{factor} {}
487 
488 				void DrawText(const char *str) {
489 					client::IImage *font = self.m_font;
490 
491 					while (*str) {
492 						char c = *str;
493 						if (c == '\n') {
494 							cursor.y += 10.0f;
495 							if (cursor.y + 10.0f > renderer.ScreenHeight()) {
496 								cursor.y = 0.0f;
497 								++column;
498 							}
499 							cursor.x = column * 500.0f;
500 						} else {
501 							int col = c & 15;
502 							int row = (c >> 4) - 2;
503 							renderer.DrawImage(font, cursor,
504 							                   AABB2{col * 6.0f, row * 10.0f, 6.0f, 10.0f});
505 							cursor.x += 6.0f;
506 						}
507 						++str;
508 					}
509 				}
510 
511 				void Traverse(Phase &phase, int level) {
512 					if (!phase.measurementSaved) {
513 						// No results yet
514 						return;
515 					}
516 					Measurement &result = *phase.measurementSaved;
517 					double time = (gpu ? result.totalGPUTime : result.totalWallClockTime) * factor;
518 
519 					// draw text
520 					if (result.totalNumFrames > 0) {
521 						renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 1.0f, 1.0f, 1.0f});
522 					} else {
523 						renderer.SetColorAlphaPremultiplied(Vector4{0.5f, 0.5f, 0.5f, 1.0f});
524 					}
525 
526 					int indent = level * 2;
527 					int timeColumn = 30;
528 					for (int i = 0; i < indent; i++)
529 						buf[i] = ' ';
530 					std::fill(buf, buf + timeColumn, ' ');
531 					std::strcpy(buf + indent, phase.description.c_str());
532 					buf[std::strlen(buf)] = ' ';
533 					std::sprintf(buf + timeColumn, "%7.3fms", time * 1000.);
534 					DrawText(buf);
535 
536 					float subphaseTime = 0.0f;
537 					for (Phase &subphase : phase.subphases) {
538 						if (!subphase.measurementSaved) {
539 							continue;
540 						}
541 						Measurement &subresult = *subphase.measurementSaved;
542 						subphaseTime +=
543 						  (gpu ? subresult.totalGPUTime : subresult.totalWallClockTime) * factor;
544 					}
545 
546 					// draw bar
547 					float barScale = 4000.0 * self.m_settings.r_debugTimingOutputBarScale;
548 					float boxWidth = static_cast<float>(time * barScale);
549 					float childBoxWidth = static_cast<float>(subphaseTime * barScale);
550 					client::IImage *white = self.m_white;
551 
552 					renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 0.0f, 0.0f, 0.5f});
553 					renderer.DrawImage(white, AABB2{cursor.x, cursor.y + 1.0f, boxWidth, 8.0f});
554 
555 					renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 1.0f, 0.0f, 1.0f});
556 					renderer.DrawImage(white, AABB2{cursor.x, cursor.y + 3.0f, boxWidth, 4.0f});
557 
558 					renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 0.0f, 0.0f, 1.0f});
559 					renderer.DrawImage(
560 					  white, AABB2{cursor.x, cursor.y + 3.0f, boxWidth - childBoxWidth, 4.0f});
561 
562 					DrawText("\n");
563 
564 					for (Phase &subphase : phase.subphases) {
565 						Traverse(subphase, level + 1);
566 					}
567 				}
568 
569 				void Draw(Phase &root) {
570 					renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 1.0f, 0.0f, 1.0f});
571 					DrawText("[GLProfiler result] ");
572 					std::sprintf(buf, "%d sampled frame(s). Showing the per-frame timing.\n",
573 					             (*root.measurementSaved).totalNumFrames);
574 					DrawText(buf);
575 					DrawText("Legends: ");
576 					DrawText(gpu ? "GPU time" : "wall clock time");
577 					renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 0.0f, 0.0f, 1.0f});
578 					DrawText(" Self");
579 					renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 1.0f, 0.0f, 1.0f});
580 					DrawText(" Total\n");
581 
582 					Traverse(root, 0);
583 				}
584 			};
585 			ResultRenderer{*this, factor}.Draw(root);
586 		}
587 
Context(GLProfiler & profiler,const char * format,...)588 		GLProfiler::Context::Context(GLProfiler &profiler, const char *format, ...)
589 		    : m_profiler{profiler}, m_active{false} {
590 			SPADES_MARK_FUNCTION_DEBUG();
591 
592 			if (!profiler.m_active) {
593 				return;
594 			}
595 
596 			m_active = true;
597 
598 			char buf[2048];
599 			va_list va;
600 			va_start(va, format);
601 			buf[2047] = 0;
602 			std::vsnprintf(buf, 2047, format, va);
603 			va_end(va);
604 
605 			profiler.BeginPhase(format, buf);
606 		}
~Context()607 		GLProfiler::Context::~Context() {
608 			SPADES_MARK_FUNCTION_DEBUG();
609 
610 			if (!m_active) {
611 				return;
612 			}
613 
614 			m_profiler.EndPhase();
615 		}
616 	}
617 }
618