1 /* 2 Copyright (c) 2013 yvt 3 4 This file is part of OpenSpades. 5 6 OpenSpades is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 OpenSpades is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with OpenSpades. If not, see <http://www.gnu.org/licenses/>. 18 19 */ 20 21 #include <algorithm> 22 #include <chrono> 23 #include <cstdarg> 24 #include <cstdio> 25 #include <cstdlib> 26 #include <ctime> 27 #include <functional> 28 #include <list> 29 #include <unordered_map> 30 #include <cstring> 31 32 #include "GLProfiler.h" 33 34 #include "GLRenderer.h" 35 #include "GLSettings.h" 36 #include "IGLDevice.h" 37 #include <Core/Debug.h> 38 #include <Core/Settings.h> 39 #include <Core/TMPUtils.h> 40 41 namespace chrono = std::chrono; 42 43 namespace spades { 44 namespace draw { 45 46 namespace { 47 chrono::high_resolution_clock::time_point startTimePoint; 48 ResetTimes()49 void ResetTimes() { startTimePoint = chrono::high_resolution_clock::now(); } 50 GetWallClockTime()51 double GetWallClockTime() { 52 return chrono::duration_cast<chrono::microseconds>( 53 chrono::high_resolution_clock::now() - startTimePoint) 54 .count() / 55 1000000.0; 56 } 57 } 58 59 struct GLProfiler::Measurement { 60 double totalWallClockTime = 0.0; 61 double totalGPUTime = 0.0; 62 int totalNumFrames = 0; 63 }; 64 65 struct GLProfiler::Phase { 66 const std::string name; 67 std::string description; 68 69 // can't use vector here; a reference to a vector's element can be invalidated 70 std::list<Phase> subphases; 71 72 std::list<Phase>::iterator nextSubphaseIterator; 73 74 double startWallClockTime; 75 double endWallClockTime; 76 stmp::optional<std::pair<std::size_t, std::size_t>> queryObjectIndices; 77 78 Measurement measurementLatest; 79 bool measured = false; 80 81 stmp::optional<Measurement> measurementSaved; 82 Phasespades::draw::GLProfiler::Phase83 Phase(const std::string &name) : name{name}, nextSubphaseIterator{subphases.begin()} {} 84 }; 85 GLProfiler(GLRenderer & renderer)86 GLProfiler::GLProfiler(GLRenderer &renderer) 87 : m_settings{renderer.GetSettings()}, 88 m_renderer{renderer}, 89 m_device{*renderer.GetGLDevice()}, 90 m_active{false}, 91 m_lastSaveTime{0.0}, 92 m_shouldSaveThisFrame{false}, 93 m_waitingTimerQueryResult{false} { 94 SPADES_MARK_FUNCTION(); 95 96 m_font = m_renderer.RegisterImage("Gfx/Fonts/Debug.png"); 97 m_white = m_renderer.RegisterImage("Gfx/White.tga"); 98 } 99 ~GLProfiler()100 GLProfiler::~GLProfiler() { 101 SPADES_MARK_FUNCTION(); 102 for (IGLDevice::UInteger timerQueryObject : m_timerQueryObjects) { 103 m_device.DeleteQuery(timerQueryObject); 104 } 105 } 106 BeginFrame()107 void GLProfiler::BeginFrame() { 108 SPADES_MARK_FUNCTION(); 109 110 if (!m_settings.r_debugTiming) { 111 // Clear history 112 m_root.reset(); 113 m_waitingTimerQueryResult = false; 114 return; 115 } 116 117 if (m_waitingTimerQueryResult) { 118 FinalizeMeasurement(); 119 120 // Still waiting? 121 if (m_waitingTimerQueryResult) { 122 return; 123 } 124 } 125 126 SPAssert(m_stack.empty()); 127 SPAssert(!m_active); 128 129 m_active = true; 130 131 if (m_settings.r_debugTimingAverage) { 132 m_shouldSaveThisFrame = m_stopwatch.GetTime() > m_lastSaveTime + 1.0; 133 } else { 134 m_shouldSaveThisFrame = true; 135 } 136 137 ResetTimes(); 138 139 if (m_settings.r_debugTimingGPUTime) { 140 m_currentTimerQueryObjectIndex = 0; 141 if (m_timerQueryObjects.empty()) { 142 m_timerQueryObjects.push_back(m_device.GenQuery()); 143 } 144 m_device.BeginQuery(IGLDevice::TimeElapsed, m_timerQueryObjects[0]); 145 } 146 147 if (m_root) { 148 struct Traverser { 149 GLProfiler &self; 150 Traverser(GLProfiler &self) : self{self} {} 151 void Traverse(Phase &phase) { 152 phase.measured = false; 153 phase.queryObjectIndices.reset(); 154 phase.nextSubphaseIterator = phase.subphases.begin(); 155 for (Phase &subphase : phase.subphases) { 156 Traverse(subphase); 157 } 158 } 159 }; 160 Traverser{*this}.Traverse(*m_root); 161 } 162 163 if (!m_root) { 164 m_root.reset(new Phase{"Frame"}); 165 m_root->description = "Frame"; 166 } 167 168 BeginPhaseInner(*m_root); 169 170 m_stack.emplace_back(*m_root); 171 } 172 EndFrame()173 void GLProfiler::EndFrame() { 174 SPADES_MARK_FUNCTION(); 175 176 if (!m_active) { 177 return; 178 } 179 180 SPAssert(m_stack.size() == 1); 181 SPAssert(&GetCurrentPhase() == m_root.get()); 182 183 EndPhaseInner(); 184 SPAssert(m_stack.empty()); 185 186 m_active = false; 187 188 FinalizeMeasurement(); 189 } 190 FinalizeMeasurement()191 void GLProfiler::FinalizeMeasurement() { 192 SPADES_MARK_FUNCTION(); 193 Phase &root = *m_root; 194 195 // Fill gap 196 if (m_settings.r_debugTimingFillGap) { 197 struct Traverser { 198 GLProfiler &self; 199 Traverser(GLProfiler &self) : self{self} {} 200 void Traverse(Phase &phase, stmp::optional<std::pair<Phase&, bool>> base) { 201 if (!phase.queryObjectIndices) { 202 return; 203 } 204 if (base) { 205 auto baseIndices = *(*base).first.queryObjectIndices; 206 (*phase.queryObjectIndices).second = (*base).second ? baseIndices.second : baseIndices.first; 207 } 208 auto it = phase.subphases.begin(); 209 while (it != phase.subphases.end() && !it->queryObjectIndices) { 210 ++it; 211 } 212 while (it != phase.subphases.end()) { 213 auto it2 = it; ++it2; 214 while (it2 != phase.subphases.end() && !it2->queryObjectIndices) { 215 ++it2; 216 } 217 if (it2 == phase.subphases.end()) { 218 Traverse(*it, std::pair<Phase&, bool>{phase, true}); 219 } else { 220 Traverse(*it, std::pair<Phase&, bool>{*it2, false}); 221 } 222 it = it2; 223 } 224 } 225 }; 226 Traverser{*this}.Traverse(root, {}); 227 // TODO: fill gap for wall clock times 228 } 229 230 // Collect GPU time information 231 if (m_settings.r_debugTimingGPUTime) { 232 if (!m_waitingTimerQueryResult) { 233 m_device.EndQuery(IGLDevice::TimeElapsed); 234 } 235 236 m_waitingTimerQueryResult = false; 237 238 // are results available? 239 for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) { 240 if (!m_device.GetQueryObjectUInteger(m_timerQueryObjects[i], 241 IGLDevice::QueryResultAvailable)) { 242 m_waitingTimerQueryResult = true; 243 return; 244 } 245 } 246 247 double t = 0; 248 m_timerQueryTimes.resize(m_currentTimerQueryObjectIndex + 2); 249 m_timerQueryTimes[0] = 0.0; 250 for (std::size_t i = 0; i <= m_currentTimerQueryObjectIndex; ++i) { 251 auto nanoseconds = m_device.GetQueryObjectUInteger64(m_timerQueryObjects.at(i), 252 IGLDevice::QueryResult); 253 t += static_cast<double>(nanoseconds) / 1.0e+9; 254 m_timerQueryTimes[i + 1] = t; 255 } 256 struct Traverser { 257 GLProfiler &self; 258 Traverser(GLProfiler &self) : self{self} {} 259 void Traverse(Phase &phase) { 260 if (phase.queryObjectIndices) { 261 auto indices = *phase.queryObjectIndices; 262 double time1 = self.m_timerQueryTimes.at(indices.first); 263 double time2 = self.m_timerQueryTimes.at(indices.second); 264 phase.measurementLatest.totalGPUTime += time2 - time1; 265 } 266 267 for (Phase &subphase : phase.subphases) { 268 Traverse(subphase); 269 } 270 } 271 }; 272 Traverser{*this}.Traverse(root); 273 } else { 274 m_waitingTimerQueryResult = false; 275 } 276 277 if (m_shouldSaveThisFrame) { 278 struct Traverser { 279 GLProfiler &self; 280 Traverser(GLProfiler &self) : self{self} {} 281 void Traverse(Phase &phase) { 282 phase.measurementSaved = phase.measurementLatest; 283 phase.measurementLatest = Measurement{}; 284 for (Phase &subphase : phase.subphases) { 285 Traverse(subphase); 286 } 287 } 288 }; 289 Traverser{*this}.Traverse(root); 290 291 m_lastSaveTime = m_stopwatch.GetTime(); 292 293 // Output the result to the system log 294 if (m_settings.r_debugTimingOutputLog) { 295 LogResult(root); 296 } 297 } 298 } 299 NewTimerQuery()300 void GLProfiler::NewTimerQuery() { 301 SPADES_MARK_FUNCTION_DEBUG(); 302 303 m_device.EndQuery(IGLDevice::TimeElapsed); 304 ++m_currentTimerQueryObjectIndex; 305 306 if (m_currentTimerQueryObjectIndex >= m_timerQueryObjects.size()) { 307 m_timerQueryObjects.push_back(m_device.GenQuery()); 308 } 309 m_device.BeginQuery(IGLDevice::TimeElapsed, 310 m_timerQueryObjects[m_currentTimerQueryObjectIndex]); 311 } 312 BeginPhase(const std::string & name,const std::string & description)313 void GLProfiler::BeginPhase(const std::string &name, const std::string &description) { 314 SPADES_MARK_FUNCTION_DEBUG(); 315 316 SPAssert(m_active); 317 318 Phase ¤t = GetCurrentPhase(); 319 320 auto it = current.nextSubphaseIterator; 321 322 for (; it != current.subphases.end(); ++it) { 323 if (it->name == name) { 324 break; 325 } 326 } 327 328 if (it == current.subphases.end()) { 329 // Create a new subphase 330 it = current.subphases.emplace(current.nextSubphaseIterator, name); 331 } 332 333 current.nextSubphaseIterator = it; 334 ++current.nextSubphaseIterator; 335 336 Phase &next = *it; 337 m_stack.emplace_back(next); 338 339 if (next.measured) { 340 SPRaise("Cannot measure the timing of phase '%s' twice", name.c_str()); 341 } 342 343 next.measured = true; 344 next.description = description; 345 346 BeginPhaseInner(next); 347 } 348 BeginPhaseInner(Phase & phase)349 void GLProfiler::BeginPhaseInner(Phase &phase) { 350 SPADES_MARK_FUNCTION_DEBUG(); 351 352 if (m_settings.r_debugTimingFlush) { 353 m_device.Flush(); 354 } 355 356 phase.startWallClockTime = GetWallClockTime(); 357 358 if (m_settings.r_debugTimingGPUTime) { 359 NewTimerQuery(); 360 phase.queryObjectIndices = std::pair<std::size_t, std::size_t>{}; 361 (*phase.queryObjectIndices).first = m_currentTimerQueryObjectIndex; 362 (*phase.queryObjectIndices).second = static_cast<std::size_t>(-1); 363 } 364 } 365 EndPhase()366 void GLProfiler::EndPhase() { 367 SPADES_MARK_FUNCTION_DEBUG(); 368 369 SPAssert(m_active); 370 SPAssert(m_stack.size() > 1); 371 372 EndPhaseInner(); 373 } 374 EndPhaseInner()375 void GLProfiler::EndPhaseInner() { 376 SPADES_MARK_FUNCTION_DEBUG(); 377 378 SPAssert(!m_stack.empty()); 379 380 Phase ¤t = GetCurrentPhase(); 381 382 if (m_settings.r_debugTimingFlush) { 383 m_device.Flush(); 384 } 385 386 double wallClockTime = GetWallClockTime(); 387 388 current.endWallClockTime = wallClockTime; 389 390 current.measurementLatest.totalWallClockTime += 391 wallClockTime - current.startWallClockTime; 392 current.measurementLatest.totalNumFrames += 1; 393 394 if (m_settings.r_debugTimingGPUTime) { 395 SPAssert(current.queryObjectIndices); 396 NewTimerQuery(); 397 (*current.queryObjectIndices).second = m_currentTimerQueryObjectIndex; 398 } 399 400 m_stack.pop_back(); 401 } 402 LogResult(Phase & root)403 void GLProfiler::LogResult(Phase &root) { 404 if (!root.measurementSaved) { 405 // No results yet 406 return; 407 } 408 409 double factor = 1.0 / (*root.measurementSaved).totalNumFrames; 410 SPLog("---- Start of GLProfiler Result ----"); 411 SPLog("(%d sampled frame(s). Showing the per-frame timing)", 412 (*root.measurementSaved).totalNumFrames); 413 414 if (m_settings.r_debugTimingGPUTime) { 415 SPLog("(GPU time / wall clock time)"); 416 } else { 417 SPLog("(wall clock time)"); 418 } 419 420 struct Traverser { 421 GLProfiler &self; 422 double factor; 423 char buf[512]; 424 425 Traverser(GLProfiler &self, double factor) : self{self}, factor{factor} {} 426 void Traverse(Phase &phase, int level) { 427 if (!phase.measurementSaved) { 428 // No results yet 429 return; 430 } 431 Measurement &result = *phase.measurementSaved; 432 433 int indent = level * 2; 434 for (int i = 0; i < indent; i++) 435 buf[i] = ' '; 436 buf[511] = 0; 437 if (self.m_settings.r_debugTimingGPUTime) { 438 std::snprintf(buf + indent, 511 - indent, "%s - %.3fms / %.3fms", 439 phase.description.c_str(), 440 result.totalGPUTime * 1000. * factor, 441 result.totalWallClockTime * 1000. * factor); 442 } else { 443 std::snprintf(buf + indent, 511 - indent, "%s - %.3fms", 444 phase.description.c_str(), 445 result.totalWallClockTime * 1000. * factor); 446 } 447 SPLog("%s", buf); 448 449 for (Phase &subphase : phase.subphases) { 450 Traverse(subphase, level + 1); 451 } 452 } 453 }; 454 Traverser{*this, factor}.Traverse(root, 0); 455 456 SPLog("---- End of GLProfiler Result ----"); 457 } 458 DrawResult()459 void GLProfiler::DrawResult() { 460 if (m_root) { 461 DrawResult(*m_root); 462 } 463 } 464 DrawResult(Phase & root)465 void GLProfiler::DrawResult(Phase &root) { 466 if (!root.measurementSaved) { 467 // No results yet 468 return; 469 } 470 471 double factor = 1.0 / (*root.measurementSaved).totalNumFrames; 472 473 struct ResultRenderer { 474 GLProfiler &self; 475 GLRenderer &renderer; 476 bool gpu; 477 double factor; 478 char buf[512]; 479 Vector2 cursor{0.0f, 0.0f}; 480 int column = 0; 481 482 ResultRenderer(GLProfiler &self, double factor) 483 : self{self}, 484 renderer{self.m_renderer}, 485 gpu{self.m_settings.r_debugTimingGPUTime}, 486 factor{factor} {} 487 488 void DrawText(const char *str) { 489 client::IImage *font = self.m_font; 490 491 while (*str) { 492 char c = *str; 493 if (c == '\n') { 494 cursor.y += 10.0f; 495 if (cursor.y + 10.0f > renderer.ScreenHeight()) { 496 cursor.y = 0.0f; 497 ++column; 498 } 499 cursor.x = column * 500.0f; 500 } else { 501 int col = c & 15; 502 int row = (c >> 4) - 2; 503 renderer.DrawImage(font, cursor, 504 AABB2{col * 6.0f, row * 10.0f, 6.0f, 10.0f}); 505 cursor.x += 6.0f; 506 } 507 ++str; 508 } 509 } 510 511 void Traverse(Phase &phase, int level) { 512 if (!phase.measurementSaved) { 513 // No results yet 514 return; 515 } 516 Measurement &result = *phase.measurementSaved; 517 double time = (gpu ? result.totalGPUTime : result.totalWallClockTime) * factor; 518 519 // draw text 520 if (result.totalNumFrames > 0) { 521 renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 1.0f, 1.0f, 1.0f}); 522 } else { 523 renderer.SetColorAlphaPremultiplied(Vector4{0.5f, 0.5f, 0.5f, 1.0f}); 524 } 525 526 int indent = level * 2; 527 int timeColumn = 30; 528 for (int i = 0; i < indent; i++) 529 buf[i] = ' '; 530 std::fill(buf, buf + timeColumn, ' '); 531 std::strcpy(buf + indent, phase.description.c_str()); 532 buf[std::strlen(buf)] = ' '; 533 std::sprintf(buf + timeColumn, "%7.3fms", time * 1000.); 534 DrawText(buf); 535 536 float subphaseTime = 0.0f; 537 for (Phase &subphase : phase.subphases) { 538 if (!subphase.measurementSaved) { 539 continue; 540 } 541 Measurement &subresult = *subphase.measurementSaved; 542 subphaseTime += 543 (gpu ? subresult.totalGPUTime : subresult.totalWallClockTime) * factor; 544 } 545 546 // draw bar 547 float barScale = 4000.0 * self.m_settings.r_debugTimingOutputBarScale; 548 float boxWidth = static_cast<float>(time * barScale); 549 float childBoxWidth = static_cast<float>(subphaseTime * barScale); 550 client::IImage *white = self.m_white; 551 552 renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 0.0f, 0.0f, 0.5f}); 553 renderer.DrawImage(white, AABB2{cursor.x, cursor.y + 1.0f, boxWidth, 8.0f}); 554 555 renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 1.0f, 0.0f, 1.0f}); 556 renderer.DrawImage(white, AABB2{cursor.x, cursor.y + 3.0f, boxWidth, 4.0f}); 557 558 renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 0.0f, 0.0f, 1.0f}); 559 renderer.DrawImage( 560 white, AABB2{cursor.x, cursor.y + 3.0f, boxWidth - childBoxWidth, 4.0f}); 561 562 DrawText("\n"); 563 564 for (Phase &subphase : phase.subphases) { 565 Traverse(subphase, level + 1); 566 } 567 } 568 569 void Draw(Phase &root) { 570 renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 1.0f, 0.0f, 1.0f}); 571 DrawText("[GLProfiler result] "); 572 std::sprintf(buf, "%d sampled frame(s). Showing the per-frame timing.\n", 573 (*root.measurementSaved).totalNumFrames); 574 DrawText(buf); 575 DrawText("Legends: "); 576 DrawText(gpu ? "GPU time" : "wall clock time"); 577 renderer.SetColorAlphaPremultiplied(Vector4{1.0f, 0.0f, 0.0f, 1.0f}); 578 DrawText(" Self"); 579 renderer.SetColorAlphaPremultiplied(Vector4{0.0f, 1.0f, 0.0f, 1.0f}); 580 DrawText(" Total\n"); 581 582 Traverse(root, 0); 583 } 584 }; 585 ResultRenderer{*this, factor}.Draw(root); 586 } 587 Context(GLProfiler & profiler,const char * format,...)588 GLProfiler::Context::Context(GLProfiler &profiler, const char *format, ...) 589 : m_profiler{profiler}, m_active{false} { 590 SPADES_MARK_FUNCTION_DEBUG(); 591 592 if (!profiler.m_active) { 593 return; 594 } 595 596 m_active = true; 597 598 char buf[2048]; 599 va_list va; 600 va_start(va, format); 601 buf[2047] = 0; 602 std::vsnprintf(buf, 2047, format, va); 603 va_end(va); 604 605 profiler.BeginPhase(format, buf); 606 } ~Context()607 GLProfiler::Context::~Context() { 608 SPADES_MARK_FUNCTION_DEBUG(); 609 610 if (!m_active) { 611 return; 612 } 613 614 m_profiler.EndPhase(); 615 } 616 } 617 } 618