1 #include "ppsspp_config.h"
2 #include "GLRenderManager.h"
3 #include "Common/GPU/OpenGL/GLFeatures.h"
4 #include "Common/GPU/thin3d.h"
5 #include "Common/Thread/ThreadUtil.h"
6
7 #include "Common/Log.h"
8 #include "Common/MemoryUtil.h"
9 #include "Common/Math/math_util.h"
10
11 #if 0 // def _DEBUG
12 #define VLOG(...) INFO_LOG(G3D, __VA_ARGS__)
13 #else
14 #define VLOG(...)
15 #endif
16
// Identity of the thread that executes the GL queue; recorded in ThreadStart().
static std::thread::id renderThreadId;
#if MAX_LOGLEVEL >= DEBUG_LEVEL
// Debug-build helper: true when called from the recorded render thread.
// Used in _dbg_assert_ checks below to catch cross-thread misuse.
static bool OnRenderThread() {
	return std::this_thread::get_id() == renderThreadId;
}
#endif
23
GLRTexture(int width,int height,int numMips)24 GLRTexture::GLRTexture(int width, int height, int numMips) {
25 if (gl_extensions.OES_texture_npot) {
26 canWrap = true;
27 } else {
28 canWrap = isPowerOf2(width) && isPowerOf2(height);
29 }
30 w = width;
31 h = height;
32 this->numMips = numMips;
33 }
34
GLRTexture::~GLRTexture() {
	// Only issue the GL delete if a texture object was ever created.
	// (GLDeleter::Perform zeroes 'texture' first when GL calls must be skipped.)
	if (texture) {
		glDeleteTextures(1, &texture);
	}
}
40
Take(GLDeleter & other)41 void GLDeleter::Take(GLDeleter &other) {
42 _assert_msg_(IsEmpty(), "Deleter already has stuff");
43 shaders = std::move(other.shaders);
44 programs = std::move(other.programs);
45 buffers = std::move(other.buffers);
46 textures = std::move(other.textures);
47 inputLayouts = std::move(other.inputLayouts);
48 framebuffers = std::move(other.framebuffers);
49 pushBuffers = std::move(other.pushBuffers);
50 other.shaders.clear();
51 other.programs.clear();
52 other.buffers.clear();
53 other.textures.clear();
54 other.inputLayouts.clear();
55 other.framebuffers.clear();
56 other.pushBuffers.clear();
57 }
58
// Runs on the GPU thread.
// Destroys every queued object. When skipGLCalls is set (e.g. the GL context
// is already lost, as happens on Android shutdown), each object's GL handle is
// zeroed first so the destructors don't issue GL calls.
void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
	for (auto pushBuffer : pushBuffers) {
		renderManager->UnregisterPushBuffer(pushBuffer);
		if (skipGLCalls) {
			pushBuffer->Destroy(false);
		}
		delete pushBuffer;
	}
	pushBuffers.clear();
	for (auto shader : shaders) {
		if (skipGLCalls)
			shader->shader = 0; // prevent the glDeleteShader
		delete shader;
	}
	shaders.clear();
	for (auto program : programs) {
		if (skipGLCalls)
			program->program = 0; // prevent the glDeleteProgram
		delete program;
	}
	programs.clear();
	for (auto buffer : buffers) {
		if (skipGLCalls)
			buffer->buffer_ = 0;
		delete buffer;
	}
	buffers.clear();
	for (auto texture : textures) {
		if (skipGLCalls)
			texture->texture = 0;
		delete texture;
	}
	textures.clear();
	for (auto inputLayout : inputLayouts) {
		// No GL objects in an inputLayout yet
		delete inputLayout;
	}
	inputLayouts.clear();
	for (auto framebuffer : framebuffers) {
		if (skipGLCalls) {
			// Zero every GL handle a framebuffer owns so its destructor is a no-op GL-wise.
			framebuffer->handle = 0;
			framebuffer->color_texture.texture = 0;
			framebuffer->z_stencil_buffer = 0;
			framebuffer->z_stencil_texture.texture = 0;
			framebuffer->z_buffer = 0;
			framebuffer->stencil_buffer = 0;
		}
		delete framebuffer;
	}
	framebuffers.clear();
}
111
GLRenderManager::GLRenderManager() {
	// Intentionally empty: per-frame data currently requires no
	// construction-time setup. The loop is kept as a placeholder for
	// per-frame initialization.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {

	}
}
117
GLRenderManager::~GLRenderManager() {
	// By this point ThreadEnd() must have drained all per-frame deleters.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		_assert_(frameData_[i].deleter.IsEmpty());
		_assert_(frameData_[i].deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown?
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}
127
// Called on the render thread as it starts up: creates queue-runner device
// objects, records the thread id for OnRenderThread() checks, applies any
// pending inflight-frame-count change, and selects a buffer mapping strategy
// based on driver capabilities and known bugs.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	threadFrame_ = threadInitFrame_;
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	// Mapping needs glMapBufferRange (GLES3+) or buffer storage to be usable at all.
	if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	if (mapBuffers) {
		switch (gl_extensions.gpuVendor) {
		case GPU_VENDOR_NVIDIA:
			bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
			break;

		// Temporarily disabled because it doesn't work with task switching on Android.
		// The mapped buffer seems to just be pulled out like a rug from under us, crashing
		// as soon as any write happens, which can happen during shutdown since we write from the
		// Emu thread which may not yet have shut down. There may be solutions to this, but for now,
		// disable this strategy to avoid crashing.
		//case GPU_VENDOR_QUALCOMM:
		//	bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP;
		//	break;

		default:
			bufferStrategy_ = GLBufferStrategy::SUBDATA;
		}
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}
172
// Called on the render thread as it shuts down: destroys device objects and
// drains every pending deleter and queued step.
void GLRenderManager::ThreadEnd() {
	INFO_LOG(G3D, "ThreadEnd");

	// Wait for any shutdown to complete in StopThread().
	std::unique_lock<std::mutex> lock(mutex_);
	queueRunner_.DestroyDeviceObjects();
	VLOG("PULL: Quitting");

	// Good point to run all the deleters to get rid of leftover objects.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		// Since we're in shutdown, we should skip the GL calls on Android.
		frameData_[i].deleter.Perform(this, skipGLCalls_);
		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
		for (int j = 0; j < (int)frameData_[i].steps.size(); j++) {
			delete frameData_[i].steps[j];
		}
		frameData_[i].steps.clear();
		frameData_[i].initSteps.clear();
	}
	deleter_.Perform(this, skipGLCalls_);

	// Also free steps that were recorded but never handed off to a frame.
	for (int i = 0; i < (int)steps_.size(); i++) {
		delete steps_[i];
	}
	steps_.clear();
	initSteps_.clear();
}
200
// Render-thread main loop body. Waits for the emu thread to hand over a frame
// (readyForRun), executes it, and keeps going until a full frame (END, not just
// a SYNC) has been completed. Returns false when run_ is cleared and no work
// remains, signalling the caller to exit its loop.
bool GLRenderManager::ThreadFrame() {
	std::unique_lock<std::mutex> lock(mutex_);
	if (!run_)
		return false;

	// In case of syncs or other partial completion, we keep going until we complete a frame.
	do {
		// Only advance to the next frame slot after a full END was processed.
		if (nextFrame) {
			threadFrame_++;
			if (threadFrame_ >= inflightFrames_)
				threadFrame_ = 0;
		}
		FrameData &frameData = frameData_[threadFrame_];
		{
			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
			while (!frameData.readyForRun && run_) {
				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_);
				frameData.pull_condVar.wait(lock);
			}
			if (!frameData.readyForRun && !run_) {
				// This means we're out of frames to render and run_ is false, so bail.
				return false;
			}
			VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_);
			frameData.readyForRun = false;
			// Run deferred deletes from two frames ago, then age the current batch.
			frameData.deleter_prev.Perform(this, skipGLCalls_);
			frameData.deleter_prev.Take(frameData.deleter);
			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
			// but that created a race condition where frames could end up not finished properly on resize etc.

			// Only increment next time if we're done.
			nextFrame = frameData.type == GLRRunType::END;
			_assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
		}
		VLOG("PULL: Running frame %d", threadFrame_);
		if (firstFrame) {
			INFO_LOG(G3D, "Running first frame (%d)", threadFrame_);
			firstFrame = false;
		}
		Run(threadFrame_);
		VLOG("PULL: Finished frame %d", threadFrame_);
	} while (!nextFrame);
	return true;
}
245
StopThread()246 void GLRenderManager::StopThread() {
247 // Since we don't control the thread directly, this will only pause the thread.
248
249 if (run_) {
250 run_ = false;
251 for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
252 auto &frameData = frameData_[i];
253 {
254 std::unique_lock<std::mutex> lock(frameData.push_mutex);
255 frameData.push_condVar.notify_all();
256 }
257 {
258 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
259 frameData.pull_condVar.notify_all();
260 }
261 }
262
263 // Wait until we've definitely stopped the threadframe.
264 std::unique_lock<std::mutex> lock(mutex_);
265
266 INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_);
267
268 // Eat whatever has been queued up for this frame if anything.
269 Wipe();
270
271 // Wait for any fences to finish and be resignaled, so we don't have sync issues.
272 // Also clean out any queued data, which might refer to things that might not be valid
273 // when we restart...
274 for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
275 auto &frameData = frameData_[i];
276 std::unique_lock<std::mutex> lock(frameData.push_mutex);
277 if (frameData.readyForRun || frameData.steps.size() != 0) {
278 Crash();
279 }
280 frameData.readyForRun = false;
281 frameData.readyForSubmit = false;
282 for (size_t i = 0; i < frameData.steps.size(); i++) {
283 delete frameData.steps[i];
284 }
285 frameData.steps.clear();
286 frameData.initSteps.clear();
287
288 while (!frameData.readyForFence) {
289 VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
290 frameData.push_condVar.wait(lock);
291 }
292 }
293 } else {
294 INFO_LOG(G3D, "GL submission thread was already paused.");
295 }
296 }
297
// Starts (or reuses) a render step targeting 'fb' (nullptr = backbuffer).
// Load actions per aspect decide whether an initial CLEAR command is queued.
// Consecutive binds of the same framebuffer without clears are merged into the
// existing step to avoid redundant FBO switches.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif
	// Eliminate dupes.
	if (steps_.size() && steps_.back()->render.framebuffer == fb && steps_.back()->stepType == GLRStepType::RENDER) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels..");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->render.numDraws = 0;
	step->tag = tag;
	steps_.push_back(step);

	// Translate any CLEAR load actions into a single combined clear command.
	GLuint clearMask = 0;
	GLRRenderData data;
	data.cmd = GLRRenderCommand::CLEAR;
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		// Scissor of all zeroes here appears to mean "no scissor" for the clear.
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		// KEEP means prior contents are read, so the step depends on the FB's
		// previous producer for correct step reordering.
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}
}
357
// Binds a framebuffer's backing texture (selected by aspectBit) to a texture
// slot for sampling within the current render step.
// NOTE(review): the 'attachment' parameter is not forwarded into the command —
// confirm whether it is intentionally unused.
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
	GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
	data.bind_fb_texture.slot = binding;
	data.bind_fb_texture.framebuffer = fb;
	data.bind_fb_texture.aspect = aspectBit;
	curRenderStep_->commands.push_back(data);
	// Sampling reads the FB's contents, so record the dependency.
	curRenderStep_->dependencies.insert(fb);
}
368
CopyFramebuffer(GLRFramebuffer * src,GLRect2D srcRect,GLRFramebuffer * dst,GLOffset2D dstPos,int aspectMask,const char * tag)369 void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
370 GLRStep *step = new GLRStep{ GLRStepType::COPY };
371 step->copy.srcRect = srcRect;
372 step->copy.dstPos = dstPos;
373 step->copy.src = src;
374 step->copy.dst = dst;
375 step->copy.aspectMask = aspectMask;
376 step->dependencies.insert(src);
377 step->tag = tag;
378 bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
379 if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
380 step->dependencies.insert(dst);
381 steps_.push_back(step);
382 }
383
BlitFramebuffer(GLRFramebuffer * src,GLRect2D srcRect,GLRFramebuffer * dst,GLRect2D dstRect,int aspectMask,bool filter,const char * tag)384 void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
385 GLRStep *step = new GLRStep{ GLRStepType::BLIT };
386 step->blit.srcRect = srcRect;
387 step->blit.dstRect = dstRect;
388 step->blit.src = src;
389 step->blit.dst = dst;
390 step->blit.aspectMask = aspectMask;
391 step->blit.filter = filter;
392 step->dependencies.insert(src);
393 step->tag = tag;
394 bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
395 if (!fillsDst)
396 step->dependencies.insert(dst);
397 steps_.push_back(step);
398 }
399
// Synchronously reads back a rectangle of 'src' into 'pixels'. Queues a
// READBACK step, blocks via FlushSync() until the render thread executes it,
// then converts from the readback format into destFormat. Returns false if
// aspectBits selects no supported aspect.
bool GLRenderManager::CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(pixels);

	GLRStep *step = new GLRStep{ GLRStepType::READBACK };
	step->readback.src = src;
	step->readback.srcRect = { x, y, w, h };
	step->readback.aspectMask = aspectBits;
	step->readback.dstFormat = destFormat;
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	// End the current render step and wait for the queue to execute the readback.
	curRenderStep_ = nullptr;
	FlushSync();

	// Determine what format the raw readback produced, to convert from.
	Draw::DataFormat srcFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyReadbackBuffer(w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}
430
CopyImageToMemorySync(GLRTexture * texture,int mipLevel,int x,int y,int w,int h,Draw::DataFormat destFormat,uint8_t * pixels,int pixelStride,const char * tag)431 void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
432 _assert_(texture);
433 _assert_(pixels);
434 GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE };
435 step->readback_image.texture = texture;
436 step->readback_image.mipLevel = mipLevel;
437 step->readback_image.srcRect = { x, y, w, h };
438 step->tag = tag;
439 steps_.push_back(step);
440
441 curRenderStep_ = nullptr;
442 FlushSync();
443
444 queueRunner_.CopyReadbackBuffer(w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
445 }
446
// Emu-thread entry point for a new frame. Blocks until the render thread has
// finished with this frame slot (readyForFence), then reclaims it for recording.
void GLRenderManager::BeginFrame() {
	VLOG("BeginFrame");

#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	int curFrame = GetCurFrame();
	FrameData &frameData = frameData_[curFrame];

	// Make sure the very last command buffer from the frame before the previous has been fully executed.
	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
		frameData.readyForSubmit = true;
	}

	VLOG("PUSH: Fencing %d", curFrame);

	// glFenceSync(&frameData.fence...)

	// Must be after the fence - this performs deletes.
	VLOG("PUSH: BeginFrame %d", curFrame);
	if (!run_) {
		WARN_LOG(G3D, "BeginFrame while !run_!");
	}

	// vulkan_->BeginFrame();
	// In GL, we have to do deletes on the submission thread.

	insideFrame_ = true;
	renderStepOffset_ = 0;
}
484
Finish()485 void GLRenderManager::Finish() {
486 curRenderStep_ = nullptr;
487 int curFrame = GetCurFrame();
488 FrameData &frameData = frameData_[curFrame];
489 {
490 std::unique_lock<std::mutex> lock(frameData.pull_mutex);
491 VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame);
492 frameData.steps = std::move(steps_);
493 steps_.clear();
494 frameData.initSteps = std::move(initSteps_);
495 initSteps_.clear();
496 frameData.readyForRun = true;
497 frameData.type = GLRRunType::END;
498 frameData_[curFrame_].deleter.Take(deleter_);
499 }
500
501 // Notify calls do not in fact need to be done with the mutex locked.
502 frameData.pull_condVar.notify_all();
503
504 curFrame_++;
505 if (curFrame_ >= inflightFrames_)
506 curFrame_ = 0;
507
508 insideFrame_ = false;
509 }
510
BeginSubmitFrame(int frame)511 void GLRenderManager::BeginSubmitFrame(int frame) {
512 FrameData &frameData = frameData_[frame];
513 if (!frameData.hasBegun) {
514 frameData.hasBegun = true;
515 }
516 }
517
// Render thread
// Signals frame completion back to the emu thread. In GL the actual GPU
// submission already happened during Run(); this only flips readyForFence when
// triggerFence is set (for SYNC frames, EndSyncFrame signals instead).
void GLRenderManager::Submit(int frame, bool triggerFence) {
	FrameData &frameData = frameData_[frame];

	// In GL, submission happens automatically in Run().

	// When !triggerFence, we notify after syncing with Vulkan.

	if (triggerFence) {
		VLOG("PULL: Frame %d.readyForFence = true", frame);

		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		_assert_(frameData.readyForSubmit);
		frameData.readyForFence = true;
		frameData.readyForSubmit = false;
		frameData.push_condVar.notify_all();
	}
}
536
// Render thread
// Completes a full (END) frame: signals the fence, then presents via the
// registered swap function, applying any pending swap-interval change first.
void GLRenderManager::EndSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	frameData.hasBegun = false;

	Submit(frame, true);

	if (!frameData.skipSwap) {
		if (swapIntervalChanged_) {
			swapIntervalChanged_ = false;
			if (swapIntervalFunction_) {
				swapIntervalFunction_(swapInterval_);
			}
		}
		if (swapFunction_) {
			swapFunction_();
		}
	} else {
		// skipSwap is a one-shot flag; reset it for the next frame.
		frameData.skipSwap = false;
	}
}
558
// Render thread
// Executes one frame's queued work: init steps, push-buffer flush/unmap,
// render steps, then remaps push buffers and finishes according to the
// frame's type (END = present, SYNC = signal the waiting emu thread).
void GLRenderManager::Run(int frame) {
	BeginSubmitFrame(frame);

	FrameData &frameData = frameData_[frame];

	auto &stepsOnThread = frameData_[frame].steps;
	auto &initStepsOnThread = frameData_[frame].initSteps;
	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_);
	initStepsOnThread.clear();

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	queueRunner_.RunSteps(stepsOnThread, skipGLCalls_);
	stepsOnThread.clear();

	// Remap push buffers so the emu thread can write into them again.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (frameData.type) {
	case GLRRunType::END:
		EndSubmitFrame(frame);
		break;

	case GLRRunType::SYNC:
		EndSyncFrame(frame);
		break;

	default:
		_assert_(false);
	}

	VLOG("PULL: Finished running frame %d", frame);
}
603
// Emu thread: hands the currently recorded steps to the render thread as a
// SYNC run and blocks until they have been executed. Used for readbacks.
// Does not advance curFrame_ — recording resumes on the same frame slot.
void GLRenderManager::FlushSync() {
	// TODO: Reset curRenderStep_?
	renderStepOffset_ += (int)steps_.size();

	int curFrame = curFrame_;
	FrameData &frameData = frameData_[curFrame];
	{
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
		frameData.initSteps = std::move(initSteps_);
		initSteps_.clear();
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.readyForRun = true;
		_assert_(frameData.readyForFence == false);
		frameData.type = GLRRunType::SYNC;
		frameData.pull_condVar.notify_all();
	}
	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		// Wait for the flush to be hit, since we're syncing.
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
		frameData.readyForSubmit = true;
	}
}
633
// Render thread
// Completes a SYNC run: no present, just signal the emu thread blocked in
// FlushSync() that the queued work (e.g. a readback) has been executed.
void GLRenderManager::EndSyncFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	Submit(frame, false);

	// glFinish is not actually necessary here, and won't be until we start using
	// glBufferStorage. Then we need to use fences.
	// glFinish();

	// At this point we can resume filling the command buffers for the current frame since
	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
	// No need to switch to the next frame number.

	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		frameData.readyForFence = true;
		frameData.readyForSubmit = true;
		frameData.push_condVar.notify_all();
	}
}
654
Wipe()655 void GLRenderManager::Wipe() {
656 initSteps_.clear();
657 for (auto step : steps_) {
658 delete step;
659 }
660 steps_.clear();
661 }
662
// Blocks the calling thread until every submitted frame has been fully
// executed by the render thread. Frames never handed over are skipped.
void GLRenderManager::WaitUntilQueueIdle() {
	// Just wait for all frames to be ready.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		FrameData &frameData = frameData_[i];

		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		// Ignore unsubmitted frames.
		while (!frameData.readyForFence && frameData.readyForRun) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i);
			frameData.push_condVar.wait(lock);
		}
	}
}
676
// Streaming push buffer for 'target' (e.g. GL_ARRAY_BUFFER), growing in
// chunks of 'size' bytes. The first backing buffer is allocated immediately;
// failure to do so is fatal.
GLPushBuffer::GLPushBuffer(GLRenderManager *render, GLuint target, size_t size) : render_(render), size_(size), target_(target) {
	bool res = AddBuffer();
	_assert_(res);
}
681
GLPushBuffer::~GLPushBuffer() {
	// Destroy(true) deletes GL buffers immediately instead of queueing them —
	// this presumes the destructor runs on the render thread (e.g. via
	// GLDeleter::Perform). TODO confirm all destruction paths satisfy this.
	Destroy(true);
}
685
// Begins writing into the current buffer: points writePtr_ at device memory
// when a persistent mapping exists, otherwise at the CPU-side staging copy.
void GLPushBuffer::Map() {
	_assert_(!writePtr_);
	auto &info = buffers_[buf_];
	writePtr_ = info.deviceMemory ? info.deviceMemory : info.localMemory;
	info.flushOffset = 0;
	// Force alignment. This is needed for PushAligned() to work as expected.
	while ((intptr_t)writePtr_ & 15) {
		writePtr_++;
		offset_++;
		info.flushOffset++;
	}
	_assert_(writePtr_);
}
699
// Ends writing: for staging-backed buffers, queues an upload of the written
// range; for device-mapped buffers, just records how much needs flushing.
void GLPushBuffer::Unmap() {
	_assert_(writePtr_);
	if (!buffers_[buf_].deviceMemory) {
		// Here we simply upload the data to the last buffer.
		// Might be worth trying with size_ instead of offset_, so the driver can replace
		// the whole buffer. At least if it's close.
		render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].localMemory, false);
	} else {
		buffers_[buf_].flushOffset = offset_;
	}
	writePtr_ = nullptr;
}
712
// Uploads/flushes all pending writes to the GPU and resets the current
// staging buffer's write position.
void GLPushBuffer::Flush() {
	// Must be called from the render thread.
	_dbg_assert_(OnRenderThread());

	buffers_[buf_].flushOffset = offset_;
	if (!buffers_[buf_].deviceMemory && writePtr_) {
		auto &info = buffers_[buf_];
		if (info.flushOffset != 0) {
			_assert_(info.buffer->buffer_);
			glBindBuffer(target_, info.buffer->buffer_);
			glBufferSubData(target_, 0, info.flushOffset, info.localMemory);
		}

		// Here we will submit all the draw calls, with the already known buffer and offsets.
		// Might as well reset the write pointer here and start over the current buffer.
		writePtr_ = info.localMemory;
		offset_ = 0;
		info.flushOffset = 0;
	}

	// For device memory, we flush all buffers here.
	if ((strategy_ & GLBufferStrategy::MASK_FLUSH) != 0) {
		for (auto &info : buffers_) {
			if (info.flushOffset == 0 || !info.deviceMemory)
				continue;

			glBindBuffer(target_, info.buffer->buffer_);
			glFlushMappedBufferRange(target_, 0, info.flushOffset);
			info.flushOffset = 0;
		}
	}
}
745
AddBuffer()746 bool GLPushBuffer::AddBuffer() {
747 BufInfo info;
748 info.localMemory = (uint8_t *)AllocateAlignedMemory(size_, 16);
749 if (!info.localMemory)
750 return false;
751 info.buffer = render_->CreateBuffer(target_, size_, GL_DYNAMIC_DRAW);
752 buf_ = buffers_.size();
753 buffers_.push_back(info);
754 return true;
755 }
756
// Frees every backing buffer and its staging memory. Idempotent: subsequent
// calls are no-ops once buf_ is -1.
void GLPushBuffer::Destroy(bool onRenderThread) {
	if (buf_ == -1)
		return; // Already destroyed
	for (BufInfo &info : buffers_) {
		// This will automatically unmap device memory, if needed.
		// NOTE: We immediately delete the buffer, don't go through the deleter, if we're on the render thread.
		if (onRenderThread) {
			delete info.buffer;
		} else {
			render_->DeleteBuffer(info.buffer);
		}

		FreeAlignedMemory(info.localMemory);
	}
	buffers_.clear();
	buf_ = -1;
}
774
// Switches to the next backing buffer (allocating a new, possibly enlarged one
// when needed) because the current one can't fit 'minSize' more bytes.
void GLPushBuffer::NextBuffer(size_t minSize) {
	// First, unmap the current memory.
	Unmap();

	buf_++;
	if (buf_ >= buffers_.size() || minSize > size_) {
		// Before creating the buffer, adjust to the new size_ if necessary.
		while (size_ < minSize) {
			size_ <<= 1;
		}

		bool res = AddBuffer();
		_assert_(res);
		if (!res) {
			// Let's try not to crash at least?
			buf_ = 0;
		}
	}

	// Now, move to the next buffer and map it.
	offset_ = 0;
	Map();
}
798
// Called between frames (never on the render thread): if the frame overflowed
// into multiple buffers, replace them all with one buffer big enough to hold a
// whole frame's worth of data.
void GLPushBuffer::Defragment() {
	_dbg_assert_msg_(!OnRenderThread(), "Defragment must not run on the render thread");

	if (buffers_.size() <= 1) {
		// Let's take this chance to jettison localMemory we don't need.
		for (auto &info : buffers_) {
			if (info.deviceMemory) {
				FreeAlignedMemory(info.localMemory);
				info.localMemory = nullptr;
			}
		}

		return;
	}

	// Okay, we have more than one. Destroy them all and start over with a larger one.
	size_t newSize = size_ * buffers_.size();
	Destroy(false);

	size_ = newSize;
	bool res = AddBuffer();
	_assert_msg_(res, "AddBuffer failed");
}
822
GetTotalSize() const823 size_t GLPushBuffer::GetTotalSize() const {
824 size_t sum = 0;
825 if (buffers_.size() > 1)
826 sum += size_ * (buffers_.size() - 1);
827 sum += offset_;
828 return sum;
829 }
830
// Render thread: establishes device-memory mappings for all backing buffers
// (unless the strategy is SUBDATA, which uses staging uploads only). Falls
// back to allocating staging memory if a mapping fails.
void GLPushBuffer::MapDevice(GLBufferStrategy strategy) {
	_dbg_assert_msg_(OnRenderThread(), "MapDevice must run on render thread");

	strategy_ = strategy;
	if (strategy_ == GLBufferStrategy::SUBDATA) {
		return;
	}

	bool mapChanged = false;
	for (auto &info : buffers_) {
		if (!info.buffer->buffer_ || info.deviceMemory) {
			// Can't map - no device buffer associated yet or already mapped.
			continue;
		}

		info.deviceMemory = (uint8_t *)info.buffer->Map(strategy_);
		mapChanged = mapChanged || info.deviceMemory != nullptr;

		if (!info.deviceMemory && !info.localMemory) {
			// Somehow it failed, let's dodge crashing.
			info.localMemory = (uint8_t *)AllocateAlignedMemory(info.buffer->size_, 16);
			mapChanged = true;
		}

		_dbg_assert_msg_(info.localMemory || info.deviceMemory, "Local or device memory must succeed");
	}

	if (writePtr_ && mapChanged) {
		// This can happen during a sync. Remap.
		writePtr_ = nullptr;
		Map();
	}
}
864
UnmapDevice()865 void GLPushBuffer::UnmapDevice() {
866 _dbg_assert_msg_(OnRenderThread(), "UnmapDevice must run on render thread");
867
868 for (auto &info : buffers_) {
869 if (info.deviceMemory) {
870 // TODO: Technically this can return false?
871 info.buffer->Unmap();
872 info.deviceMemory = nullptr;
873 }
874 }
875 }
876
// Maps the GL buffer for writing per the given strategy, preferring immutable
// storage (glBufferStorage) when available, then glMapBufferRange, then legacy
// glMapBuffer. Returns nullptr on failure or for the SUBDATA strategy.
void *GLRBuffer::Map(GLBufferStrategy strategy) {
	_assert_(buffer_ != 0);

	// Build the access flags from the strategy bits.
	GLbitfield access = GL_MAP_WRITE_BIT;
	if ((strategy & GLBufferStrategy::MASK_FLUSH) != 0) {
		access |= GL_MAP_FLUSH_EXPLICIT_BIT;
	}
	if ((strategy & GLBufferStrategy::MASK_INVALIDATE) != 0) {
		access |= GL_MAP_INVALIDATE_BUFFER_BIT;
	}

	void *p = nullptr;
	bool allowNativeBuffer = strategy != GLBufferStrategy::SUBDATA;
	if (allowNativeBuffer) {
		glBindBuffer(target_, buffer_);

		if (gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage) {
#if !PPSSPP_PLATFORM(IOS)
			// Allocate immutable storage once per buffer; storage flags must not
			// include the map-only bits (flush-explicit, invalidate).
			if (!hasStorage_) {
				GLbitfield storageFlags = access & ~(GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#ifdef USING_GLES2
#ifdef GL_EXT_buffer_storage
				glBufferStorageEXT(target_, size_, nullptr, storageFlags);
#endif
#else
				glBufferStorage(target_, size_, nullptr, storageFlags);
#endif
				hasStorage_ = true;
			}
#endif
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
			// GLES3 or desktop 3.
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (!gl_extensions.IsGLES) {
#ifndef USING_GLES2
			// Legacy desktop GL fallback.
			p = glMapBuffer(target_, GL_READ_WRITE);
#endif
		}
	}

	mapped_ = p != nullptr;
	return p;
}
921
// Unmaps the buffer. Returns true if GL reports the contents remained intact
// (glUnmapBuffer returns GL_FALSE when the data store was corrupted).
bool GLRBuffer::Unmap() {
	glBindBuffer(target_, buffer_);
	mapped_ = false;
	return glUnmapBuffer(target_) == GL_TRUE;
}
927