#include "ppsspp_config.h"
#include "GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/thin3d.h"
#include "Common/Thread/ThreadUtil.h"

#include "Common/Log.h"
#include "Common/MemoryUtil.h"
#include "Common/Math/math_util.h"

#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

static std::thread::id renderThreadId;
#if MAX_LOGLEVEL >= DEBUG_LEVEL
static bool OnRenderThread() {
	return std::this_thread::get_id() == renderThreadId;
}
#endif

GLRTexture::GLRTexture(int width, int height, int numMips) {
	if (gl_extensions.OES_texture_npot) {
		canWrap = true;
	} else {
		canWrap = isPowerOf2(width) && isPowerOf2(height);
	}
	w = width;
	h = height;
	this->numMips = numMips;
}

GLRTexture::~GLRTexture() {
	if (texture) {
		glDeleteTextures(1, &texture);
	}
}

void GLDeleter::Take(GLDeleter &other) {
	_assert_msg_(IsEmpty(), "Deleter already has stuff");
	shaders = std::move(other.shaders);
	programs = std::move(other.programs);
	buffers = std::move(other.buffers);
	textures = std::move(other.textures);
	inputLayouts = std::move(other.inputLayouts);
	framebuffers = std::move(other.framebuffers);
	pushBuffers = std::move(other.pushBuffers);
	other.shaders.clear();
	other.programs.clear();
	other.buffers.clear();
	other.textures.clear();
	other.inputLayouts.clear();
	other.framebuffers.clear();
	other.pushBuffers.clear();
}
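
// Note: deleters are double-buffered per frame slot (deleter / deleter_prev in
// FrameData, rotated in ThreadFrame below), so objects queued for deletion are
// destroyed one cycle later, after the GL work that might reference them has run.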

// Runs on the GPU thread.
void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
	for (auto pushBuffer : pushBuffers) {
		renderManager->UnregisterPushBuffer(pushBuffer);
		if (skipGLCalls) {
			pushBuffer->Destroy(false);
		}
		delete pushBuffer;
	}
	pushBuffers.clear();
	for (auto shader : shaders) {
		if (skipGLCalls)
			shader->shader = 0;  // prevent the glDeleteShader
		delete shader;
	}
	shaders.clear();
	for (auto program : programs) {
		if (skipGLCalls)
			program->program = 0;  // prevent the glDeleteProgram
		delete program;
	}
	programs.clear();
	for (auto buffer : buffers) {
		if (skipGLCalls)
			buffer->buffer_ = 0;
		delete buffer;
	}
	buffers.clear();
	for (auto texture : textures) {
		if (skipGLCalls)
			texture->texture = 0;
		delete texture;
	}
	textures.clear();
	for (auto inputLayout : inputLayouts) {
		// No GL objects in an inputLayout yet
		delete inputLayout;
	}
	inputLayouts.clear();
	for (auto framebuffer : framebuffers) {
		if (skipGLCalls) {
			framebuffer->handle = 0;
			framebuffer->color_texture.texture = 0;
			framebuffer->z_stencil_buffer = 0;
			framebuffer->z_stencil_texture.texture = 0;
			framebuffer->z_buffer = 0;
			framebuffer->stencil_buffer = 0;
		}
		delete framebuffer;
	}
	framebuffers.clear();
}
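
// When skipGLCalls is set (for example at shutdown on Android, where the GL
// context may already be gone), the handles are zeroed above so the destructors
// skip their glDelete* calls instead of touching a dead context.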

GLRenderManager::GLRenderManager() {
}

GLRenderManager::~GLRenderManager() {
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		_assert_(frameData_[i].deleter.IsEmpty());
		_assert_(frameData_[i].deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown?
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}

void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	threadFrame_ = threadInitFrame_;
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
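	// Net effect of the switch below: FRAME_UNMAP on NVIDIA when mapping is
	// usable, plain glBufferSubData uploads (SUBDATA) everywhere else.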
	if (mapBuffers) {
		switch (gl_extensions.gpuVendor) {
		case GPU_VENDOR_NVIDIA:
			bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
			break;

		// Temporarily disabled because it doesn't work with task switching on Android.
		// The mapped buffer seems to just be pulled out like a rug from under us, crashing
		// as soon as any write happens, which can happen during shutdown since we write from the
		// Emu thread which may not yet have shut down. There may be solutions to this, but for now,
		// disable this strategy to avoid crashing.
		//case GPU_VENDOR_QUALCOMM:
		//	bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP;
		//	break;

		default:
			bufferStrategy_ = GLBufferStrategy::SUBDATA;
		}
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}

void GLRenderManager::ThreadEnd() {
	INFO_LOG(G3D, "ThreadEnd");

	// Wait for any shutdown to complete in StopThread().
	std::unique_lock<std::mutex> lock(mutex_);
	queueRunner_.DestroyDeviceObjects();
	VLOG("PULL: Quitting");

	// Good point to run all the deleters to get rid of leftover objects.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		// Since we're in shutdown, we should skip the GL calls on Android.
		frameData_[i].deleter.Perform(this, skipGLCalls_);
		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
		for (int j = 0; j < (int)frameData_[i].steps.size(); j++) {
			delete frameData_[i].steps[j];
		}
		frameData_[i].steps.clear();
		frameData_[i].initSteps.clear();
	}
	deleter_.Perform(this, skipGLCalls_);

	for (int i = 0; i < (int)steps_.size(); i++) {
		delete steps_[i];
	}
	steps_.clear();
	initSteps_.clear();
}

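// Typical host render-thread loop (a sketch - the surrounding glue is
// hypothetical, only ThreadStart/ThreadFrame/ThreadEnd are this class's API):
//
//   renderManager->ThreadStart(draw);
//   while (renderManager->ThreadFrame()) {
//     // One full frame was run; the swap happened inside EndSubmitFrame().
//   }
//   renderManager->ThreadEnd();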
bool GLRenderManager::ThreadFrame() {
	std::unique_lock<std::mutex> lock(mutex_);
	if (!run_)
		return false;

	// In case of syncs or other partial completion, we keep going until we complete a frame.
	do {
		if (nextFrame) {
			threadFrame_++;
			if (threadFrame_ >= inflightFrames_)
				threadFrame_ = 0;
		}
		FrameData &frameData = frameData_[threadFrame_];
		{
			std::unique_lock<std::mutex> lock(frameData.pull_mutex);
			while (!frameData.readyForRun && run_) {
				VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame_);
				frameData.pull_condVar.wait(lock);
			}
			if (!frameData.readyForRun && !run_) {
				// This means we're out of frames to render and run_ is false, so bail.
				return false;
			}
			VLOG("PULL: Setting frame[%d].readyForRun = false", threadFrame_);
			frameData.readyForRun = false;
			frameData.deleter_prev.Perform(this, skipGLCalls_);
			frameData.deleter_prev.Take(frameData.deleter);
			// Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false,
			// but that created a race condition where frames could end up not finished properly on resize etc.

			// Only increment next time if we're done.
			nextFrame = frameData.type == GLRRunType::END;
			_assert_(frameData.type == GLRRunType::END || frameData.type == GLRRunType::SYNC);
		}
		VLOG("PULL: Running frame %d", threadFrame_);
		if (firstFrame) {
			INFO_LOG(G3D, "Running first frame (%d)", threadFrame_);
			firstFrame = false;
		}
		Run(threadFrame_);
		VLOG("PULL: Finished frame %d", threadFrame_);
	} while (!nextFrame);
	return true;
}

void GLRenderManager::StopThread() {
	// Since we don't control the thread directly, this will only pause the thread.

	if (run_) {
		run_ = false;
		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
			auto &frameData = frameData_[i];
			{
				std::unique_lock<std::mutex> lock(frameData.push_mutex);
				frameData.push_condVar.notify_all();
			}
			{
				std::unique_lock<std::mutex> lock(frameData.pull_mutex);
				frameData.pull_condVar.notify_all();
			}
		}

		// Wait until we've definitely stopped the threadframe.
		std::unique_lock<std::mutex> lock(mutex_);

		INFO_LOG(G3D, "GL submission thread paused. Frame=%d", curFrame_);

		// Eat whatever has been queued up for this frame if anything.
		Wipe();

		// Wait for any fences to finish and be resignaled, so we don't have sync issues.
		// Also clean out any queued data, which might refer to things that might not be valid
		// when we restart...
		for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
			auto &frameData = frameData_[i];
			std::unique_lock<std::mutex> lock(frameData.push_mutex);
			if (frameData.readyForRun || frameData.steps.size() != 0) {
				Crash();
			}
			frameData.readyForRun = false;
			frameData.readyForSubmit = false;
			for (size_t j = 0; j < frameData.steps.size(); j++) {
				delete frameData.steps[j];
			}
			frameData.steps.clear();
			frameData.initSteps.clear();

			while (!frameData.readyForFence) {
				VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i);
				frameData.push_condVar.wait(lock);
			}
		}
	} else {
		INFO_LOG(G3D, "GL submission thread was already paused.");
	}
}

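// Example call (a sketch - 'render' and 'fb' are hypothetical): start a pass
// on fb that clears color but keeps the existing depth/stencil contents:
//
//   render->BindFramebufferAsRenderTarget(fb, GLRRenderPassAction::CLEAR,
//       GLRRenderPassAction::KEEP, GLRRenderPassAction::KEEP,
//       0xFF000000, 0.0f, 0, "ExamplePass");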
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif
	// Eliminate dupes.
	if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels..");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->render.numDraws = 0;
	step->tag = tag;
	steps_.push_back(step);

	GLuint clearMask = 0;
	GLRRenderData data;
	data.cmd = GLRRenderCommand::CLEAR;
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}
}

void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
	GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
	data.bind_fb_texture.slot = binding;
	data.bind_fb_texture.framebuffer = fb;
	data.bind_fb_texture.aspect = aspectBit;
	curRenderStep_->commands.push_back(data);
	curRenderStep_->dependencies.insert(fb);
}

void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::COPY };
	step->copy.srcRect = srcRect;
	step->copy.dstPos = dstPos;
	step->copy.src = src;
	step->copy.dst = dst;
	step->copy.aspectMask = aspectMask;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::BLIT };
	step->blit.srcRect = srcRect;
	step->blit.dstRect = dstRect;
	step->blit.src = src;
	step->blit.dst = dst;
	step->blit.aspectMask = aspectMask;
	step->blit.filter = filter;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

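// Example (a sketch - assumes 4 bytes per pixel and a stride of w pixels):
//
//   std::vector<uint8_t> out(w * h * 4);
//   render->CopyFramebufferToMemorySync(src, GL_COLOR_BUFFER_BIT, 0, 0, w, h,
//       Draw::DataFormat::R8G8B8A8_UNORM, out.data(), w, "ExampleReadback");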
bool GLRenderManager::CopyFramebufferToMemorySync(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(pixels);

	GLRStep *step = new GLRStep{ GLRStepType::READBACK };
	step->readback.src = src;
	step->readback.srcRect = { x, y, w, h };
	step->readback.aspectMask = aspectBits;
	step->readback.dstFormat = destFormat;
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	Draw::DataFormat srcFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyReadbackBuffer(w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}

void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);
	GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE };
	step->readback_image.texture = texture;
	step->readback_image.mipLevel = mipLevel;
	step->readback_image.srcRect = { x, y, w, h };
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	queueRunner_.CopyReadbackBuffer(w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}

void GLRenderManager::BeginFrame() {
	VLOG("BeginFrame");

#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	int curFrame = GetCurFrame();
	FrameData &frameData = frameData_[curFrame];

	// Make sure the very last command buffer from the frame before the previous has been fully executed.
	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
		frameData.readyForSubmit = true;
	}

	VLOG("PUSH: Fencing %d", curFrame);

	// glFenceSync(&frameData.fence...)

	// Must be after the fence - this performs deletes.
	VLOG("PUSH: BeginFrame %d", curFrame);
	if (!run_) {
		WARN_LOG(G3D, "BeginFrame while !run_!");
	}

	// vulkan_->BeginFrame();
	// In GL, we have to do deletes on the submission thread.

	insideFrame_ = true;
	renderStepOffset_ = 0;
}

void GLRenderManager::Finish() {
	curRenderStep_ = nullptr;
	int curFrame = GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	{
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true, notifying pull", curFrame);
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.initSteps = std::move(initSteps_);
		initSteps_.clear();
		frameData.readyForRun = true;
		frameData.type = GLRRunType::END;
		frameData_[curFrame_].deleter.Take(deleter_);
	}

	// Notify calls do not in fact need to be done with the mutex locked.
	frameData.pull_condVar.notify_all();

	curFrame_++;
	if (curFrame_ >= inflightFrames_)
		curFrame_ = 0;

	insideFrame_ = false;
}

void GLRenderManager::BeginSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	if (!frameData.hasBegun) {
		frameData.hasBegun = true;
	}
}

// Render thread
void GLRenderManager::Submit(int frame, bool triggerFence) {
	FrameData &frameData = frameData_[frame];

	// In GL, submission happens automatically in Run().

	// When !triggerFence, the notify happens later, after the sync completes (see EndSyncFrame()).

	if (triggerFence) {
		VLOG("PULL: Frame %d.readyForFence = true", frame);

		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		_assert_(frameData.readyForSubmit);
		frameData.readyForFence = true;
		frameData.readyForSubmit = false;
		frameData.push_condVar.notify_all();
	}
}

// Render thread
void GLRenderManager::EndSubmitFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	frameData.hasBegun = false;

	Submit(frame, true);

	if (!frameData.skipSwap) {
		if (swapIntervalChanged_) {
			swapIntervalChanged_ = false;
			if (swapIntervalFunction_) {
				swapIntervalFunction_(swapInterval_);
			}
		}
		if (swapFunction_) {
			swapFunction_();
		}
	} else {
		frameData.skipSwap = false;
	}
}

// Render thread
void GLRenderManager::Run(int frame) {
	BeginSubmitFrame(frame);

	FrameData &frameData = frameData_[frame];

	auto &stepsOnThread = frameData_[frame].steps;
	auto &initStepsOnThread = frameData_[frame].initSteps;
	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(initStepsOnThread, skipGLCalls_);
	initStepsOnThread.clear();

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	queueRunner_.RunSteps(stepsOnThread, skipGLCalls_);
	stepsOnThread.clear();

	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (frameData.type) {
	case GLRRunType::END:
		EndSubmitFrame(frame);
		break;

	case GLRRunType::SYNC:
		EndSyncFrame(frame);
		break;

	default:
		_assert_(false);
	}

	VLOG("PULL: Finished running frame %d", frame);
}

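// Runs on the emu/submission thread: hands all queued steps to the render
// thread as a SYNC run, then blocks until readyForFence is signaled, so that
// readback results are complete before this returns.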
void GLRenderManager::FlushSync() {
	// TODO: Reset curRenderStep_?
	renderStepOffset_ += (int)steps_.size();

	int curFrame = curFrame_;
	FrameData &frameData = frameData_[curFrame];
	{
		std::unique_lock<std::mutex> lock(frameData.pull_mutex);
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame);
		frameData.initSteps = std::move(initSteps_);
		initSteps_.clear();
		frameData.steps = std::move(steps_);
		steps_.clear();
		frameData.readyForRun = true;
		_assert_(frameData.readyForFence == false);
		frameData.type = GLRRunType::SYNC;
		frameData.pull_condVar.notify_all();
	}
	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		// Wait for the flush to be hit, since we're syncing.
		while (!frameData.readyForFence) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame);
			frameData.push_condVar.wait(lock);
		}
		frameData.readyForFence = false;
		frameData.readyForSubmit = true;
	}
}

// Render thread
void GLRenderManager::EndSyncFrame(int frame) {
	FrameData &frameData = frameData_[frame];
	Submit(frame, false);

	// glFinish is not actually necessary here, and won't be until we start using
	// glBufferStorage. Then we need to use fences.
	// glFinish();

	// At this point we can resume filling the command buffers for the current frame since
	// we know the device is idle - and thus all previously enqueued command buffers have been processed.
	// No need to switch to the next frame number.

	{
		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		frameData.readyForFence = true;
		frameData.readyForSubmit = true;
		frameData.push_condVar.notify_all();
	}
}

void GLRenderManager::Wipe() {
	initSteps_.clear();
	for (auto step : steps_) {
		delete step;
	}
	steps_.clear();
}

void GLRenderManager::WaitUntilQueueIdle() {
	// Just wait for all frames to be ready.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		FrameData &frameData = frameData_[i];

		std::unique_lock<std::mutex> lock(frameData.push_mutex);
		// Ignore unsubmitted frames.
		while (!frameData.readyForFence && frameData.readyForRun) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (wait idle)", i);
			frameData.push_condVar.wait(lock);
		}
	}
}

GLPushBuffer::GLPushBuffer(GLRenderManager *render, GLuint target, size_t size) : render_(render), size_(size), target_(target) {
	bool res = AddBuffer();
	_assert_(res);
}

GLPushBuffer::~GLPushBuffer() {
	Destroy(true);
}

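// Rough lifecycle: the emu thread Map()s a buffer, writes through writePtr_,
// and Unmap()s; on the render thread, Run() calls Flush()/UnmapDevice() before
// executing the frame's steps and MapDevice() again afterwards.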
void GLPushBuffer::Map() {
	_assert_(!writePtr_);
	auto &info = buffers_[buf_];
	writePtr_ = info.deviceMemory ? info.deviceMemory : info.localMemory;
	info.flushOffset = 0;
	// Force alignment.  This is needed for PushAligned() to work as expected.
	while ((intptr_t)writePtr_ & 15) {
		writePtr_++;
		offset_++;
		info.flushOffset++;
	}
	_assert_(writePtr_);
}

void GLPushBuffer::Unmap() {
	_assert_(writePtr_);
	if (!buffers_[buf_].deviceMemory) {
		// Here we simply upload the data to the last buffer.
		// Might be worth trying with size_ instead of offset_, so the driver can replace
		// the whole buffer. At least if it's close.
		render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].localMemory, false);
	} else {
		buffers_[buf_].flushOffset = offset_;
	}
	writePtr_ = nullptr;
}

void GLPushBuffer::Flush() {
	// Must be called from the render thread.
	_dbg_assert_(OnRenderThread());

	buffers_[buf_].flushOffset = offset_;
	if (!buffers_[buf_].deviceMemory && writePtr_) {
		auto &info = buffers_[buf_];
		if (info.flushOffset != 0) {
			_assert_(info.buffer->buffer_);
			glBindBuffer(target_, info.buffer->buffer_);
			glBufferSubData(target_, 0, info.flushOffset, info.localMemory);
		}

		// Here we will submit all the draw calls, with the already known buffer and offsets.
		// Might as well reset the write pointer here and start over the current buffer.
		writePtr_ = info.localMemory;
		offset_ = 0;
		info.flushOffset = 0;
	}

	// For device memory, we flush all buffers here.
	if ((strategy_ & GLBufferStrategy::MASK_FLUSH) != 0) {
		for (auto &info : buffers_) {
			if (info.flushOffset == 0 || !info.deviceMemory)
				continue;

			glBindBuffer(target_, info.buffer->buffer_);
			glFlushMappedBufferRange(target_, 0, info.flushOffset);
			info.flushOffset = 0;
		}
	}
}

bool GLPushBuffer::AddBuffer() {
	BufInfo info;
	info.localMemory = (uint8_t *)AllocateAlignedMemory(size_, 16);
	if (!info.localMemory)
		return false;
	info.buffer = render_->CreateBuffer(target_, size_, GL_DYNAMIC_DRAW);
	buf_ = buffers_.size();
	buffers_.push_back(info);
	return true;
}

void GLPushBuffer::Destroy(bool onRenderThread) {
	if (buf_ == -1)
		return;  // Already destroyed
	for (BufInfo &info : buffers_) {
		// This will automatically unmap device memory, if needed.
		// NOTE: We immediately delete the buffer, don't go through the deleter, if we're on the render thread.
		if (onRenderThread) {
			delete info.buffer;
		} else {
			render_->DeleteBuffer(info.buffer);
		}

		FreeAlignedMemory(info.localMemory);
	}
	buffers_.clear();
	buf_ = -1;
}

void GLPushBuffer::NextBuffer(size_t minSize) {
	// First, unmap the current memory.
	Unmap();

	buf_++;
	if (buf_ >= buffers_.size() || minSize > size_) {
		// Before creating the buffer, adjust to the new size_ if necessary.
		while (size_ < minSize) {
			size_ <<= 1;
		}

		bool res = AddBuffer();
		_assert_(res);
		if (!res) {
			// Let's try not to crash at least?
			buf_ = 0;
		}
	}

	// Now, move to the next buffer and map it.
	offset_ = 0;
	Map();
}

void GLPushBuffer::Defragment() {
	_dbg_assert_msg_(!OnRenderThread(), "Defragment must not run on the render thread");

	if (buffers_.size() <= 1) {
		// Let's take this chance to jettison localMemory we don't need.
		for (auto &info : buffers_) {
			if (info.deviceMemory) {
				FreeAlignedMemory(info.localMemory);
				info.localMemory = nullptr;
			}
		}

		return;
	}

	// Okay, we have more than one.  Destroy them all and start over with a larger one.
	size_t newSize = size_ * buffers_.size();
	Destroy(false);

	size_ = newSize;
	bool res = AddBuffer();
	_assert_msg_(res, "AddBuffer failed");
}

size_t GLPushBuffer::GetTotalSize() const {
	size_t sum = 0;
	if (buffers_.size() > 1)
		sum += size_ * (buffers_.size() - 1);
	sum += offset_;
	return sum;
}

void GLPushBuffer::MapDevice(GLBufferStrategy strategy) {
	_dbg_assert_msg_(OnRenderThread(), "MapDevice must run on render thread");

	strategy_ = strategy;
	if (strategy_ == GLBufferStrategy::SUBDATA) {
		return;
	}

	bool mapChanged = false;
	for (auto &info : buffers_) {
		if (!info.buffer->buffer_ || info.deviceMemory) {
			// Can't map - no device buffer associated yet or already mapped.
			continue;
		}

		info.deviceMemory = (uint8_t *)info.buffer->Map(strategy_);
		mapChanged = mapChanged || info.deviceMemory != nullptr;

		if (!info.deviceMemory && !info.localMemory) {
			// Somehow it failed, let's dodge crashing.
			info.localMemory = (uint8_t *)AllocateAlignedMemory(info.buffer->size_, 16);
			mapChanged = true;
		}

		_dbg_assert_msg_(info.localMemory || info.deviceMemory, "Local or device memory must succeed");
	}

	if (writePtr_ && mapChanged) {
		// This can happen during a sync.  Remap.
		writePtr_ = nullptr;
		Map();
	}
}

void GLPushBuffer::UnmapDevice() {
	_dbg_assert_msg_(OnRenderThread(), "UnmapDevice must run on render thread");

	for (auto &info : buffers_) {
		if (info.deviceMemory) {
			// TODO: Technically this can return false?
			info.buffer->Unmap();
			info.deviceMemory = nullptr;
		}
	}
}

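// Maps the buffer for writing. The access flags follow the strategy: explicit
// flush strategies add GL_MAP_FLUSH_EXPLICIT_BIT, invalidating strategies add
// GL_MAP_INVALIDATE_BUFFER_BIT; old desktop GL falls back to glMapBuffer.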
void *GLRBuffer::Map(GLBufferStrategy strategy) {
	_assert_(buffer_ != 0);

	GLbitfield access = GL_MAP_WRITE_BIT;
	if ((strategy & GLBufferStrategy::MASK_FLUSH) != 0) {
		access |= GL_MAP_FLUSH_EXPLICIT_BIT;
	}
	if ((strategy & GLBufferStrategy::MASK_INVALIDATE) != 0) {
		access |= GL_MAP_INVALIDATE_BUFFER_BIT;
	}

	void *p = nullptr;
	bool allowNativeBuffer = strategy != GLBufferStrategy::SUBDATA;
	if (allowNativeBuffer) {
		glBindBuffer(target_, buffer_);

		if (gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage) {
#if !PPSSPP_PLATFORM(IOS)
			if (!hasStorage_) {
				GLbitfield storageFlags = access & ~(GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#ifdef USING_GLES2
#ifdef GL_EXT_buffer_storage
				glBufferStorageEXT(target_, size_, nullptr, storageFlags);
#endif
#else
				glBufferStorage(target_, size_, nullptr, storageFlags);
#endif
				hasStorage_ = true;
			}
#endif
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
			// GLES3 or desktop 3.
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (!gl_extensions.IsGLES) {
#ifndef USING_GLES2
			p = glMapBuffer(target_, GL_READ_WRITE);
#endif
		}
	}

	mapped_ = p != nullptr;
	return p;
}

bool GLRBuffer::Unmap() {
	glBindBuffer(target_, buffer_);
	mapped_ = false;
	return glUnmapBuffer(target_) == GL_TRUE;
}