1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 #include <sstream>
20 #include <cmath>
21 
22 #include "Common/GPU/thin3d.h"
23 #include "Common/GPU/OpenGL/GLFeatures.h"
24 #include "Common/Data/Convert/ColorConv.h"
25 #include "Common/Data/Text/I18n.h"
26 #include "Common/Common.h"
27 #include "Core/Config.h"
28 #include "Core/ConfigValues.h"
29 #include "Core/Core.h"
30 #include "Core/CoreParameter.h"
31 #include "Core/Debugger/MemBlockInfo.h"
32 #include "Core/Host.h"
33 #include "Core/MIPS/MIPS.h"
34 #include "Core/Reporting.h"
35 #include "GPU/Common/DrawEngineCommon.h"
36 #include "GPU/Common/FramebufferManagerCommon.h"
37 #include "GPU/Common/PostShader.h"
38 #include "GPU/Common/PresentationCommon.h"
39 #include "GPU/Common/TextureCacheCommon.h"
40 #include "GPU/Common/ReinterpretFramebuffer.h"
41 #include "GPU/Debugger/Record.h"
42 #include "GPU/Debugger/Stepping.h"
43 #include "GPU/GPUInterface.h"
44 #include "GPU/GPUState.h"
45 
FramebufferManagerCommon(Draw::DrawContext * draw)46 FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
47 	: draw_(draw),
48 		displayFormat_(GE_FORMAT_565) {
49 	presentation_ = new PresentationCommon(draw);
50 }
51 
~FramebufferManagerCommon()52 FramebufferManagerCommon::~FramebufferManagerCommon() {
53 	DeviceLost();
54 
55 	DecimateFBOs();
56 	for (auto vfb : vfbs_) {
57 		DestroyFramebuf(vfb);
58 	}
59 	vfbs_.clear();
60 
61 	for (auto &tempFB : tempFBOs_) {
62 		tempFB.second.fbo->Release();
63 	}
64 	tempFBOs_.clear();
65 
66 	// Do the same for ReadFramebuffersToMemory's VFBs
67 	for (auto vfb : bvfbs_) {
68 		DestroyFramebuf(vfb);
69 	}
70 	bvfbs_.clear();
71 
72 	delete presentation_;
73 }
74 
Init()75 void FramebufferManagerCommon::Init() {
76 	// We may need to override the render size if the shader is upscaling or SSAA.
77 	Resized();
78 }
79 
UpdateSize()80 bool FramebufferManagerCommon::UpdateSize() {
81 	const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight;
82 	const bool newSettings = bloomHack_ != g_Config.iBloomHack || useBufferedRendering_ != (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE);
83 
84 	renderWidth_ = (float)PSP_CoreParameter().renderWidth;
85 	renderHeight_ = (float)PSP_CoreParameter().renderHeight;
86 	renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
87 	pixelWidth_ = PSP_CoreParameter().pixelWidth;
88 	pixelHeight_ = PSP_CoreParameter().pixelHeight;
89 	bloomHack_ = g_Config.iBloomHack;
90 	useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
91 
92 	presentation_->UpdateSize(pixelWidth_, pixelHeight_, renderWidth_, renderHeight_);
93 
94 	return newRender || newSettings;
95 }
96 
BeginFrame()97 void FramebufferManagerCommon::BeginFrame() {
98 	DecimateFBOs();
99 	currentRenderVfb_ = nullptr;
100 }
101 
SetDisplayFramebuffer(u32 framebuf,u32 stride,GEBufferFormat format)102 void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
103 	displayFramebufPtr_ = framebuf;
104 	displayStride_ = stride;
105 	displayFormat_ = format;
106 	GPURecord::NotifyDisplay(framebuf, stride, format);
107 }
108 
GetVFBAt(u32 addr)109 VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) {
110 	addr &= 0x3FFFFFFF;
111 	VirtualFramebuffer *match = nullptr;
112 	for (size_t i = 0; i < vfbs_.size(); ++i) {
113 		VirtualFramebuffer *v = vfbs_[i];
114 		if (v->fb_address == addr) {
115 			// Could check w too but whatever
116 			if (match == nullptr || match->last_frame_render < v->last_frame_render) {
117 				match = v;
118 			}
119 		}
120 	}
121 	return match;
122 }
123 
ColorBufferByteSize(const VirtualFramebuffer * vfb) const124 u32 FramebufferManagerCommon::ColorBufferByteSize(const VirtualFramebuffer *vfb) const {
125 	return vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
126 }
127 
ShouldDownloadFramebuffer(const VirtualFramebuffer * vfb) const128 bool FramebufferManagerCommon::ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const {
129 	return PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000;
130 }
131 
132 // Heuristics to figure out the size of FBO to create.
133 // TODO: Possibly differentiate on whether through mode is used (since in through mode, viewport is meaningless?)
EstimateDrawingSize(u32 fb_address,GEBufferFormat fb_format,int viewport_width,int viewport_height,int region_width,int region_height,int scissor_width,int scissor_height,int fb_stride,int & drawing_width,int & drawing_height)134 void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int fb_stride, int &drawing_width, int &drawing_height) {
135 	static const int MAX_FRAMEBUF_HEIGHT = 512;
136 
137 	// Games don't always set any of these.  Take the greatest parameter that looks valid based on stride.
138 	if (viewport_width > 4 && viewport_width <= fb_stride && viewport_height > 0) {
139 		drawing_width = viewport_width;
140 		drawing_height = viewport_height;
141 		// Some games specify a viewport with 0.5, but don't have VRAM for 273.  480x272 is the buffer size.
142 		if (viewport_width == 481 && region_width == 480 && viewport_height == 273 && region_height == 272) {
143 			drawing_width = 480;
144 			drawing_height = 272;
145 		}
146 		// Sometimes region is set larger than the VRAM for the framebuffer.
147 		// However, in one game it's correctly set as a larger height (see #7277) with the same width.
148 		// A bit of a hack, but we try to handle that unusual case here.
149 		if (region_width <= fb_stride && (region_width > drawing_width || (region_width == drawing_width && region_height > drawing_height)) && region_height <= MAX_FRAMEBUF_HEIGHT) {
150 			drawing_width = region_width;
151 			drawing_height = std::max(drawing_height, region_height);
152 		}
153 		// Scissor is often set to a subsection of the framebuffer, so we pay the least attention to it.
154 		if (scissor_width <= fb_stride && scissor_width > drawing_width && scissor_height <= MAX_FRAMEBUF_HEIGHT) {
155 			drawing_width = scissor_width;
156 			drawing_height = std::max(drawing_height, scissor_height);
157 		}
158 	} else {
159 		// If viewport wasn't valid, let's just take the greatest anything regardless of stride.
160 		drawing_width = std::min(std::max(region_width, scissor_width), fb_stride);
161 		drawing_height = std::max(region_height, scissor_height);
162 	}
163 
164 	if (scissor_width == 481 && region_width == 480 && scissor_height == 273 && region_height == 272) {
165 		drawing_width = 480;
166 		drawing_height = 272;
167 	}
168 
169 	// Assume no buffer is > 512 tall, it couldn't be textured or displayed fully if so.
170 	if (drawing_height >= MAX_FRAMEBUF_HEIGHT) {
171 		if (region_height < MAX_FRAMEBUF_HEIGHT) {
172 			drawing_height = region_height;
173 		} else if (scissor_height < MAX_FRAMEBUF_HEIGHT) {
174 			drawing_height = scissor_height;
175 		}
176 	}
177 
178 	if (viewport_width != region_width) {
179 		// The majority of the time, these are equal.  If not, let's check what we know.
180 		u32 nearest_address = 0xFFFFFFFF;
181 		for (size_t i = 0; i < vfbs_.size(); ++i) {
182 			const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF;
183 			if (other_address > fb_address && other_address < nearest_address) {
184 				nearest_address = other_address;
185 			}
186 		}
187 
188 		// Unless the game is using overlapping buffers, the next buffer should be far enough away.
189 		// This catches some cases where we can know this.
190 		// Hmm.  The problem is that we could only catch it for the first of two buffers...
191 		const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2;
192 		int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
193 		if (avail_height < drawing_height && avail_height == region_height) {
194 			drawing_width = std::min(region_width, fb_stride);
195 			drawing_height = avail_height;
196 		}
197 
198 		// Some games draw buffers interleaved, with a high stride/region/scissor but default viewport.
199 		if (fb_stride == 1024 && region_width == 1024 && scissor_width == 1024) {
200 			drawing_width = 1024;
201 		}
202 	}
203 
204 	DEBUG_LOG(G3D, "Est: %08x V: %ix%i, R: %ix%i, S: %ix%i, STR: %i, THR:%i, Z:%08x = %ix%i", fb_address, viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough(), gstate.isDepthWriteEnabled() ? gstate.getDepthBufAddress() : 0, drawing_width, drawing_height);
205 }
206 
GetFramebufferHeuristicInputs(FramebufferHeuristicParams * params,const GPUgstate & gstate)207 void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
208 	params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000;  // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
209 	params->fb_stride = gstate.FrameBufStride();
210 
211 	params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000;
212 	params->z_stride = gstate.DepthBufStride();
213 
214 	if (params->z_address == params->fb_address) {
215 		// Probably indicates that the game doesn't care about Z for this VFB.
216 		// Let's avoid matching it for Z copies and other shenanigans.
217 		params->z_address = 0;
218 		params->z_stride = 0;
219 	}
220 
221 	params->fmt = gstate.FrameBufFormat();
222 
223 	params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
224 	// Technically, it may write depth later, but we're trying to detect it only when it's really true.
225 	if (gstate.isModeClear()) {
226 		// Not quite seeing how this makes sense..
227 		params->isWritingDepth = !gstate.isClearModeDepthMask() && gstate.isDepthWriteEnabled();
228 	} else {
229 		params->isWritingDepth = gstate.isDepthWriteEnabled();
230 	}
231 	params->isDrawing = !gstate.isModeClear() || !gstate.isClearModeColorMask() || !gstate.isClearModeAlphaMask();
232 	params->isModeThrough = gstate.isModeThrough();
233 
234 	// Viewport-X1 and Y1 are not the upper left corner, but half the width/height. A bit confusing.
235 	float vpx = gstate.getViewportXScale();
236 	float vpy = gstate.getViewportYScale();
237 
238 	// Work around problem in F1 Grand Prix, where it draws in through mode with a bogus viewport.
239 	// We set bad values to 0 which causes the framebuffer size heuristic to rely on the other parameters instead.
240 	if (std::isnan(vpx) || vpx > 10000000.0f) {
241 		vpx = 0.f;
242 	}
243 	if (std::isnan(vpy) || vpy > 10000000.0f) {
244 		vpy = 0.f;
245 	}
246 	params->viewportWidth = (int)(fabsf(vpx) * 2.0f);
247 	params->viewportHeight = (int)(fabsf(vpy) * 2.0f);
248 	params->regionWidth = gstate.getRegionX2() + 1;
249 	params->regionHeight = gstate.getRegionY2() + 1;
250 	params->scissorWidth = gstate.getScissorX2() + 1;
251 	params->scissorHeight = gstate.getScissorY2() + 1;
252 }
253 
DoSetRenderFrameBuffer(const FramebufferHeuristicParams & params,u32 skipDrawReason)254 VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const FramebufferHeuristicParams &params, u32 skipDrawReason) {
255 	gstate_c.Clean(DIRTY_FRAMEBUF);
256 
257 	// Collect all parameters. This whole function has really become a cesspool of heuristics...
258 	// but it appears that's what it takes, unless we emulate VRAM layout more accurately somehow.
259 
260 	// As there are no clear "framebuffer width" and "framebuffer height" registers,
261 	// we need to infer the size of the current framebuffer somehow.
262 	int drawing_width, drawing_height;
263 	EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, 4), drawing_width, drawing_height);
264 
265 	gstate_c.SetCurRTOffsetX(0);
266 	bool vfbFormatChanged = false;
267 
268 	// Find a matching framebuffer
269 	VirtualFramebuffer *vfb = nullptr;
270 	for (size_t i = 0; i < vfbs_.size(); ++i) {
271 		VirtualFramebuffer *v = vfbs_[i];
272 		if (v->fb_address == params.fb_address) {
273 			vfb = v;
274 			// Update fb stride in case it changed
275 			if (vfb->fb_stride != params.fb_stride) {
276 				vfb->fb_stride = params.fb_stride;
277 				vfbFormatChanged = true;
278 			}
279 			if (vfb->format != params.fmt) {
280 				vfb->format = params.fmt;
281 				vfbFormatChanged = true;
282 			}
283 
284 			if (vfb->z_address == 0 && vfb->z_stride == 0 && params.z_stride != 0) {
285 				// Got one that was created by CreateRAMFramebuffer. Since it has no depth buffer,
286 				// we just recreate it immediately.
287 				ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
288 			}
289 
290 			// Keep track, but this isn't really used.
291 			vfb->z_stride = params.z_stride;
292 			// Heuristic: In throughmode, a higher height could be used.  Let's avoid shrinking the buffer.
293 			if (params.isModeThrough && (int)vfb->width <= params.fb_stride) {
294 				vfb->width = std::max((int)vfb->width, drawing_width);
295 				vfb->height = std::max((int)vfb->height, drawing_height);
296 			} else {
297 				vfb->width = drawing_width;
298 				vfb->height = drawing_height;
299 			}
300 			break;
301 		} else if (v->fb_address < params.fb_address && v->fb_address + v->fb_stride * 4 > params.fb_address) {
302 			// Possibly a render-to-offset.
303 			const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;
304 			const int x_offset = (params.fb_address - v->fb_address) / bpp;
305 			if (v->format == params.fmt && v->fb_stride == params.fb_stride && x_offset < params.fb_stride && v->height >= drawing_height) {
306 				WARN_LOG_REPORT_ONCE(renderoffset, HLE, "Rendering to framebuffer offset: %08x +%dx%d", v->fb_address, x_offset, 0);
307 				vfb = v;
308 				gstate_c.SetCurRTOffsetX(x_offset);
309 				vfb->width = std::max((int)vfb->width, x_offset + drawing_width);
310 				// To prevent the newSize code from being confused.
311 				drawing_width += x_offset;
312 				break;
313 			}
314 		}
315 	}
316 
317 	if (vfb) {
318 		if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
319 			// Even if it's not newly wrong, if this is larger we need to resize up.
320 			if (vfb->width > vfb->bufferWidth || vfb->height > vfb->bufferHeight) {
321 				ResizeFramebufFBO(vfb, vfb->width, vfb->height);
322 			} else if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
323 				// If it's newly wrong, or changing every frame, just keep track.
324 				vfb->newWidth = drawing_width;
325 				vfb->newHeight = drawing_height;
326 				vfb->lastFrameNewSize = gpuStats.numFlips;
327 			} else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
328 				// Okay, it's changed for a while (and stayed that way.)  Let's start over.
329 				// But only if we really need to, to avoid blinking.
330 				bool needsRecreate = vfb->bufferWidth > params.fb_stride;
331 				needsRecreate = needsRecreate || vfb->newWidth > vfb->bufferWidth || vfb->newWidth * 2 < vfb->bufferWidth;
332 				needsRecreate = needsRecreate || vfb->newHeight > vfb->bufferHeight || vfb->newHeight * 2 < vfb->bufferHeight;
333 				if (needsRecreate) {
334 					ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
335 					// Let's discard this information, might be wrong now.
336 					vfb->safeWidth = 0;
337 					vfb->safeHeight = 0;
338 				} else {
339 					// Even though we won't resize it, let's at least change the size params.
340 					vfb->width = drawing_width;
341 					vfb->height = drawing_height;
342 				}
343 			}
344 		} else {
345 			// It's not different, let's keep track of that too.
346 			vfb->lastFrameNewSize = gpuStats.numFlips;
347 		}
348 	}
349 
350 	// None found? Create one.
351 	if (!vfb) {
352 		vfb = new VirtualFramebuffer{};
353 		vfb->fbo = nullptr;
354 		vfb->fb_address = params.fb_address;
355 		vfb->fb_stride = params.fb_stride;
356 		vfb->z_address = params.z_address;
357 		vfb->z_stride = params.z_stride;
358 
359 		// The other width/height parameters are set in ResizeFramebufFBO below.
360 		vfb->width = drawing_width;
361 		vfb->height = drawing_height;
362 		vfb->newWidth = drawing_width;
363 		vfb->newHeight = drawing_height;
364 		vfb->lastFrameNewSize = gpuStats.numFlips;
365 		vfb->format = params.fmt;
366 		vfb->drawnFormat = params.fmt;
367 		vfb->usageFlags = FB_USAGE_RENDERTARGET;
368 
369 		u32 byteSize = ColorBufferByteSize(vfb);
370 		if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
371 			framebufRangeEnd_ = params.fb_address + byteSize;
372 		}
373 
374 		// This is where we actually create the framebuffer. The true is "force".
375 		ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
376 		NotifyRenderFramebufferCreated(vfb);
377 
378 		SetColorUpdated(vfb, skipDrawReason);
379 
380 		INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %i x %i x %i", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, vfb->format);
381 
382 		vfb->last_frame_render = gpuStats.numFlips;
383 		frameLastFramebufUsed_ = gpuStats.numFlips;
384 		vfbs_.push_back(vfb);
385 		currentRenderVfb_ = vfb;
386 
387 		if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) {
388 			gpu->PerformMemoryUpload(params.fb_address, byteSize);
389 			NotifyStencilUpload(params.fb_address, byteSize, StencilUpload::STENCIL_IS_ZERO);
390 			// TODO: Is it worth trying to upload the depth buffer?
391 		}
392 
393 		// Let's check for depth buffer overlap.  Might be interesting.
394 		bool sharingReported = false;
395 		for (size_t i = 0, end = vfbs_.size(); i < end; ++i) {
396 			if (vfbs_[i]->z_stride != 0 && params.fb_address == vfbs_[i]->z_address) {
397 				// If it's clearing it, most likely it just needs more video memory.
398 				// Technically it could write something interesting and the other might not clear, but that's not likely.
399 				if (params.isDrawing) {
400 					if (params.fb_address != params.z_address && vfbs_[i]->fb_address != vfbs_[i]->z_address) {
401 						WARN_LOG_REPORT(SCEGE, "FBO created from existing depthbuffer as color, %08x/%08x and %08x/%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
402 					}
403 				}
404 			} else if (params.z_stride != 0 && params.z_address == vfbs_[i]->fb_address) {
405 				// If it's clearing it, then it's probably just the reverse of the above case.
406 				if (params.isWritingDepth) {
407 					WARN_LOG_REPORT(SCEGE, "FBO using existing buffer as depthbuffer, %08x/%08x and %08x/%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
408 				}
409 			} else if (vfbs_[i]->z_stride != 0 && params.z_address == vfbs_[i]->z_address && params.fb_address != vfbs_[i]->fb_address && !sharingReported) {
410 				// This happens a lot, but virtually always it's cleared.
411 				// It's possible the other might not clear, but when every game is reported it's not useful.
412 				if (params.isWritingDepth) {
413 					WARN_LOG(SCEGE, "FBO reusing depthbuffer, c=%08x/d=%08x and c=%08x/d=%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
414 					sharingReported = true;
415 				}
416 			}
417 		}
418 
419 	// We already have it!
420 	} else if (vfb != currentRenderVfb_) {
421 		// Use it as a render target.
422 		DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
423 		vfb->usageFlags |= FB_USAGE_RENDERTARGET;
424 		vfb->last_frame_render = gpuStats.numFlips;
425 		frameLastFramebufUsed_ = gpuStats.numFlips;
426 		vfb->dirtyAfterDisplay = true;
427 		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
428 			vfb->reallyDirtyAfterDisplay = true;
429 
430 		VirtualFramebuffer *prev = currentRenderVfb_;
431 		currentRenderVfb_ = vfb;
432 		NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
433 	} else {
434 		vfb->last_frame_render = gpuStats.numFlips;
435 		frameLastFramebufUsed_ = gpuStats.numFlips;
436 		vfb->dirtyAfterDisplay = true;
437 		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
438 			vfb->reallyDirtyAfterDisplay = true;
439 
440 		NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged);
441 	}
442 
443 	gstate_c.curRTWidth = vfb->width;
444 	gstate_c.curRTHeight = vfb->height;
445 	gstate_c.curRTRenderWidth = vfb->renderWidth;
446 	gstate_c.curRTRenderHeight = vfb->renderHeight;
447 	return vfb;
448 }
449 
DestroyFramebuf(VirtualFramebuffer * v)450 void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
451 	// Notify the texture cache of both the color and depth buffers.
452 	textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
453 	if (v->fbo) {
454 		v->fbo->Release();
455 		v->fbo = nullptr;
456 	}
457 
458 	// Wipe some pointers
459 	if (currentRenderVfb_ == v)
460 		currentRenderVfb_ = nullptr;
461 	if (displayFramebuf_ == v)
462 		displayFramebuf_ = nullptr;
463 	if (prevDisplayFramebuf_ == v)
464 		prevDisplayFramebuf_ = nullptr;
465 	if (prevPrevDisplayFramebuf_ == v)
466 		prevPrevDisplayFramebuf_ = nullptr;
467 
468 	delete v;
469 }
470 
BlitFramebufferDepth(VirtualFramebuffer * src,VirtualFramebuffer * dst)471 void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) {
472 	int w = std::min(src->renderWidth, dst->renderWidth);
473 	int h = std::min(src->renderHeight, dst->renderHeight);
474 
475 	// Note: We prefer Blit ahead of Copy here, since at least on GL, Copy will always also copy stencil which we don't want. See #9740.
476 	if (gstate_c.Supports(GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH)) {
477 		draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
478 		RebindFramebuffer("After BlitFramebufferDepth");
479 	} else if (gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE)) {
480 		draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "BlitFramebufferDepth");
481 		RebindFramebuffer("After BlitFramebufferDepth");
482 	}
483 	dst->last_frame_depth_updated = gpuStats.numFlips;
484 }
485 
NotifyRenderFramebufferCreated(VirtualFramebuffer * vfb)486 void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
487 	if (!useBufferedRendering_) {
488 		// Let's ignore rendering to targets that have not (yet) been displayed.
489 		gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
490 	} else if (currentRenderVfb_) {
491 		DownloadFramebufferOnSwitch(currentRenderVfb_);
492 	}
493 
494 	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
495 
496 	// Ugly...
497 	if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
498 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
499 	}
500 	if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
501 		gstate_c.Dirty(DIRTY_PROJMATRIX);
502 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
503 	}
504 }
505 
NotifyRenderFramebufferUpdated(VirtualFramebuffer * vfb,bool vfbFormatChanged)506 void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
507 	if (vfbFormatChanged) {
508 		textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
509 		if (vfb->drawnFormat != vfb->format) {
510 			ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
511 		}
512 	}
513 
514 	// ugly...
515 	if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
516 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
517 	}
518 	if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
519 		gstate_c.Dirty(DIRTY_PROJMATRIX);
520 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
521 	}
522 }
523 
// Debug tags for reinterpret passes, indexed as [old format][new format].
// Can't easily dynamically create these strings, we just pass along the pointer.
// Judging by the names, index 0 is 565, 1 is 5551 and 2 is 4444; the diagonal
// entries cover the (normally unused) same-format case.
static const char *reinterpretStrings[3][3] = {
	// from 565
	{ "self_reinterpret_565", "reinterpret_565_to_5551", "reinterpret_565_to_4444" },
	// from 5551
	{ "reinterpret_5551_to_565", "self_reinterpret_5551", "reinterpret_5551_to_4444" },
	// from 4444
	{ "reinterpret_4444_to_565", "reinterpret_4444_to_5551", "self_reinterpret_4444" },
};
542 
ReinterpretFramebuffer(VirtualFramebuffer * vfb,GEBufferFormat oldFormat,GEBufferFormat newFormat)543 void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, GEBufferFormat oldFormat, GEBufferFormat newFormat) {
544 	if (!useBufferedRendering_ || !vfb->fbo) {
545 		return;
546 	}
547 
548 	_assert_(newFormat != oldFormat);
549 	// The caller is responsible for updating the format.
550 	_assert_(newFormat == vfb->format);
551 
552 	ShaderLanguage lang = draw_->GetShaderLanguageDesc().shaderLanguage;
553 
554 	bool doReinterpret = PSP_CoreParameter().compat.flags().ReinterpretFramebuffers &&
555 		(lang == HLSL_D3D11 || lang == GLSL_VULKAN || lang == GLSL_3xx);
556 	// Copy image required for now.
557 	if (!gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE))
558 		doReinterpret = false;
559 	if (!doReinterpret) {
560 		// Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.
561 		if (oldFormat == GE_FORMAT_565) {
562 			// We have to bind here instead of clear, since it can be that no framebuffer is bound.
563 			// The backend can sometimes directly optimize it to a clear.
564 
565 			// Games that are marked as doing reinterpret just ignore this - better to keep the data than to clear.
566 			// Fixes #13717.
567 			if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) {
568 				draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret");
569 				// Need to dirty anything that has command buffer dynamic state, in case we started a new pass above.
570 				// Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager.
571 				gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE);
572 
573 				if (currentRenderVfb_ != vfb) {
574 					// In case ReinterpretFramebuffer was called from the texture manager.
575 					draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After FakeReinterpret");
576 				}
577 			}
578 		}
579 		return;
580 	}
581 
582 	// We only reinterpret between 16 - bit formats, for now.
583 	if (!IsGeBufferFormat16BitColor(oldFormat) || !IsGeBufferFormat16BitColor(newFormat)) {
584 		// 16->32 and 32->16 will require some more specialized shaders.
585 		return;
586 	}
587 
588 	char *vsCode = nullptr;
589 	char *fsCode = nullptr;
590 
591 	if (!reinterpretVS_) {
592 		vsCode = new char[4000];
593 		const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
594 		GenerateReinterpretVertexShader(vsCode, shaderLanguageDesc);
595 		reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "reinterpret_vs");
596 		_assert_(reinterpretVS_);
597 	}
598 
599 	if (!reinterpretSampler_) {
600 		Draw::SamplerStateDesc samplerDesc{};
601 		samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
602 		samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
603 		reinterpretSampler_ = draw_->CreateSamplerState(samplerDesc);
604 	}
605 
606 	if (!reinterpretVBuf_) {
607 		reinterpretVBuf_ = draw_->CreateBuffer(12 * 3, Draw::BufferUsageFlag::DYNAMIC | Draw::BufferUsageFlag::VERTEXDATA);
608 	}
609 
610 	// See if we need to create a new pipeline.
611 
612 	Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
613 	if (!pipeline) {
614 		fsCode = new char[4000];
615 		const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
616 		GenerateReinterpretFragmentShader(fsCode, oldFormat, newFormat, shaderLanguageDesc);
617 		Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "reinterpret_fs");
618 		_assert_(reinterpretFS);
619 
620 		std::vector<Draw::ShaderModule *> shaders;
621 		shaders.push_back(reinterpretVS_);
622 		shaders.push_back(reinterpretFS);
623 
624 		using namespace Draw;
625 		Draw::PipelineDesc desc{};
626 		// We use a "fullscreen triangle".
627 		// TODO: clear the stencil buffer. Hard to actually initialize it with the new alpha, though possible - let's see if
628 		// we need it.
629 		DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS });
630 		BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF });
631 		RasterState *rasterNoCull = draw_->CreateRasterState({});
632 
633 		// No uniforms for these, only a single texture input.
634 		PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, nullptr, depth, blendstateOff, rasterNoCull, nullptr };
635 		pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
636 		_assert_(pipeline != nullptr);
637 		reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline;
638 
639 		depth->Release();
640 		blendstateOff->Release();
641 		rasterNoCull->Release();
642 		reinterpretFS->Release();
643 	}
644 
645 	// Copy to a temp framebuffer.
646 	Draw::Framebuffer *temp = GetTempFBO(TempFBO::REINTERPRET, vfb->renderWidth, vfb->renderHeight);
647 
648 	draw_->InvalidateCachedState();
649 	draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep");
650 	draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, reinterpretStrings[(int)oldFormat][(int)newFormat]);
651 	draw_->BindPipeline(pipeline);
652 	draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0);
653 	draw_->BindSamplerStates(0, 1, &reinterpretSampler_);
654 	draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
655 	Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
656 	draw_->SetViewports(1, &vp);
657 	// Vertex buffer not used - vertices generated in shader.
658 	// TODO: Switch to a vertex buffer for GLES2/D3D9 compat.
659 	draw_->BindVertexBuffers(0, 1, &reinterpretVBuf_, nullptr);
660 	draw_->Draw(3, 0);
661 	draw_->InvalidateCachedState();
662 
663 	// Unbind.
664 	draw_->BindTexture(0, nullptr);
665 
666 	shaderManager_->DirtyLastShader();
667 	textureCache_->ForgetLastTexture();
668 
669 	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
670 
671 	if (currentRenderVfb_ != vfb) {
672 		// In case ReinterpretFramebuffer was called from the texture manager.
673 		draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After reinterpret");
674 	}
675 	delete[] vsCode;
676 	delete[] fsCode;
677 }
678 
NotifyRenderFramebufferSwitched(VirtualFramebuffer * prevVfb,VirtualFramebuffer * vfb,bool isClearingDepth)679 void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
680 	if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) {
681 		ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height);
682 		vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
683 		vfb->firstFrameSaved = true;
684 	} else {
685 		DownloadFramebufferOnSwitch(prevVfb);
686 	}
687 	textureCache_->ForgetLastTexture();
688 	shaderManager_->DirtyLastShader();
689 
690 	if (prevVfb) {
691 		// Copy depth value from the previously bound framebuffer to the current one.
692 		// We check that the address is the same within BlitFramebufferDepth before actually blitting.
693 
694 		bool hasNewerDepth = prevVfb->last_frame_depth_render != 0 && prevVfb->last_frame_depth_render >= vfb->last_frame_depth_updated;
695 		if (!prevVfb->fbo || !vfb->fbo || !useBufferedRendering_ || !hasNewerDepth || isClearingDepth) {
696 			// If depth wasn't updated, then we're at least "two degrees" away from the data.
697 			// This is an optimization: it probably doesn't need to be copied in this case.
698 		} else {
699 			bool matchingDepthBuffer = prevVfb->z_address == vfb->z_address && prevVfb->z_stride != 0 && vfb->z_stride != 0;
700 			bool matchingSize = prevVfb->width == vfb->width && prevVfb->height == vfb->height;
701 			if (matchingDepthBuffer && matchingSize) {
702 				BlitFramebufferDepth(prevVfb, vfb);
703 			}
704 		}
705 	}
706 
707 	if (vfb->drawnFormat != vfb->format) {
708 		ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
709 	}
710 
711 	if (useBufferedRendering_) {
712 		if (vfb->fbo) {
713 			shaderManager_->DirtyLastShader();
714 			draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "FramebufferSwitch");
715 		} else {
716 			// This should only happen very briefly when toggling useBufferedRendering_.
717 			ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
718 		}
719 	} else {
720 		if (vfb->fbo) {
721 			// This should only happen very briefly when toggling useBufferedRendering_.
722 			textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
723 			vfb->fbo->Release();
724 			vfb->fbo = nullptr;
725 		}
726 
727 		// Let's ignore rendering to targets that have not (yet) been displayed.
728 		if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) {
729 			gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
730 		} else {
731 			gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
732 		}
733 	}
734 	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
735 
736 	// ugly... is all this needed?
737 	if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
738 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
739 	}
740 	if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
741 		gstate_c.Dirty(DIRTY_PROJMATRIX);
742 		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
743 	}
744 }
745 
NotifyVideoUpload(u32 addr,int size,int width,GEBufferFormat fmt)746 void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt) {
747 	// Note: UpdateFromMemory() is still called later.
748 	// This is a special case where we have extra information prior to the invalidation.
749 
750 	// TODO: Could possibly be an offset...
751 	VirtualFramebuffer *vfb = GetVFBAt(addr);
752 	if (vfb) {
753 		if (vfb->format != fmt || vfb->drawnFormat != fmt) {
754 			DEBUG_LOG(ME, "Changing format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
755 			vfb->format = fmt;
756 			vfb->drawnFormat = fmt;
757 
758 			// Let's count this as a "render".  This will also force us to use the correct format.
759 			vfb->last_frame_render = gpuStats.numFlips;
760 		}
761 
762 		if (vfb->fb_stride < width) {
763 			DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
764 			const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;
765 			ResizeFramebufFBO(vfb, width, size / (bpp * width));
766 			// Resizing may change the viewport/etc.
767 			gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
768 			vfb->fb_stride = width;
769 			// This might be a bit wider than necessary, but we'll redetect on next render.
770 			vfb->width = width;
771 		}
772 	}
773 }
774 
UpdateFromMemory(u32 addr,int size,bool safe)775 void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
776 	// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
777 	addr &= 0x3FFFFFFF;
778 	// TODO: Could go through all FBOs, but probably not important?
779 	// TODO: Could also check for inner changes, but video is most important.
780 	bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
781 	if (isDisplayBuf || safe) {
782 		// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
783 		if (!Memory::IsValidAddress(displayFramebufPtr_))
784 			return;
785 
786 		for (size_t i = 0; i < vfbs_.size(); ++i) {
787 			VirtualFramebuffer *vfb = vfbs_[i];
788 			if (vfb->fb_address == addr) {
789 				FlushBeforeCopy();
790 
791 				if (useBufferedRendering_ && vfb->fbo) {
792 					GEBufferFormat fmt = vfb->format;
793 					if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
794 						// If we're not rendering to it, format may be wrong.  Use displayFormat_ instead.
795 						fmt = displayFormat_;
796 					}
797 					DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height);
798 					SetColorUpdated(vfb, gstate_c.skipDrawReason);
799 				} else {
800 					INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
801 					DestroyFramebuf(vfb);
802 					vfbs_.erase(vfbs_.begin() + i--);
803 				}
804 			}
805 		}
806 
807 		RebindFramebuffer("RebindFramebuffer - UpdateFromMemory");
808 	}
809 	// TODO: Necessary?
810 	gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
811 }
812 
DrawPixels(VirtualFramebuffer * vfb,int dstX,int dstY,const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride,int width,int height)813 void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
814 	textureCache_->ForgetLastTexture();
815 	shaderManager_->DirtyLastShader();  // On GL, important that this is BEFORE drawing
816 	float u0 = 0.0f, u1 = 1.0f;
817 	float v0 = 0.0f, v1 = 1.0f;
818 
819 	DrawTextureFlags flags;
820 	if (useBufferedRendering_ && vfb && vfb->fbo) {
821 		flags = DRAWTEX_LINEAR;
822 		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "DrawPixels");
823 		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
824 		SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
825 		draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
826 	} else {
827 		// We are drawing directly to the back buffer so need to flip.
828 		// Should more of this be handled by the presentation engine?
829 		if (needBackBufferYSwap_)
830 			std::swap(v0, v1);
831 		flags = g_Config.iBufFilter == SCALE_LINEAR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
832 		flags = flags | DRAWTEX_TO_BACKBUFFER;
833 		FRect frame = GetScreenFrame(pixelWidth_, pixelHeight_);
834 		FRect rc;
835 		CenterDisplayOutputRect(&rc, 480.0f, 272.0f, frame, ROTATION_LOCKED_HORIZONTAL);
836 		SetViewport2D(rc.x, rc.y, rc.w, rc.h);
837 		draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
838 	}
839 
840 	Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height, u1, v1);
841 	if (pixelsTex) {
842 		draw_->BindTextures(0, 1, &pixelsTex);
843 		Bind2DShader();
844 		DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);
845 		gpuStats.numUploads++;
846 		pixelsTex->Release();
847 		draw_->InvalidateCachedState();
848 
849 		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
850 	}
851 }
852 
BindFramebufferAsColorTexture(int stage,VirtualFramebuffer * framebuffer,int flags)853 bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
854 	if (!framebuffer->fbo || !useBufferedRendering_) {
855 		draw_->BindTexture(stage, nullptr);
856 		gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
857 		return false;
858 	}
859 
860 	// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
861 	// Let's just not bother with the copy in that case.
862 	bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY) || GPUStepping::IsStepping();
863 
864 	// Currently rendering to this framebuffer. Need to make a copy.
865 	if (!skipCopy && framebuffer == currentRenderVfb_) {
866 		// TODO: Maybe merge with bvfbs_?  Not sure if those could be packing, and they're created at a different size.
867 		Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
868 		if (renderCopy) {
869 			VirtualFramebuffer copyInfo = *framebuffer;
870 			copyInfo.fbo = renderCopy;
871 			CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags);
872 			RebindFramebuffer("After BindFramebufferAsColorTexture");
873 			draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0);
874 		} else {
875 			draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
876 		}
877 		return true;
878 	} else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) {
879 		draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
880 		return true;
881 	} else {
882 		ERROR_LOG_REPORT_ONCE(vulkanSelfTexture, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
883 		// To do this safely in Vulkan, we need to use input attachments.
884 		// Actually if the texture region and render regions don't overlap, this is safe, but we need
885 		// to transition to GENERAL image layout which will take some trickery.
886 		// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
887 		draw_->BindTexture(stage, nullptr);
888 		gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
889 		return false;
890 	}
891 }
892 
CopyFramebufferForColorTexture(VirtualFramebuffer * dst,VirtualFramebuffer * src,int flags)893 void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags) {
894 	int x = 0;
895 	int y = 0;
896 	int w = src->drawnWidth;
897 	int h = src->drawnHeight;
898 
899 	// If max is not > min, we probably could not detect it.  Skip.
900 	// See the vertex decoder, where this is updated.
901 	if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
902 		x = std::max(gstate_c.vertBounds.minU, (u16)0);
903 		y = std::max(gstate_c.vertBounds.minV, (u16)0);
904 		w = std::min(gstate_c.vertBounds.maxU, src->drawnWidth) - x;
905 		h = std::min(gstate_c.vertBounds.maxV, src->drawnHeight) - y;
906 
907 		// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
908 		if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
909 			x += gstate_c.curTextureXOffset;
910 			y += gstate_c.curTextureYOffset;
911 		}
912 
913 		// We'll have to reapply these next time since we cropped to UV.
914 		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
915 	}
916 
917 	if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
918 		BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, "Blit_CopyFramebufferForColorTexture");
919 	}
920 }
921 
MakePixelTexture(const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride,int width,int height,float & u1,float & v1)922 Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
923 	// TODO: We can just change the texture format and flip some bits around instead of this.
924 	// Could share code with the texture cache perhaps.
925 	auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
926 		for (int y = 0; y < height; y++) {
927 			const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
928 			const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
929 			u32 *dst = (u32 *)(data + byteStride * y);
930 			switch (srcPixelFormat) {
931 			case GE_FORMAT_565:
932 				if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
933 					ConvertRGB565ToBGRA8888(dst, src16, width);
934 				else
935 					ConvertRGB565ToRGBA8888(dst, src16, width);
936 				break;
937 
938 			case GE_FORMAT_5551:
939 				if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
940 					ConvertRGBA5551ToBGRA8888(dst, src16, width);
941 				else
942 					ConvertRGBA5551ToRGBA8888(dst, src16, width);
943 				break;
944 
945 			case GE_FORMAT_4444:
946 				if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
947 					ConvertRGBA4444ToBGRA8888(dst, src16, width);
948 				else
949 					ConvertRGBA4444ToRGBA8888(dst, src16, width);
950 				break;
951 
952 			case GE_FORMAT_8888:
953 				if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
954 					ConvertRGBA8888ToBGRA8888(dst, src32, width);
955 				// This means use original pointer as-is.  May avoid or optimize a copy.
956 				else if (srcStride == width)
957 					return false;
958 				else
959 					memcpy(dst, src32, width * 4);
960 				break;
961 
962 			case GE_FORMAT_INVALID:
963 			case GE_FORMAT_DEPTH16:
964 				_dbg_assert_msg_(false, "Invalid pixelFormat passed to DrawPixels().");
965 				break;
966 			}
967 		}
968 		return true;
969 	};
970 
971 	Draw::TextureDesc desc{
972 		Draw::TextureType::LINEAR2D,
973 		preferredPixelsFormat_,
974 		width,
975 		height,
976 		1,
977 		1,
978 		false,
979 		"DrawPixels",
980 		{ (uint8_t *)srcPixels },
981 		generateTexture,
982 	};
983 	// TODO: On Vulkan, use a custom allocator?  Important to use an allocator:
984 	// Hot Shot Golf (#12355) does tons of these in a frame in some situations! So actually,
985 	// we do use an allocator. In fact, I've now banned allocator-less textures.
986 	Draw::Texture *tex = draw_->CreateTexture(desc);
987 	if (!tex)
988 		ERROR_LOG(G3D, "Failed to create drawpixels texture");
989 	return tex;
990 }
991 
DrawFramebufferToOutput(const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride)992 void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride) {
993 	textureCache_->ForgetLastTexture();
994 	shaderManager_->DirtyLastShader();
995 
996 	float u0 = 0.0f, u1 = 480.0f / 512.0f;
997 	float v0 = 0.0f, v1 = 1.0f;
998 	Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272, u1, v1);
999 	if (!pixelsTex)
1000 		return;
1001 
1002 	int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1003 	OutputFlags flags = g_Config.iBufFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1004 	if (needBackBufferYSwap_) {
1005 		flags |= OutputFlags::BACKBUFFER_FLIPPED;
1006 	}
1007 	// DrawActiveTexture reverses these, probably to match "up".
1008 	if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
1009 		flags |= OutputFlags::POSITION_FLIPPED;
1010 	}
1011 
1012 	presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1013 	presentation_->SourceTexture(pixelsTex, 512, 272);
1014 	presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1015 	pixelsTex->Release();
1016 
1017 	// PresentationCommon sets all kinds of state, we can't rely on anything.
1018 	gstate_c.Dirty(DIRTY_ALL);
1019 
1020 	currentRenderVfb_ = nullptr;
1021 }
1022 
DownloadFramebufferOnSwitch(VirtualFramebuffer * vfb)1023 void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
1024 	if (vfb && vfb->safeWidth > 0 && vfb->safeHeight > 0 && !vfb->firstFrameSaved && !vfb->memoryUpdated) {
1025 		// Some games will draw to some memory once, and use it as a render-to-texture later.
1026 		// To support this, we save the first frame to memory when we have a safe w/h.
1027 		// Saving each frame would be slow.
1028 		if (!g_Config.bDisableSlowFramebufEffects && !PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) {
1029 			ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight);
1030 			vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1031 			vfb->firstFrameSaved = true;
1032 			vfb->safeWidth = 0;
1033 			vfb->safeHeight = 0;
1034 		}
1035 	}
1036 }
1037 
SetViewport2D(int x,int y,int w,int h)1038 void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) {
1039 	Draw::Viewport vp{ (float)x, (float)y, (float)w, (float)h, 0.0f, 1.0f };
1040 	draw_->SetViewports(1, &vp);
1041 }
1042 
CopyDisplayToOutput(bool reallyDirty)1043 void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
1044 	DownloadFramebufferOnSwitch(currentRenderVfb_);
1045 	shaderManager_->DirtyLastShader();
1046 
1047 	currentRenderVfb_ = nullptr;
1048 
1049 	if (displayFramebufPtr_ == 0) {
1050 		if (Core_IsStepping())
1051 			VERBOSE_LOG(FRAMEBUF, "Display disabled, displaying only black");
1052 		else
1053 			DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black");
1054 		// No framebuffer to display! Clear to black.
1055 		if (useBufferedRendering_) {
1056 			draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput");
1057 		}
1058 		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1059 		return;
1060 	}
1061 
1062 	u32 offsetX = 0;
1063 	u32 offsetY = 0;
1064 
1065 	// If it's not really dirty, we're probably frameskipping.  Use the last working one.
1066 	u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
1067 	prevDisplayFramebufPtr_ = fbaddr;
1068 
1069 	VirtualFramebuffer *vfb = GetVFBAt(fbaddr);
1070 	if (!vfb) {
1071 		// Let's search for a framebuf within this range. Note that we also look for
1072 		// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
1073 		// and uncached bits of the address when comparing.
1074 		const u32 addr = fbaddr & 0x3FFFFFFF;
1075 		for (size_t i = 0; i < vfbs_.size(); ++i) {
1076 			VirtualFramebuffer *v = vfbs_[i];
1077 			const u32 v_addr = v->fb_address & 0x3FFFFFFF;
1078 			const u32 v_size = ColorBufferByteSize(v);
1079 			if (addr >= v_addr && addr < v_addr + v_size) {
1080 				const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2;
1081 				const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
1082 				const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
1083 				// We have enough space there for the display, right?
1084 				if (v_offsetX + 480 > (u32)v->fb_stride || v->bufferHeight < v_offsetY + 272) {
1085 					continue;
1086 				}
1087 				// Check for the closest one.
1088 				if (offsetY == 0 || offsetY > v_offsetY) {
1089 					offsetX = v_offsetX;
1090 					offsetY = v_offsetY;
1091 					vfb = v;
1092 				}
1093 			}
1094 		}
1095 
1096 		if (vfb) {
1097 			// Okay, we found one above.
1098 			// Log should be "Displaying from framebuf" but not worth changing the report.
1099 			INFO_LOG_REPORT_ONCE(displayoffset, FRAMEBUF, "Rendering from framebuf with offset %08x -> %08x+%dx%d", addr, vfb->fb_address, offsetX, offsetY);
1100 		}
1101 	}
1102 
1103 	if (vfb && vfb->format != displayFormat_) {
1104 		if (vfb->last_frame_render + FBO_OLD_AGE < gpuStats.numFlips) {
1105 			// The game probably switched formats on us.
1106 			vfb->format = displayFormat_;
1107 		} else {
1108 			vfb = 0;
1109 		}
1110 	}
1111 
1112 	if (!vfb) {
1113 		if (Memory::IsValidAddress(fbaddr)) {
1114 			// The game is displaying something directly from RAM. In GTA, it's decoded video.
1115 			if (!vfb) {
1116 				DrawFramebufferToOutput(Memory::GetPointer(fbaddr), displayFormat_, displayStride_);
1117 				return;
1118 			}
1119 		} else {
1120 			DEBUG_LOG(FRAMEBUF, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
1121 			// No framebuffer to display! Clear to black.
1122 			if (useBufferedRendering_) {
1123 				// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1124 				draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_NoFBO");
1125 			}
1126 			gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1127 			return;
1128 		}
1129 	}
1130 
1131 	vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
1132 	vfb->last_frame_displayed = gpuStats.numFlips;
1133 	vfb->dirtyAfterDisplay = false;
1134 	vfb->reallyDirtyAfterDisplay = false;
1135 
1136 	if (prevDisplayFramebuf_ != displayFramebuf_) {
1137 		prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
1138 	}
1139 	if (displayFramebuf_ != vfb) {
1140 		prevDisplayFramebuf_ = displayFramebuf_;
1141 	}
1142 	displayFramebuf_ = vfb;
1143 
1144 	if (vfb->fbo) {
1145 		if (Core_IsStepping())
1146 			VERBOSE_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);
1147 		else
1148 			DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);
1149 
1150 		// TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame.
1151 
1152 		float u0 = offsetX / (float)vfb->bufferWidth;
1153 		float v0 = offsetY / (float)vfb->bufferHeight;
1154 		float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
1155 		float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;
1156 
1157 		textureCache_->ForgetLastTexture();
1158 
1159 		int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1160 		OutputFlags flags = g_Config.iBufFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1161 		if (needBackBufferYSwap_) {
1162 			flags |= OutputFlags::BACKBUFFER_FLIPPED;
1163 		}
1164 		// DrawActiveTexture reverses these, probably to match "up".
1165 		if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
1166 			flags |= OutputFlags::POSITION_FLIPPED;
1167 		}
1168 
1169 		int actualWidth = (vfb->bufferWidth * vfb->renderWidth) / vfb->width;
1170 		int actualHeight = (vfb->bufferHeight * vfb->renderHeight) / vfb->height;
1171 		presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1172 		presentation_->SourceFramebuffer(vfb->fbo, actualWidth, actualHeight);
1173 		presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1174 	} else if (useBufferedRendering_) {
1175 		WARN_LOG(FRAMEBUF, "Current VFB lacks an FBO: %08x", vfb->fb_address);
1176 	}
1177 
1178 	// This may get called mid-draw if the game uses an immediate flip.
1179 	// PresentationCommon sets all kinds of state, we can't rely on anything.
1180 	gstate_c.Dirty(DIRTY_ALL);
1181 }
1182 
DecimateFBOs()1183 void FramebufferManagerCommon::DecimateFBOs() {
1184 	currentRenderVfb_ = nullptr;
1185 
1186 	for (auto iter : fbosToDelete_) {
1187 		iter->Release();
1188 	}
1189 	fbosToDelete_.clear();
1190 
1191 	for (size_t i = 0; i < vfbs_.size(); ++i) {
1192 		VirtualFramebuffer *vfb = vfbs_[i];
1193 		int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
1194 
1195 		if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) {
1196 			ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height);
1197 			vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1198 			vfb->firstFrameSaved = true;
1199 		}
1200 
1201 		// Let's also "decimate" the usageFlags.
1202 		UpdateFramebufUsage(vfb);
1203 
1204 		if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
1205 			if (age > FBO_OLD_AGE) {
1206 				INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
1207 				DestroyFramebuf(vfb);
1208 				vfbs_.erase(vfbs_.begin() + i--);
1209 			}
1210 		}
1211 	}
1212 
1213 	for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
1214 		int age = frameLastFramebufUsed_ - it->second.last_frame_used;
1215 		if (age > FBO_OLD_AGE) {
1216 			it->second.fbo->Release();
1217 			it = tempFBOs_.erase(it);
1218 		} else {
1219 			++it;
1220 		}
1221 	}
1222 
1223 	// Do the same for ReadFramebuffersToMemory's VFBs
1224 	for (size_t i = 0; i < bvfbs_.size(); ++i) {
1225 		VirtualFramebuffer *vfb = bvfbs_[i];
1226 		int age = frameLastFramebufUsed_ - vfb->last_frame_render;
1227 		if (age > FBO_OLD_AGE) {
1228 			INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
1229 			DestroyFramebuf(vfb);
1230 			bvfbs_.erase(bvfbs_.begin() + i--);
1231 		}
1232 	}
1233 }
1234 
1235 // Requires width/height to be set already.
ResizeFramebufFBO(VirtualFramebuffer * vfb,int w,int h,bool force,bool skipCopy)1236 void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force, bool skipCopy) {
1237 	_dbg_assert_(w > 0);
1238 	_dbg_assert_(h > 0);
1239 	VirtualFramebuffer old = *vfb;
1240 
1241 	int oldWidth = vfb->bufferWidth;
1242 	int oldHeight = vfb->bufferHeight;
1243 
1244 	if (force) {
1245 		vfb->bufferWidth = w;
1246 		vfb->bufferHeight = h;
1247 	} else {
1248 		if (vfb->bufferWidth >= w && vfb->bufferHeight >= h) {
1249 			return;
1250 		}
1251 
1252 		// In case it gets thin and wide, don't resize down either side.
1253 		vfb->bufferWidth = std::max((int)vfb->bufferWidth, w);
1254 		vfb->bufferHeight = std::max((int)vfb->bufferHeight, h);
1255 	}
1256 
1257 	bool force1x = false;
1258 	switch (bloomHack_) {
1259 	case 1:
1260 		force1x = vfb->bufferWidth <= 128 || vfb->bufferHeight <= 64;
1261 		break;
1262 	case 2:
1263 		force1x = vfb->bufferWidth <= 256 || vfb->bufferHeight <= 128;
1264 		break;
1265 	case 3:
1266 		force1x = vfb->bufferWidth < 480 || vfb->bufferWidth > 800 || vfb->bufferHeight < 272; // GOW uses 864x272
1267 		break;
1268 	}
1269 
1270 	if (PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000) {
1271 		force1x = true;
1272 	}
1273 
1274 	if (force1x && g_Config.iInternalResolution != 1) {
1275 		vfb->renderScaleFactor = 1.0f;
1276 		vfb->renderWidth = vfb->bufferWidth;
1277 		vfb->renderHeight = vfb->bufferHeight;
1278 	} else {
1279 		vfb->renderScaleFactor = renderScaleFactor_;
1280 		vfb->renderWidth = (u16)(vfb->bufferWidth * renderScaleFactor_);
1281 		vfb->renderHeight = (u16)(vfb->bufferHeight * renderScaleFactor_);
1282 	}
1283 
1284 	// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
1285 	// It's not worth the trouble trying to support lower bit-depth rendering, just
1286 	// more cases to test that nobody will ever use.
1287 
1288 	textureCache_->ForgetLastTexture();
1289 
1290 	if (!useBufferedRendering_) {
1291 		if (vfb->fbo) {
1292 			vfb->fbo->Release();
1293 			vfb->fbo = nullptr;
1294 		}
1295 		return;
1296 	}
1297 	if (!old.fbo && vfb->last_frame_failed != 0 && vfb->last_frame_failed - gpuStats.numFlips < 63) {
1298 		// Don't constantly retry FBOs which failed to create.
1299 		return;
1300 	}
1301 
1302 	shaderManager_->DirtyLastShader();
1303 	char tag[128];
1304 	size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
1305 	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag });
1306 	if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
1307 		NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
1308 	}
1309 	if (Memory::IsVRAMAddress(vfb->z_address) && vfb->z_stride != 0) {
1310 		char buf[128];
1311 		size_t len = snprintf(buf, sizeof(buf), "Z_%s", tag);
1312 		NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->fb_stride * vfb->height * sizeof(uint16_t), buf, len);
1313 	}
1314 	if (old.fbo) {
1315 		INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->format));
1316 		if (vfb->fbo) {
1317 			// TODO: Swap the order of the below? That way we can avoid the needGLESRebinds_ check below I think.
1318 			draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1319 			if (!skipCopy) {
1320 				BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, "Blit_ResizeFramebufFBO");
1321 			}
1322 		}
1323 		fbosToDelete_.push_back(old.fbo);
1324 		if (needGLESRebinds_) {
1325 			draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "ResizeFramebufFBO");
1326 		}
1327 	} else {
1328 		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1329 	}
1330 
1331 	if (!vfb->fbo) {
1332 		ERROR_LOG(FRAMEBUF, "Error creating FBO during resize! %dx%d", vfb->renderWidth, vfb->renderHeight);
1333 		vfb->last_frame_failed = gpuStats.numFlips;
1334 	}
1335 }
1336 
1337 // This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers.
1338 // MotoGP goes this path so we need to catch those copies here.
NotifyFramebufferCopy(u32 src,u32 dst,int size,bool isMemset,u32 skipDrawReason)1339 bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset, u32 skipDrawReason) {
1340 	if (size == 0) {
1341 		return false;
1342 	}
1343 
1344 	dst &= 0x3FFFFFFF;
1345 	src &= 0x3FFFFFFF;
1346 
1347 	VirtualFramebuffer *dstBuffer = 0;
1348 	VirtualFramebuffer *srcBuffer = 0;
1349 	u32 dstY = (u32)-1;
1350 	u32 dstH = 0;
1351 	u32 srcY = (u32)-1;
1352 	u32 srcH = 0;
1353 	for (size_t i = 0; i < vfbs_.size(); ++i) {
1354 		VirtualFramebuffer *vfb = vfbs_[i];
1355 		if (vfb->fb_stride == 0) {
1356 			continue;
1357 		}
1358 
1359 		// We only remove the kernel and uncached bits when comparing.
1360 		const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
1361 		const u32 vfb_size = ColorBufferByteSize(vfb);
1362 		const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
1363 		const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
1364 		const int vfb_byteWidth = vfb->width * vfb_bpp;
1365 
1366 		if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
1367 			const u32 offset = dst - vfb_address;
1368 			const u32 yOffset = offset / vfb_byteStride;
1369 			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) {
1370 				dstBuffer = vfb;
1371 				dstY = yOffset;
1372 				dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
1373 			}
1374 		}
1375 
1376 		if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
1377 			const u32 offset = src - vfb_address;
1378 			const u32 yOffset = offset / vfb_byteStride;
1379 			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) {
1380 				srcBuffer = vfb;
1381 				srcY = yOffset;
1382 				srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
1383 			} else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride && yOffset < srcY) {
1384 				// Valkyrie Profile reads 512 bytes at a time, rather than 2048.  So, let's whitelist fb_stride also.
1385 				srcBuffer = vfb;
1386 				srcY = yOffset;
1387 				srcH = 1;
1388 			} else if (yOffset == 0 && yOffset < srcY) {
1389 				// Okay, last try - it might be a clut.
1390 				if (vfb->usageFlags & FB_USAGE_CLUT) {
1391 					srcBuffer = vfb;
1392 					srcY = yOffset;
1393 					srcH = 1;
1394 				}
1395 			}
1396 		}
1397 	}
1398 
1399 	if (!useBufferedRendering_) {
1400 		// If we're copying into a recently used display buf, it's probably destined for the screen.
1401 		if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
1402 			return false;
1403 		}
1404 	}
1405 
1406 	if (!dstBuffer && srcBuffer) {
1407 		// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
1408 		// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
1409 		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB) {
1410 			dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->format);
1411 			dstY = 0;
1412 		}
1413 	}
1414 	if (dstBuffer) {
1415 		dstBuffer->last_frame_used = gpuStats.numFlips;
1416 	}
1417 
1418 	if (dstBuffer && srcBuffer && !isMemset) {
1419 		if (srcBuffer == dstBuffer) {
1420 			WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
1421 		} else {
1422 			WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
1423 			// Just do the blit!
1424 			BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, "Blit_InterBufferMemcpy");
1425 			SetColorUpdated(dstBuffer, skipDrawReason);
1426 			RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
1427 		}
1428 		return false;
1429 	} else if (dstBuffer) {
1430 		if (isMemset) {
1431 			gpuStats.numClears++;
1432 		}
1433 		WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
1434 		FlushBeforeCopy();
1435 		const u8 *srcBase = Memory::GetPointerUnchecked(src);
1436 		DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
1437 		SetColorUpdated(dstBuffer, skipDrawReason);
1438 		RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
1439 		// This is a memcpy, let's still copy just in case.
1440 		return false;
1441 	} else if (srcBuffer) {
1442 		WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
1443 		FlushBeforeCopy();
1444 		if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
1445 			WARN_LOG_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
1446 		} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated && !PSP_CoreParameter().compat.flags().DisableReadbacks) {
1447 			ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH);
1448 			srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1449 		}
1450 		return false;
1451 	} else {
1452 		return false;
1453 	}
1454 }
1455 
1456 // Can't be const, in case it has to create a vfb unfortunately.
FindTransferFramebuffers(VirtualFramebuffer * & dstBuffer,VirtualFramebuffer * & srcBuffer,u32 dstBasePtr,int dstStride,int & dstX,int & dstY,u32 srcBasePtr,int srcStride,int & srcX,int & srcY,int & srcWidth,int & srcHeight,int & dstWidth,int & dstHeight,int bpp)1457 void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) {
1458 	u32 dstYOffset = -1;
1459 	u32 dstXOffset = -1;
1460 	u32 srcYOffset = -1;
1461 	u32 srcXOffset = -1;
1462 	int width = srcWidth;
1463 	int height = srcHeight;
1464 
1465 	dstBasePtr &= 0x3FFFFFFF;
1466 	srcBasePtr &= 0x3FFFFFFF;
1467 
1468 	for (size_t i = 0; i < vfbs_.size(); ++i) {
1469 		VirtualFramebuffer *vfb = vfbs_[i];
1470 		const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
1471 		const u32 vfb_size = ColorBufferByteSize(vfb);
1472 		const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
1473 		const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
1474 		const u32 vfb_byteWidth = vfb->width * vfb_bpp;
1475 
1476 		// These heuristics are a bit annoying.
1477 		// The goal is to avoid using GPU block transfers for things that ought to be memory.
1478 		// Maybe we should even check for textures at these places instead?
1479 
1480 		if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
1481 			const u32 byteOffset = dstBasePtr - vfb_address;
1482 			const u32 byteStride = dstStride * bpp;
1483 			const u32 yOffset = byteOffset / byteStride;
1484 
1485 			// Some games use mismatching bitdepths.  But make sure the stride matches.
1486 			// If it doesn't, generally this means we detected the framebuffer with too large a height.
1487 			// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
1488 			bool match = yOffset < dstYOffset && (int)yOffset <= (int)vfb->bufferHeight - dstHeight;
1489 			if (match && vfb_byteStride != byteStride) {
1490 				// Grand Knights History copies with a mismatching stride but a full line at a time.
1491 				// Makes it hard to detect the wrong transfers in e.g. God of War.
1492 				if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
1493 					// However, some other games write cluts to framebuffers.
1494 					// Let's catch this and upload.  Otherwise reject the match.
1495 					match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
1496 					if (match) {
1497 						dstWidth = byteStride * height / vfb_bpp;
1498 						dstHeight = 1;
1499 					}
1500 				} else {
1501 					dstWidth = byteStride * height / vfb_bpp;
1502 					dstHeight = 1;
1503 				}
1504 			} else if (match) {
1505 				dstWidth = width;
1506 				dstHeight = height;
1507 			}
1508 			if (match) {
1509 				dstYOffset = yOffset;
1510 				dstXOffset = dstStride == 0 ? 0 : (byteOffset / bpp) % dstStride;
1511 				dstBuffer = vfb;
1512 			}
1513 		}
1514 		if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
1515 			const u32 byteOffset = srcBasePtr - vfb_address;
1516 			const u32 byteStride = srcStride * bpp;
1517 			const u32 yOffset = byteOffset / byteStride;
1518 			bool match = yOffset < srcYOffset && (int)yOffset <= (int)vfb->bufferHeight - srcHeight;
1519 			if (match && vfb_byteStride != byteStride) {
1520 				if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
1521 					match = false;
1522 				} else {
1523 					srcWidth = byteStride * height / vfb_bpp;
1524 					srcHeight = 1;
1525 				}
1526 			} else if (match) {
1527 				srcWidth = width;
1528 				srcHeight = height;
1529 			}
1530 			if (match) {
1531 				srcYOffset = yOffset;
1532 				srcXOffset = srcStride == 0 ? 0 : (byteOffset / bpp) % srcStride;
1533 				srcBuffer = vfb;
1534 			}
1535 		}
1536 	}
1537 
1538 	if (srcBuffer && !dstBuffer) {
1539 		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
1540 			(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
1541 				Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
1542 			GEBufferFormat ramFormat;
1543 			// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
1544 			if (bpp == 4) {
1545 				// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
1546 				ramFormat = GE_FORMAT_8888;
1547 			} else if (srcBuffer->format != GE_FORMAT_8888) {
1548 				// We guess that the game will interpret the data the same as it was in the source of the copy.
1549 				// Seems like a likely good guess, and works in Test Drive Unlimited.
1550 				ramFormat = srcBuffer->format;
1551 			} else {
1552 				// No info left - just fall back to something. But this is definitely split pixel tricks.
1553 				ramFormat = GE_FORMAT_5551;
1554 			}
1555 			dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
1556 		}
1557 	}
1558 
1559 	if (dstBuffer)
1560 		dstBuffer->last_frame_used = gpuStats.numFlips;
1561 
1562 	if (dstYOffset != (u32)-1) {
1563 		dstY += dstYOffset;
1564 		dstX += dstXOffset;
1565 	}
1566 	if (srcYOffset != (u32)-1) {
1567 		srcY += srcYOffset;
1568 		srcX += srcXOffset;
1569 	}
1570 }
1571 
CreateRAMFramebuffer(uint32_t fbAddress,int width,int height,int stride,GEBufferFormat format)1572 VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
1573 	INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, format %d)", fbAddress, width, height, stride, format);
1574 
1575 	// A target for the destination is missing - so just create one!
1576 	// Make sure this one would be found by the algorithm above so we wouldn't
1577 	// create a new one each frame.
1578 	VirtualFramebuffer *vfb = new VirtualFramebuffer{};
1579 	vfb->fbo = nullptr;
1580 	vfb->fb_address = fbAddress;  // NOTE - not necessarily in VRAM!
1581 	vfb->fb_stride = stride;
1582 	vfb->z_address = 0;  // marks that if anyone tries to render to this framebuffer, it should be dropped and recreated.
1583 	vfb->z_stride = 0;
1584 	vfb->width = std::max(width, stride);
1585 	vfb->height = height;
1586 	vfb->newWidth = vfb->width;
1587 	vfb->newHeight = vfb->height;
1588 	vfb->lastFrameNewSize = gpuStats.numFlips;
1589 	vfb->renderScaleFactor = renderScaleFactor_;
1590 	vfb->renderWidth = (u16)(vfb->width * renderScaleFactor_);
1591 	vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
1592 	vfb->bufferWidth = vfb->width;
1593 	vfb->bufferHeight = vfb->height;
1594 	vfb->format = format;
1595 	vfb->drawnFormat = GE_FORMAT_8888;
1596 	vfb->usageFlags = FB_USAGE_RENDERTARGET;
1597 	SetColorUpdated(vfb, 0);
1598 	char name[64];
1599 	snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
1600 	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
1601 	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, name });
1602 	vfbs_.push_back(vfb);
1603 
1604 	u32 byteSize = ColorBufferByteSize(vfb);
1605 	if (fbAddress + byteSize > framebufRangeEnd_) {
1606 		framebufRangeEnd_ = fbAddress + byteSize;
1607 	}
1608 
1609 	return vfb;
1610 }
1611 
1612 // 1:1 pixel sides buffers, we resize buffers to these before we read them back.
FindDownloadTempBuffer(VirtualFramebuffer * vfb)1613 VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb) {
1614 	// For now we'll keep these on the same struct as the ones that can get displayed
1615 	// (and blatantly copy work already done above while at it).
1616 	VirtualFramebuffer *nvfb = nullptr;
1617 
1618 	// We maintain a separate vector of framebuffer objects for blitting.
1619 	for (VirtualFramebuffer *v : bvfbs_) {
1620 		if (v->fb_address == vfb->fb_address && v->format == vfb->format) {
1621 			if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
1622 				nvfb = v;
1623 				v->fb_stride = vfb->fb_stride;
1624 				v->width = vfb->width;
1625 				v->height = vfb->height;
1626 				break;
1627 			}
1628 		}
1629 	}
1630 
1631 	// Create a new fbo if none was found for the size
1632 	if (!nvfb) {
1633 		nvfb = new VirtualFramebuffer();
1634 		memset(nvfb, 0, sizeof(VirtualFramebuffer));
1635 		nvfb->fbo = nullptr;
1636 		nvfb->fb_address = vfb->fb_address;
1637 		nvfb->fb_stride = vfb->fb_stride;
1638 		nvfb->z_address = vfb->z_address;
1639 		nvfb->z_stride = vfb->z_stride;
1640 		nvfb->width = vfb->width;
1641 		nvfb->height = vfb->height;
1642 		nvfb->renderWidth = vfb->bufferWidth;
1643 		nvfb->renderHeight = vfb->bufferHeight;
1644 		nvfb->renderScaleFactor = 1.0f;  // For readbacks we resize to the original size, of course.
1645 		nvfb->bufferWidth = vfb->bufferWidth;
1646 		nvfb->bufferHeight = vfb->bufferHeight;
1647 		nvfb->format = vfb->format;
1648 		nvfb->drawnWidth = vfb->drawnWidth;
1649 		nvfb->drawnHeight = vfb->drawnHeight;
1650 		nvfb->drawnFormat = vfb->format;
1651 
1652 		char name[64];
1653 		snprintf(name, sizeof(name), "download_temp");
1654 		nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, false, name });
1655 		if (!nvfb->fbo) {
1656 			ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
1657 			return nullptr;
1658 		}
1659 
1660 		bvfbs_.push_back(nvfb);
1661 	} else {
1662 		UpdateDownloadTempBuffer(nvfb);
1663 	}
1664 
1665 	nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
1666 	nvfb->last_frame_render = gpuStats.numFlips;
1667 	nvfb->dirtyAfterDisplay = true;
1668 
1669 	return nvfb;
1670 }
1671 
ApplyClearToMemory(int x1,int y1,int x2,int y2,u32 clearColor)1672 void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) {
1673 	if (currentRenderVfb_) {
1674 		if ((currentRenderVfb_->usageFlags & FB_USAGE_DOWNLOAD_CLEAR) != 0) {
1675 			// Already zeroed in memory.
1676 			return;
1677 		}
1678 	}
1679 	if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) {
1680 		return;
1681 	}
1682 
1683 	u8 *addr = Memory::GetPointerUnchecked(gstate.getFrameBufAddress());
1684 	const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2;
1685 
1686 	u32 clearBits = clearColor;
1687 	if (bpp == 2) {
1688 		u16 clear16 = 0;
1689 		switch (gstate.FrameBufFormat()) {
1690 		case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break;
1691 		case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break;
1692 		case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break;
1693 		default: _dbg_assert_(0); break;
1694 		}
1695 		clearBits = clear16 | (clear16 << 16);
1696 	}
1697 
1698 	const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF);
1699 	const int stride = gstate.FrameBufStride();
1700 	const int width = x2 - x1;
1701 
1702 	const int byteStride = stride * bpp;
1703 	const int byteWidth = width * bpp;
1704 	for (int y = y1; y < y2; ++y) {
1705 		NotifyMemInfo(MemBlockFlags::WRITE, gstate.getFrameBufAddress() + x1 * bpp + y * byteStride, byteWidth, "FramebufferClear");
1706 	}
1707 
1708 	// Can use memset for simple cases. Often alpha is different and gums up the works.
1709 	if (singleByteClear) {
1710 		addr += x1 * bpp;
1711 		for (int y = y1; y < y2; ++y) {
1712 			memset(addr + y * byteStride, clearBits, byteWidth);
1713 		}
1714 	} else {
1715 		// This will most often be true - rarely is the width not aligned.
1716 		// TODO: We should really use non-temporal stores here to avoid the cache,
1717 		// as it's unlikely that these bytes will be read.
1718 		if ((width & 3) == 0 && (x1 & 3) == 0) {
1719 			u64 val64 = clearBits | ((u64)clearBits << 32);
1720 			int xstride = 8 / bpp;
1721 
1722 			u64 *addr64 = (u64 *)addr;
1723 			const int stride64 = stride / xstride;
1724 			const int x1_64 = x1 / xstride;
1725 			const int x2_64 = x2 / xstride;
1726 			for (int y = y1; y < y2; ++y) {
1727 				for (int x = x1_64; x < x2_64; ++x) {
1728 					addr64[y * stride64 + x] = val64;
1729 				}
1730 			}
1731 		} else if (bpp == 4) {
1732 			u32 *addr32 = (u32 *)addr;
1733 			for (int y = y1; y < y2; ++y) {
1734 				for (int x = x1; x < x2; ++x) {
1735 					addr32[y * stride + x] = clearBits;
1736 				}
1737 			}
1738 		} else if (bpp == 2) {
1739 			u16 *addr16 = (u16 *)addr;
1740 			for (int y = y1; y < y2; ++y) {
1741 				for (int x = x1; x < x2; ++x) {
1742 					addr16[y * stride + x] = (u16)clearBits;
1743 				}
1744 			}
1745 		}
1746 	}
1747 
1748 	if (currentRenderVfb_) {
1749 		// The current content is in memory now, so update the flag.
1750 		if (x1 == 0 && y1 == 0 && x2 >= currentRenderVfb_->width && y2 >= currentRenderVfb_->height) {
1751 			currentRenderVfb_->usageFlags |= FB_USAGE_DOWNLOAD_CLEAR;
1752 			currentRenderVfb_->memoryUpdated = true;
1753 		}
1754 	}
1755 }
1756 
NotifyBlockTransferBefore(u32 dstBasePtr,int dstStride,int dstX,int dstY,u32 srcBasePtr,int srcStride,int srcX,int srcY,int width,int height,int bpp,u32 skipDrawReason)1757 bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
1758 	if (!useBufferedRendering_) {
1759 		return false;
1760 	}
1761 
1762 	// Skip checking if there's no framebuffers in that area.
1763 	if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
1764 		return false;
1765 	}
1766 
1767 	VirtualFramebuffer *dstBuffer = 0;
1768 	VirtualFramebuffer *srcBuffer = 0;
1769 	int srcWidth = width;
1770 	int srcHeight = height;
1771 	int dstWidth = width;
1772 	int dstHeight = height;
1773 	FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
1774 
1775 	if (dstBuffer && srcBuffer) {
1776 		if (srcBuffer == dstBuffer) {
1777 			if (srcX != dstX || srcY != dstY) {
1778 				WARN_LOG_N_TIMES(dstsrc, 100, G3D, "Intra-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1779 					width, height, bpp,
1780 					srcBasePtr, srcX, srcY, srcStride,
1781 					dstBasePtr, dstX, dstY, dstStride);
1782 				FlushBeforeCopy();
1783 				// Some backends can handle blitting within a framebuffer. Others will just have to deal with it or ignore it, apparently.
1784 				BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp, "Blit_IntraBufferBlockTransfer");
1785 				RebindFramebuffer("rebind after intra block transfer");
1786 				SetColorUpdated(dstBuffer, skipDrawReason);
1787 				return true;  // Skip the memory copy.
1788 			} else {
1789 				// Ignore, nothing to do.  Tales of Phantasia X does this by accident.
1790 				return true;  // Skip the memory copy.
1791 			}
1792 		} else {
1793 			WARN_LOG_N_TIMES(dstnotsrc, 100, G3D, "Inter-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1794 				width, height, bpp,
1795 				srcBasePtr, srcX, srcY, srcStride,
1796 				dstBasePtr, dstX, dstY, dstStride);
1797 			// Straightforward blit between two framebuffers.
1798 			FlushBeforeCopy();
1799 			BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp, "Blit_InterBufferBlockTransfer");
1800 			RebindFramebuffer("RebindFramebuffer - Inter-buffer block transfer");
1801 			SetColorUpdated(dstBuffer, skipDrawReason);
1802 			return true;  // No need to actually do the memory copy behind, probably.
1803 		}
1804 		return false;
1805 	} else if (dstBuffer) {
1806 		// Here we should just draw the pixels into the buffer.  Copy first.
1807 		return false;
1808 	} else if (srcBuffer) {
1809 		WARN_LOG_N_TIMES(btd, 100, G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1810 			width, height, bpp,
1811 			srcBasePtr, srcX, srcY, srcStride,
1812 			dstBasePtr, dstX, dstY, dstStride);
1813 		FlushBeforeCopy();
1814 		if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
1815 			const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
1816 			const float srcXFactor = (float)bpp / srcBpp;
1817 			const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight;
1818 			if (srcHeight <= 0 || (tooTall && srcY != 0)) {
1819 				WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
1820 			} else {
1821 				if (tooTall) {
1822 					WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
1823 				}
1824 				ReadFramebufferToMemory(srcBuffer, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcWidth * srcXFactor), srcHeight);
1825 				srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1826 			}
1827 		}
1828 		return false;  // Let the bit copy happen
1829 	} else {
1830 		return false;
1831 	}
1832 }
1833 
NotifyBlockTransferAfter(u32 dstBasePtr,int dstStride,int dstX,int dstY,u32 srcBasePtr,int srcStride,int srcX,int srcY,int width,int height,int bpp,u32 skipDrawReason)1834 void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
1835 	// If it's a block transfer direct to the screen, and we're not using buffers, draw immediately.
1836 	// We may still do a partial block draw below if this doesn't pass.
1837 	if (!useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272) {
1838 		bool isPrevDisplayBuffer = PrevDisplayFramebufAddr() == dstBasePtr;
1839 		bool isDisplayBuffer = DisplayFramebufAddr() == dstBasePtr;
1840 		if (isPrevDisplayBuffer || isDisplayBuffer) {
1841 			FlushBeforeCopy();
1842 			DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, dstStride);
1843 			return;
1844 		}
1845 	}
1846 
1847 	if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
1848 		VirtualFramebuffer *dstBuffer = 0;
1849 		VirtualFramebuffer *srcBuffer = 0;
1850 		int srcWidth = width;
1851 		int srcHeight = height;
1852 		int dstWidth = width;
1853 		int dstHeight = height;
1854 		FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
1855 
1856 		// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
1857 		// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
1858 		if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) {
1859 			return;
1860 		}
1861 
1862 		if (dstBuffer && !srcBuffer) {
1863 			WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
1864 			FlushBeforeCopy();
1865 			const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
1866 			int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
1867 			float dstXFactor = (float)bpp / dstBpp;
1868 			if (dstWidth > dstBuffer->width || dstHeight > dstBuffer->height) {
1869 				// The buffer isn't big enough, and we have a clear hint of size.  Resize.
1870 				// This happens in Valkyrie Profile when uploading video at the ending.
1871 				ResizeFramebufFBO(dstBuffer, dstWidth, dstHeight, false, true);
1872 				// Make sure we don't flop back and forth.
1873 				dstBuffer->newWidth = std::max(dstWidth, (int)dstBuffer->width);
1874 				dstBuffer->newHeight = std::max(dstHeight, (int)dstBuffer->height);
1875 				dstBuffer->lastFrameNewSize = gpuStats.numFlips;
1876 				// Resizing may change the viewport/etc.
1877 				gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
1878 			}
1879 			DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
1880 			SetColorUpdated(dstBuffer, skipDrawReason);
1881 			RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
1882 		}
1883 	}
1884 }
1885 
SetSafeSize(u16 w,u16 h)1886 void FramebufferManagerCommon::SetSafeSize(u16 w, u16 h) {
1887 	VirtualFramebuffer *vfb = currentRenderVfb_;
1888 	if (vfb) {
1889 		vfb->safeWidth = std::min(vfb->bufferWidth, std::max(vfb->safeWidth, w));
1890 		vfb->safeHeight = std::min(vfb->bufferHeight, std::max(vfb->safeHeight, h));
1891 	}
1892 }
1893 
Resized()1894 void FramebufferManagerCommon::Resized() {
1895 	gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
1896 
1897 	int w, h, scaleFactor;
1898 	presentation_->CalculateRenderResolution(&w, &h, &scaleFactor, &postShaderIsUpscalingFilter_, &postShaderIsSupersampling_);
1899 	PSP_CoreParameter().renderWidth = w;
1900 	PSP_CoreParameter().renderHeight = h;
1901 	PSP_CoreParameter().renderScaleFactor = scaleFactor;
1902 
1903 	if (UpdateSize()) {
1904 		DestroyAllFBOs();
1905 	}
1906 
1907 	// Might have a new post shader - let's compile it.
1908 	presentation_->UpdatePostShader();
1909 
1910 #ifdef _WIN32
1911 	// Seems related - if you're ok with numbers all the time, show some more :)
1912 	if (g_Config.iShowFPSCounter != 0) {
1913 		ShowScreenResolution();
1914 	}
1915 #endif
1916 }
1917 
DestroyAllFBOs()1918 void FramebufferManagerCommon::DestroyAllFBOs() {
1919 	currentRenderVfb_ = nullptr;
1920 	displayFramebuf_ = nullptr;
1921 	prevDisplayFramebuf_ = nullptr;
1922 	prevPrevDisplayFramebuf_ = nullptr;
1923 
1924 	for (VirtualFramebuffer *vfb : vfbs_) {
1925 		INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
1926 		DestroyFramebuf(vfb);
1927 	}
1928 	vfbs_.clear();
1929 
1930 	for (VirtualFramebuffer *vfb : bvfbs_) {
1931 		DestroyFramebuf(vfb);
1932 	}
1933 	bvfbs_.clear();
1934 
1935 	for (auto &tempFB : tempFBOs_) {
1936 		tempFB.second.fbo->Release();
1937 	}
1938 	tempFBOs_.clear();
1939 
1940 	for (auto iter : fbosToDelete_) {
1941 		iter->Release();
1942 	}
1943 	fbosToDelete_.clear();
1944 }
1945 
GetTempFBO(TempFBO reason,u16 w,u16 h)1946 Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
1947 	u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;
1948 	auto it = tempFBOs_.find(key);
1949 	if (it != tempFBOs_.end()) {
1950 		it->second.last_frame_used = gpuStats.numFlips;
1951 		return it->second.fbo;
1952 	}
1953 
1954 	bool z_stencil = reason == TempFBO::STENCIL;
1955 	char name[128];
1956 	snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : "");
1957 	Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name });
1958 	if (!fbo) {
1959 		return nullptr;
1960 	}
1961 
1962 	const TempFBOInfo info = { fbo, gpuStats.numFlips };
1963 	tempFBOs_[key] = info;
1964 	return fbo;
1965 }
1966 
UpdateFramebufUsage(VirtualFramebuffer * vfb)1967 void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) {
1968 	auto checkFlag = [&](u16 flag, int last_frame) {
1969 		if (vfb->usageFlags & flag) {
1970 			const int age = frameLastFramebufUsed_ - last_frame;
1971 			if (age > FBO_OLD_USAGE_FLAG) {
1972 				vfb->usageFlags &= ~flag;
1973 			}
1974 		}
1975 	};
1976 
1977 	checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
1978 	checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used);
1979 	checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render);
1980 	checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut);
1981 }
1982 
ShowScreenResolution()1983 void FramebufferManagerCommon::ShowScreenResolution() {
1984 	auto gr = GetI18NCategory("Graphics");
1985 
1986 	std::ostringstream messageStream;
1987 	messageStream << gr->T("Internal Resolution") << ": ";
1988 	messageStream << PSP_CoreParameter().renderWidth << "x" << PSP_CoreParameter().renderHeight << " ";
1989 	if (postShaderIsUpscalingFilter_) {
1990 		messageStream << gr->T("(upscaling)") << " ";
1991 	} else if (postShaderIsSupersampling_) {
1992 		messageStream << gr->T("(supersampling)") << " ";
1993 	}
1994 	messageStream << gr->T("Window Size") << ": ";
1995 	messageStream << PSP_CoreParameter().pixelWidth << "x" << PSP_CoreParameter().pixelHeight;
1996 
1997 	host->NotifyUserMessage(messageStream.str(), 2.0f, 0xFFFFFF, "resize");
1998 	INFO_LOG(SYSTEM, "%s", messageStream.str().c_str());
1999 }
2000 
2001 // We might also want to implement an asynchronous callback-style version of this. Would probably
2002 // only be possible to implement optimally on Vulkan, but on GL and D3D11 we could do pixel buffers
2003 // and read on the next frame, then call the callback. PackFramebufferAsync_ on OpenGL already does something similar.
2004 //
2005 // The main use cases for this are:
2006 // * GE debugging(in practice async will not matter because it will stall anyway.)
2007 // * Video file recording(would probably be great if it was async.)
2008 // * Screenshots(benefit slightly from async.)
2009 // * Save state screenshots(could probably be async but need to manage the stall.)
GetFramebuffer(u32 fb_address,int fb_stride,GEBufferFormat format,GPUDebugBuffer & buffer,int maxRes)2010 bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) {
2011 	VirtualFramebuffer *vfb = currentRenderVfb_;
2012 	if (!vfb) {
2013 		vfb = GetVFBAt(fb_address);
2014 	}
2015 
2016 	if (!vfb) {
2017 		if (!Memory::IsValidAddress(fb_address))
2018 			return false;
2019 		// If there's no vfb and we're drawing there, must be memory?
2020 		buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, format);
2021 		return true;
2022 	}
2023 
2024 	int w = vfb->renderWidth, h = vfb->renderHeight;
2025 
2026 	Draw::Framebuffer *bound = nullptr;
2027 
2028 	if (vfb->fbo) {
2029 		if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) {
2030 			w = vfb->width * maxRes;
2031 			h = vfb->height * maxRes;
2032 
2033 			Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, w, h);
2034 			VirtualFramebuffer tempVfb = *vfb;
2035 			tempVfb.fbo = tempFBO;
2036 			tempVfb.bufferWidth = vfb->width;
2037 			tempVfb.bufferHeight = vfb->height;
2038 			tempVfb.renderWidth = w;
2039 			tempVfb.renderHeight = h;
2040 			tempVfb.renderScaleFactor = (float)maxRes;
2041 			BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, "Blit_GetFramebuffer");
2042 
2043 			bound = tempFBO;
2044 		} else {
2045 			bound = vfb->fbo;
2046 		}
2047 	}
2048 
2049 	if (!useBufferedRendering_) {
2050 		// Safety check.
2051 		w = std::min(w, PSP_CoreParameter().pixelWidth);
2052 		h = std::min(h, PSP_CoreParameter().pixelHeight);
2053 	}
2054 
2055 	// TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow?
2056 	bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2057 	buffer.Allocate(w, h, GE_FORMAT_8888, flipY);
2058 	bool retval = draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, "GetFramebuffer");
2059 	gpuStats.numReadbacks++;
2060 	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
2061 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
2062 	// We may have blitted to a temp FBO.
2063 	RebindFramebuffer("RebindFramebuffer - GetFramebuffer");
2064 	return retval;
2065 }
2066 
GetDepthbuffer(u32 fb_address,int fb_stride,u32 z_address,int z_stride,GPUDebugBuffer & buffer)2067 bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
2068 	VirtualFramebuffer *vfb = currentRenderVfb_;
2069 	if (!vfb) {
2070 		vfb = GetVFBAt(fb_address);
2071 	}
2072 
2073 	if (!vfb) {
2074 		if (!Memory::IsValidAddress(z_address))
2075 			return false;
2076 		// If there's no vfb and we're drawing there, must be memory?
2077 		buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
2078 		return true;
2079 	}
2080 
2081 	int w = vfb->renderWidth;
2082 	int h = vfb->renderHeight;
2083 	if (!useBufferedRendering_) {
2084 		// Safety check.
2085 		w = std::min(w, PSP_CoreParameter().pixelWidth);
2086 		h = std::min(h, PSP_CoreParameter().pixelHeight);
2087 	}
2088 
2089 	bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2090 	if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
2091 		buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY);
2092 	} else {
2093 		buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY);
2094 	}
2095 	// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
2096 	bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer");
2097 	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
2098 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
2099 	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2100 	RebindFramebuffer("RebindFramebuffer - GetDepthbuffer");
2101 	return retval;
2102 }
2103 
GetStencilbuffer(u32 fb_address,int fb_stride,GPUDebugBuffer & buffer)2104 bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
2105 	VirtualFramebuffer *vfb = currentRenderVfb_;
2106 	if (!vfb) {
2107 		vfb = GetVFBAt(fb_address);
2108 	}
2109 
2110 	if (!vfb) {
2111 		if (!Memory::IsValidAddress(fb_address))
2112 			return false;
2113 		// If there's no vfb and we're drawing there, must be memory?
2114 		// TODO: Actually get the stencil.
2115 		buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
2116 		return true;
2117 	}
2118 
2119 	int w = vfb->renderWidth;
2120 	int h = vfb->renderHeight;
2121 	if (!useBufferedRendering_) {
2122 		// Safety check.
2123 		w = std::min(w, PSP_CoreParameter().pixelWidth);
2124 		h = std::min(h, PSP_CoreParameter().pixelHeight);
2125 	}
2126 
2127 	bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2128 	// No need to free on failure, the caller/destructor will do that.  Usually this is a reused buffer, anyway.
2129 	buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);
2130 	bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, "GetStencilbuffer");
2131 	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2132 	RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
2133 	return retval;
2134 }
2135 
GetOutputFramebuffer(GPUDebugBuffer & buffer)2136 bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
2137 	int w, h;
2138 	draw_->GetFramebufferDimensions(nullptr, &w, &h);
2139 	Draw::DataFormat fmt = draw_->PreferredFramebufferReadbackFormat(nullptr);
2140 	// Ignore preferred formats other than BGRA.
2141 	if (fmt != Draw::DataFormat::B8G8R8A8_UNORM)
2142 		fmt = Draw::DataFormat::R8G8B8A8_UNORM;
2143 	buffer.Allocate(w, h, fmt == Draw::DataFormat::R8G8B8A8_UNORM ? GPU_DBG_FORMAT_8888 : GPU_DBG_FORMAT_8888_BGRA, false);
2144 	bool retval = draw_->CopyFramebufferToMemorySync(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, "GetOutputFramebuffer");
2145 	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2146 	RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer");
2147 	return retval;
2148 }
2149 
2150 // This function takes an already correctly-sized framebuffer and packs it into RAM.
2151 // Does not need to account for scaling.
2152 // Color conversion is currently done on CPU but should theoretically be done on GPU.
2153 // (Except using the GPU might cause problems because of various implementations'
2154 // dithering behavior and games that expect exact colors like Danganronpa, so we
2155 // can't entirely be rid of the CPU path.) -- unknown
PackFramebufferSync_(VirtualFramebuffer * vfb,int x,int y,int w,int h)2156 void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
2157 	if (!vfb->fbo) {
2158 		ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0");
2159 		return;
2160 	}
2161 
2162 	if (w <= 0 || h <= 0) {
2163 		ERROR_LOG(G3D, "Bad inputs to PackFramebufferSync_: %d %d %d %d", x, y, w, h);
2164 		return;
2165 	}
2166 
2167 	const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
2168 
2169 	Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
2170 	const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
2171 
2172 	const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
2173 	const int dstSize = (h * vfb->fb_stride + w - 1) * dstBpp;
2174 
2175 	if (!Memory::IsValidRange(fb_address + dstByteOffset, dstSize)) {
2176 		ERROR_LOG_REPORT(G3D, "PackFramebufferSync_ would write outside of memory, ignoring");
2177 		return;
2178 	}
2179 
2180 	u8 *destPtr = Memory::GetPointer(fb_address + dstByteOffset);
2181 
2182 	// We always need to convert from the framebuffer native format.
2183 	// Right now that's always 8888.
2184 	DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
2185 
2186 	if (destPtr) {
2187 		draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync_");
2188 		char tag[128];
2189 		size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
2190 		NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
2191 	} else {
2192 		ERROR_LOG(G3D, "PackFramebufferSync_: Tried to readback to bad address %08x (stride = %d)", fb_address + dstByteOffset, vfb->fb_stride);
2193 	}
2194 
2195 	gpuStats.numReadbacks++;
2196 }
2197 
ReadFramebufferToMemory(VirtualFramebuffer * vfb,int x,int y,int w,int h)2198 void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
2199 	// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
2200 	if (x + w >= vfb->bufferWidth) {
2201 		w = vfb->bufferWidth - x;
2202 	}
2203 	if (vfb && vfb->fbo) {
2204 		// We'll pseudo-blit framebuffers here to get a resized version of vfb.
2205 		if (gameUsesSequentialCopies_) {
2206 			// Ignore the x/y/etc., read the entire thing.
2207 			x = 0;
2208 			y = 0;
2209 			w = vfb->width;
2210 			h = vfb->height;
2211 			vfb->memoryUpdated = true;
2212 			vfb->usageFlags |= FB_USAGE_DOWNLOAD;
2213 		} else if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
2214 			// Mark it as fully downloaded until next render to it.
2215 			vfb->memoryUpdated = true;
2216 			vfb->usageFlags |= FB_USAGE_DOWNLOAD;
2217 		} else {
2218 			// Let's try to set the flag eventually, if the game copies a lot.
2219 			// Some games copy subranges very frequently.
2220 			const static int FREQUENT_SEQUENTIAL_COPIES = 3;
2221 			static int frameLastCopy = 0;
2222 			static u32 bufferLastCopy = 0;
2223 			static int copiesThisFrame = 0;
2224 			if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
2225 				frameLastCopy = gpuStats.numFlips;
2226 				bufferLastCopy = vfb->fb_address;
2227 				copiesThisFrame = 0;
2228 			}
2229 			if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
2230 				gameUsesSequentialCopies_ = true;
2231 			}
2232 		}
2233 
2234 		if (vfb->renderWidth == vfb->width && vfb->renderHeight == vfb->height) {
2235 			// No need to blit
2236 			PackFramebufferSync_(vfb, x, y, w, h);
2237 		} else {
2238 			VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
2239 			if (nvfb) {
2240 				BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, "Blit_ReadFramebufferToMemory");
2241 				PackFramebufferSync_(nvfb, x, y, w, h);
2242 			}
2243 		}
2244 
2245 		textureCache_->ForgetLastTexture();
2246 		RebindFramebuffer("RebindFramebuffer - ReadFramebufferToMemory");
2247 	}
2248 }
2249 
FlushBeforeCopy()2250 void FramebufferManagerCommon::FlushBeforeCopy() {
2251 	// Flush anything not yet drawn before blitting, downloading, or uploading.
2252 	// This might be a stalled list, or unflushed before a block transfer, etc.
2253 
2254 	// TODO: It's really bad that we are calling SetRenderFramebuffer here with
2255 	// all the irrelevant state checking it'll use to decide what to do. Should
2256 	// do something more focused here.
2257 	SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
2258 	drawEngine_->DispatchFlush();
2259 }
2260 
DownloadFramebufferForClut(u32 fb_address,u32 loadBytes)2261 void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
2262 	VirtualFramebuffer *vfb = GetVFBAt(fb_address);
2263 	if (vfb && vfb->fb_stride != 0) {
2264 		const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
2265 		int x = 0;
2266 		int y = 0;
2267 		int pixels = loadBytes / bpp;
2268 		// The height will be 1 for each stride or part thereof.
2269 		int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
2270 		int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
2271 
2272 		// We might still have a pending draw to the fb in question, flush if so.
2273 		FlushBeforeCopy();
2274 
2275 		// No need to download if we already have it.
2276 		if (w > 0 && h > 0 && !vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) {
2277 			// We intentionally don't try to optimize into a full download here - we don't want to over download.
2278 
2279 			// CLUT framebuffers are often incorrectly estimated in size.
2280 			if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
2281 				vfb->memoryUpdated = true;
2282 			}
2283 			vfb->clutUpdatedBytes = loadBytes;
2284 
2285 			// We'll pseudo-blit framebuffers here to get a resized version of vfb.
2286 			VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
2287 			if (nvfb) {
2288 				BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, "Blit_DownloadFramebufferForClut");
2289 				PackFramebufferSync_(nvfb, x, y, w, h);
2290 			}
2291 
2292 			textureCache_->ForgetLastTexture();
2293 			RebindFramebuffer("RebindFramebuffer - DownloadFramebufferForClut");
2294 		}
2295 	}
2296 }
2297 
RebindFramebuffer(const char * tag)2298 void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
2299 	shaderManager_->DirtyLastShader();
2300 	if (currentRenderVfb_ && currentRenderVfb_->fbo) {
2301 		draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
2302 	} else {
2303 		// Should this even happen?  It could while debugging, but maybe we can just skip binding at all.
2304 		draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "RebindFramebuffer_Bad");
2305 	}
2306 }
2307 
GetFramebufferList()2308 std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() {
2309 	std::vector<FramebufferInfo> list;
2310 
2311 	for (size_t i = 0; i < vfbs_.size(); ++i) {
2312 		VirtualFramebuffer *vfb = vfbs_[i];
2313 
2314 		FramebufferInfo info;
2315 		info.fb_address = vfb->fb_address;
2316 		info.z_address = vfb->z_address;
2317 		info.format = vfb->format;
2318 		info.width = vfb->width;
2319 		info.height = vfb->height;
2320 		info.fbo = vfb->fbo;
2321 		list.push_back(info);
2322 	}
2323 
2324 	return list;
2325 }
2326 
// Calls Release() on the pointed-to object, if any, and leaves the pointer null.
// Safe to call with a pointer that is already null.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
2333 
DeviceLost()2334 void FramebufferManagerCommon::DeviceLost() {
2335 	DestroyAllFBOs();
2336 	for (int i = 0; i < 3; i++) {
2337 		for (int j = 0; j < 3; j++) {
2338 			DoRelease(reinterpretFromTo_[i][j]);
2339 		}
2340 	}
2341 	DoRelease(reinterpretVBuf_);
2342 	DoRelease(reinterpretSampler_);
2343 	DoRelease(reinterpretVS_);
2344 	presentation_->DeviceLost();
2345 	draw_ = nullptr;
2346 }
2347 
DeviceRestore(Draw::DrawContext * draw)2348 void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
2349 	draw_ = draw;
2350 	presentation_->DeviceRestore(draw);
2351 }
2352