1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include <algorithm>
19 #include <sstream>
20 #include <cmath>
21
22 #include "Common/GPU/thin3d.h"
23 #include "Common/GPU/OpenGL/GLFeatures.h"
24 #include "Common/Data/Convert/ColorConv.h"
25 #include "Common/Data/Text/I18n.h"
26 #include "Common/Common.h"
27 #include "Core/Config.h"
28 #include "Core/ConfigValues.h"
29 #include "Core/Core.h"
30 #include "Core/CoreParameter.h"
31 #include "Core/Debugger/MemBlockInfo.h"
32 #include "Core/Host.h"
33 #include "Core/MIPS/MIPS.h"
34 #include "Core/Reporting.h"
35 #include "GPU/Common/DrawEngineCommon.h"
36 #include "GPU/Common/FramebufferManagerCommon.h"
37 #include "GPU/Common/PostShader.h"
38 #include "GPU/Common/PresentationCommon.h"
39 #include "GPU/Common/TextureCacheCommon.h"
40 #include "GPU/Common/ReinterpretFramebuffer.h"
41 #include "GPU/Debugger/Record.h"
42 #include "GPU/Debugger/Stepping.h"
43 #include "GPU/GPUInterface.h"
44 #include "GPU/GPUState.h"
45
FramebufferManagerCommon(Draw::DrawContext * draw)46 FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
47 : draw_(draw),
48 displayFormat_(GE_FORMAT_565) {
49 presentation_ = new PresentationCommon(draw);
50 }
51
~FramebufferManagerCommon()52 FramebufferManagerCommon::~FramebufferManagerCommon() {
53 DeviceLost();
54
55 DecimateFBOs();
56 for (auto vfb : vfbs_) {
57 DestroyFramebuf(vfb);
58 }
59 vfbs_.clear();
60
61 for (auto &tempFB : tempFBOs_) {
62 tempFB.second.fbo->Release();
63 }
64 tempFBOs_.clear();
65
66 // Do the same for ReadFramebuffersToMemory's VFBs
67 for (auto vfb : bvfbs_) {
68 DestroyFramebuf(vfb);
69 }
70 bvfbs_.clear();
71
72 delete presentation_;
73 }
74
Init()75 void FramebufferManagerCommon::Init() {
76 // We may need to override the render size if the shader is upscaling or SSAA.
77 Resized();
78 }
79
UpdateSize()80 bool FramebufferManagerCommon::UpdateSize() {
81 const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight;
82 const bool newSettings = bloomHack_ != g_Config.iBloomHack || useBufferedRendering_ != (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE);
83
84 renderWidth_ = (float)PSP_CoreParameter().renderWidth;
85 renderHeight_ = (float)PSP_CoreParameter().renderHeight;
86 renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
87 pixelWidth_ = PSP_CoreParameter().pixelWidth;
88 pixelHeight_ = PSP_CoreParameter().pixelHeight;
89 bloomHack_ = g_Config.iBloomHack;
90 useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
91
92 presentation_->UpdateSize(pixelWidth_, pixelHeight_, renderWidth_, renderHeight_);
93
94 return newRender || newSettings;
95 }
96
BeginFrame()97 void FramebufferManagerCommon::BeginFrame() {
98 DecimateFBOs();
99 currentRenderVfb_ = nullptr;
100 }
101
SetDisplayFramebuffer(u32 framebuf,u32 stride,GEBufferFormat format)102 void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
103 displayFramebufPtr_ = framebuf;
104 displayStride_ = stride;
105 displayFormat_ = format;
106 GPURecord::NotifyDisplay(framebuf, stride, format);
107 }
108
GetVFBAt(u32 addr)109 VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) {
110 addr &= 0x3FFFFFFF;
111 VirtualFramebuffer *match = nullptr;
112 for (size_t i = 0; i < vfbs_.size(); ++i) {
113 VirtualFramebuffer *v = vfbs_[i];
114 if (v->fb_address == addr) {
115 // Could check w too but whatever
116 if (match == nullptr || match->last_frame_render < v->last_frame_render) {
117 match = v;
118 }
119 }
120 }
121 return match;
122 }
123
ColorBufferByteSize(const VirtualFramebuffer * vfb) const124 u32 FramebufferManagerCommon::ColorBufferByteSize(const VirtualFramebuffer *vfb) const {
125 return vfb->fb_stride * vfb->height * (vfb->format == GE_FORMAT_8888 ? 4 : 2);
126 }
127
ShouldDownloadFramebuffer(const VirtualFramebuffer * vfb) const128 bool FramebufferManagerCommon::ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const {
129 return PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000;
130 }
131
132 // Heuristics to figure out the size of FBO to create.
133 // TODO: Possibly differentiate on whether through mode is used (since in through mode, viewport is meaningless?)
EstimateDrawingSize(u32 fb_address,GEBufferFormat fb_format,int viewport_width,int viewport_height,int region_width,int region_height,int scissor_width,int scissor_height,int fb_stride,int & drawing_width,int & drawing_height)134 void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int fb_stride, int &drawing_width, int &drawing_height) {
135 static const int MAX_FRAMEBUF_HEIGHT = 512;
136
137 // Games don't always set any of these. Take the greatest parameter that looks valid based on stride.
138 if (viewport_width > 4 && viewport_width <= fb_stride && viewport_height > 0) {
139 drawing_width = viewport_width;
140 drawing_height = viewport_height;
141 // Some games specify a viewport with 0.5, but don't have VRAM for 273. 480x272 is the buffer size.
142 if (viewport_width == 481 && region_width == 480 && viewport_height == 273 && region_height == 272) {
143 drawing_width = 480;
144 drawing_height = 272;
145 }
146 // Sometimes region is set larger than the VRAM for the framebuffer.
147 // However, in one game it's correctly set as a larger height (see #7277) with the same width.
148 // A bit of a hack, but we try to handle that unusual case here.
149 if (region_width <= fb_stride && (region_width > drawing_width || (region_width == drawing_width && region_height > drawing_height)) && region_height <= MAX_FRAMEBUF_HEIGHT) {
150 drawing_width = region_width;
151 drawing_height = std::max(drawing_height, region_height);
152 }
153 // Scissor is often set to a subsection of the framebuffer, so we pay the least attention to it.
154 if (scissor_width <= fb_stride && scissor_width > drawing_width && scissor_height <= MAX_FRAMEBUF_HEIGHT) {
155 drawing_width = scissor_width;
156 drawing_height = std::max(drawing_height, scissor_height);
157 }
158 } else {
159 // If viewport wasn't valid, let's just take the greatest anything regardless of stride.
160 drawing_width = std::min(std::max(region_width, scissor_width), fb_stride);
161 drawing_height = std::max(region_height, scissor_height);
162 }
163
164 if (scissor_width == 481 && region_width == 480 && scissor_height == 273 && region_height == 272) {
165 drawing_width = 480;
166 drawing_height = 272;
167 }
168
169 // Assume no buffer is > 512 tall, it couldn't be textured or displayed fully if so.
170 if (drawing_height >= MAX_FRAMEBUF_HEIGHT) {
171 if (region_height < MAX_FRAMEBUF_HEIGHT) {
172 drawing_height = region_height;
173 } else if (scissor_height < MAX_FRAMEBUF_HEIGHT) {
174 drawing_height = scissor_height;
175 }
176 }
177
178 if (viewport_width != region_width) {
179 // The majority of the time, these are equal. If not, let's check what we know.
180 u32 nearest_address = 0xFFFFFFFF;
181 for (size_t i = 0; i < vfbs_.size(); ++i) {
182 const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF;
183 if (other_address > fb_address && other_address < nearest_address) {
184 nearest_address = other_address;
185 }
186 }
187
188 // Unless the game is using overlapping buffers, the next buffer should be far enough away.
189 // This catches some cases where we can know this.
190 // Hmm. The problem is that we could only catch it for the first of two buffers...
191 const u32 bpp = fb_format == GE_FORMAT_8888 ? 4 : 2;
192 int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
193 if (avail_height < drawing_height && avail_height == region_height) {
194 drawing_width = std::min(region_width, fb_stride);
195 drawing_height = avail_height;
196 }
197
198 // Some games draw buffers interleaved, with a high stride/region/scissor but default viewport.
199 if (fb_stride == 1024 && region_width == 1024 && scissor_width == 1024) {
200 drawing_width = 1024;
201 }
202 }
203
204 DEBUG_LOG(G3D, "Est: %08x V: %ix%i, R: %ix%i, S: %ix%i, STR: %i, THR:%i, Z:%08x = %ix%i", fb_address, viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough(), gstate.isDepthWriteEnabled() ? gstate.getDepthBufAddress() : 0, drawing_width, drawing_height);
205 }
206
GetFramebufferHeuristicInputs(FramebufferHeuristicParams * params,const GPUgstate & gstate)207 void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
208 params->fb_address = (gstate.getFrameBufRawAddress() & 0x3FFFFFFF) | 0x04000000; // GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
209 params->fb_stride = gstate.FrameBufStride();
210
211 params->z_address = (gstate.getDepthBufRawAddress() & 0x3FFFFFFF) | 0x04000000;
212 params->z_stride = gstate.DepthBufStride();
213
214 if (params->z_address == params->fb_address) {
215 // Probably indicates that the game doesn't care about Z for this VFB.
216 // Let's avoid matching it for Z copies and other shenanigans.
217 params->z_address = 0;
218 params->z_stride = 0;
219 }
220
221 params->fmt = gstate.FrameBufFormat();
222
223 params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
224 // Technically, it may write depth later, but we're trying to detect it only when it's really true.
225 if (gstate.isModeClear()) {
226 // Not quite seeing how this makes sense..
227 params->isWritingDepth = !gstate.isClearModeDepthMask() && gstate.isDepthWriteEnabled();
228 } else {
229 params->isWritingDepth = gstate.isDepthWriteEnabled();
230 }
231 params->isDrawing = !gstate.isModeClear() || !gstate.isClearModeColorMask() || !gstate.isClearModeAlphaMask();
232 params->isModeThrough = gstate.isModeThrough();
233
234 // Viewport-X1 and Y1 are not the upper left corner, but half the width/height. A bit confusing.
235 float vpx = gstate.getViewportXScale();
236 float vpy = gstate.getViewportYScale();
237
238 // Work around problem in F1 Grand Prix, where it draws in through mode with a bogus viewport.
239 // We set bad values to 0 which causes the framebuffer size heuristic to rely on the other parameters instead.
240 if (std::isnan(vpx) || vpx > 10000000.0f) {
241 vpx = 0.f;
242 }
243 if (std::isnan(vpy) || vpy > 10000000.0f) {
244 vpy = 0.f;
245 }
246 params->viewportWidth = (int)(fabsf(vpx) * 2.0f);
247 params->viewportHeight = (int)(fabsf(vpy) * 2.0f);
248 params->regionWidth = gstate.getRegionX2() + 1;
249 params->regionHeight = gstate.getRegionY2() + 1;
250 params->scissorWidth = gstate.getScissorX2() + 1;
251 params->scissorHeight = gstate.getScissorY2() + 1;
252 }
253
DoSetRenderFrameBuffer(const FramebufferHeuristicParams & params,u32 skipDrawReason)254 VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const FramebufferHeuristicParams ¶ms, u32 skipDrawReason) {
255 gstate_c.Clean(DIRTY_FRAMEBUF);
256
257 // Collect all parameters. This whole function has really become a cesspool of heuristics...
258 // but it appears that's what it takes, unless we emulate VRAM layout more accurately somehow.
259
260 // As there are no clear "framebuffer width" and "framebuffer height" registers,
261 // we need to infer the size of the current framebuffer somehow.
262 int drawing_width, drawing_height;
263 EstimateDrawingSize(params.fb_address, params.fmt, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorWidth, params.scissorHeight, std::max(params.fb_stride, 4), drawing_width, drawing_height);
264
265 gstate_c.SetCurRTOffsetX(0);
266 bool vfbFormatChanged = false;
267
268 // Find a matching framebuffer
269 VirtualFramebuffer *vfb = nullptr;
270 for (size_t i = 0; i < vfbs_.size(); ++i) {
271 VirtualFramebuffer *v = vfbs_[i];
272 if (v->fb_address == params.fb_address) {
273 vfb = v;
274 // Update fb stride in case it changed
275 if (vfb->fb_stride != params.fb_stride) {
276 vfb->fb_stride = params.fb_stride;
277 vfbFormatChanged = true;
278 }
279 if (vfb->format != params.fmt) {
280 vfb->format = params.fmt;
281 vfbFormatChanged = true;
282 }
283
284 if (vfb->z_address == 0 && vfb->z_stride == 0 && params.z_stride != 0) {
285 // Got one that was created by CreateRAMFramebuffer. Since it has no depth buffer,
286 // we just recreate it immediately.
287 ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
288 }
289
290 // Keep track, but this isn't really used.
291 vfb->z_stride = params.z_stride;
292 // Heuristic: In throughmode, a higher height could be used. Let's avoid shrinking the buffer.
293 if (params.isModeThrough && (int)vfb->width <= params.fb_stride) {
294 vfb->width = std::max((int)vfb->width, drawing_width);
295 vfb->height = std::max((int)vfb->height, drawing_height);
296 } else {
297 vfb->width = drawing_width;
298 vfb->height = drawing_height;
299 }
300 break;
301 } else if (v->fb_address < params.fb_address && v->fb_address + v->fb_stride * 4 > params.fb_address) {
302 // Possibly a render-to-offset.
303 const u32 bpp = v->format == GE_FORMAT_8888 ? 4 : 2;
304 const int x_offset = (params.fb_address - v->fb_address) / bpp;
305 if (v->format == params.fmt && v->fb_stride == params.fb_stride && x_offset < params.fb_stride && v->height >= drawing_height) {
306 WARN_LOG_REPORT_ONCE(renderoffset, HLE, "Rendering to framebuffer offset: %08x +%dx%d", v->fb_address, x_offset, 0);
307 vfb = v;
308 gstate_c.SetCurRTOffsetX(x_offset);
309 vfb->width = std::max((int)vfb->width, x_offset + drawing_width);
310 // To prevent the newSize code from being confused.
311 drawing_width += x_offset;
312 break;
313 }
314 }
315 }
316
317 if (vfb) {
318 if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
319 // Even if it's not newly wrong, if this is larger we need to resize up.
320 if (vfb->width > vfb->bufferWidth || vfb->height > vfb->bufferHeight) {
321 ResizeFramebufFBO(vfb, vfb->width, vfb->height);
322 } else if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
323 // If it's newly wrong, or changing every frame, just keep track.
324 vfb->newWidth = drawing_width;
325 vfb->newHeight = drawing_height;
326 vfb->lastFrameNewSize = gpuStats.numFlips;
327 } else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
328 // Okay, it's changed for a while (and stayed that way.) Let's start over.
329 // But only if we really need to, to avoid blinking.
330 bool needsRecreate = vfb->bufferWidth > params.fb_stride;
331 needsRecreate = needsRecreate || vfb->newWidth > vfb->bufferWidth || vfb->newWidth * 2 < vfb->bufferWidth;
332 needsRecreate = needsRecreate || vfb->newHeight > vfb->bufferHeight || vfb->newHeight * 2 < vfb->bufferHeight;
333 if (needsRecreate) {
334 ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
335 // Let's discard this information, might be wrong now.
336 vfb->safeWidth = 0;
337 vfb->safeHeight = 0;
338 } else {
339 // Even though we won't resize it, let's at least change the size params.
340 vfb->width = drawing_width;
341 vfb->height = drawing_height;
342 }
343 }
344 } else {
345 // It's not different, let's keep track of that too.
346 vfb->lastFrameNewSize = gpuStats.numFlips;
347 }
348 }
349
350 // None found? Create one.
351 if (!vfb) {
352 vfb = new VirtualFramebuffer{};
353 vfb->fbo = nullptr;
354 vfb->fb_address = params.fb_address;
355 vfb->fb_stride = params.fb_stride;
356 vfb->z_address = params.z_address;
357 vfb->z_stride = params.z_stride;
358
359 // The other width/height parameters are set in ResizeFramebufFBO below.
360 vfb->width = drawing_width;
361 vfb->height = drawing_height;
362 vfb->newWidth = drawing_width;
363 vfb->newHeight = drawing_height;
364 vfb->lastFrameNewSize = gpuStats.numFlips;
365 vfb->format = params.fmt;
366 vfb->drawnFormat = params.fmt;
367 vfb->usageFlags = FB_USAGE_RENDERTARGET;
368
369 u32 byteSize = ColorBufferByteSize(vfb);
370 if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) {
371 framebufRangeEnd_ = params.fb_address + byteSize;
372 }
373
374 // This is where we actually create the framebuffer. The true is "force".
375 ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
376 NotifyRenderFramebufferCreated(vfb);
377
378 SetColorUpdated(vfb, skipDrawReason);
379
380 INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %i x %i x %i", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, vfb->format);
381
382 vfb->last_frame_render = gpuStats.numFlips;
383 frameLastFramebufUsed_ = gpuStats.numFlips;
384 vfbs_.push_back(vfb);
385 currentRenderVfb_ = vfb;
386
387 if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) {
388 gpu->PerformMemoryUpload(params.fb_address, byteSize);
389 NotifyStencilUpload(params.fb_address, byteSize, StencilUpload::STENCIL_IS_ZERO);
390 // TODO: Is it worth trying to upload the depth buffer?
391 }
392
393 // Let's check for depth buffer overlap. Might be interesting.
394 bool sharingReported = false;
395 for (size_t i = 0, end = vfbs_.size(); i < end; ++i) {
396 if (vfbs_[i]->z_stride != 0 && params.fb_address == vfbs_[i]->z_address) {
397 // If it's clearing it, most likely it just needs more video memory.
398 // Technically it could write something interesting and the other might not clear, but that's not likely.
399 if (params.isDrawing) {
400 if (params.fb_address != params.z_address && vfbs_[i]->fb_address != vfbs_[i]->z_address) {
401 WARN_LOG_REPORT(SCEGE, "FBO created from existing depthbuffer as color, %08x/%08x and %08x/%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
402 }
403 }
404 } else if (params.z_stride != 0 && params.z_address == vfbs_[i]->fb_address) {
405 // If it's clearing it, then it's probably just the reverse of the above case.
406 if (params.isWritingDepth) {
407 WARN_LOG_REPORT(SCEGE, "FBO using existing buffer as depthbuffer, %08x/%08x and %08x/%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
408 }
409 } else if (vfbs_[i]->z_stride != 0 && params.z_address == vfbs_[i]->z_address && params.fb_address != vfbs_[i]->fb_address && !sharingReported) {
410 // This happens a lot, but virtually always it's cleared.
411 // It's possible the other might not clear, but when every game is reported it's not useful.
412 if (params.isWritingDepth) {
413 WARN_LOG(SCEGE, "FBO reusing depthbuffer, c=%08x/d=%08x and c=%08x/d=%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address);
414 sharingReported = true;
415 }
416 }
417 }
418
419 // We already have it!
420 } else if (vfb != currentRenderVfb_) {
421 // Use it as a render target.
422 DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
423 vfb->usageFlags |= FB_USAGE_RENDERTARGET;
424 vfb->last_frame_render = gpuStats.numFlips;
425 frameLastFramebufUsed_ = gpuStats.numFlips;
426 vfb->dirtyAfterDisplay = true;
427 if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
428 vfb->reallyDirtyAfterDisplay = true;
429
430 VirtualFramebuffer *prev = currentRenderVfb_;
431 currentRenderVfb_ = vfb;
432 NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
433 } else {
434 vfb->last_frame_render = gpuStats.numFlips;
435 frameLastFramebufUsed_ = gpuStats.numFlips;
436 vfb->dirtyAfterDisplay = true;
437 if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
438 vfb->reallyDirtyAfterDisplay = true;
439
440 NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged);
441 }
442
443 gstate_c.curRTWidth = vfb->width;
444 gstate_c.curRTHeight = vfb->height;
445 gstate_c.curRTRenderWidth = vfb->renderWidth;
446 gstate_c.curRTRenderHeight = vfb->renderHeight;
447 return vfb;
448 }
449
DestroyFramebuf(VirtualFramebuffer * v)450 void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
451 // Notify the texture cache of both the color and depth buffers.
452 textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
453 if (v->fbo) {
454 v->fbo->Release();
455 v->fbo = nullptr;
456 }
457
458 // Wipe some pointers
459 if (currentRenderVfb_ == v)
460 currentRenderVfb_ = nullptr;
461 if (displayFramebuf_ == v)
462 displayFramebuf_ = nullptr;
463 if (prevDisplayFramebuf_ == v)
464 prevDisplayFramebuf_ = nullptr;
465 if (prevPrevDisplayFramebuf_ == v)
466 prevPrevDisplayFramebuf_ = nullptr;
467
468 delete v;
469 }
470
BlitFramebufferDepth(VirtualFramebuffer * src,VirtualFramebuffer * dst)471 void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) {
472 int w = std::min(src->renderWidth, dst->renderWidth);
473 int h = std::min(src->renderHeight, dst->renderHeight);
474
475 // Note: We prefer Blit ahead of Copy here, since at least on GL, Copy will always also copy stencil which we don't want. See #9740.
476 if (gstate_c.Supports(GPU_SUPPORTS_FRAMEBUFFER_BLIT_TO_DEPTH)) {
477 draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
478 RebindFramebuffer("After BlitFramebufferDepth");
479 } else if (gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE)) {
480 draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "BlitFramebufferDepth");
481 RebindFramebuffer("After BlitFramebufferDepth");
482 }
483 dst->last_frame_depth_updated = gpuStats.numFlips;
484 }
485
NotifyRenderFramebufferCreated(VirtualFramebuffer * vfb)486 void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
487 if (!useBufferedRendering_) {
488 // Let's ignore rendering to targets that have not (yet) been displayed.
489 gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
490 } else if (currentRenderVfb_) {
491 DownloadFramebufferOnSwitch(currentRenderVfb_);
492 }
493
494 textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
495
496 // Ugly...
497 if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
498 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
499 }
500 if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
501 gstate_c.Dirty(DIRTY_PROJMATRIX);
502 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
503 }
504 }
505
NotifyRenderFramebufferUpdated(VirtualFramebuffer * vfb,bool vfbFormatChanged)506 void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
507 if (vfbFormatChanged) {
508 textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
509 if (vfb->drawnFormat != vfb->format) {
510 ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
511 }
512 }
513
514 // ugly...
515 if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
516 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
517 }
518 if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
519 gstate_c.Dirty(DIRTY_PROJMATRIX);
520 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
521 }
522 }
523
// Tags for the reinterpret passes, indexed as [old format][new format].
// Rows and columns are ordered 565, 5551, 4444. These are fixed literals because the
// strings can't easily be created dynamically - only the pointer is passed along.
static const char *reinterpretStrings[3][3] = {
	// From 565
	{ "self_reinterpret_565", "reinterpret_565_to_5551", "reinterpret_565_to_4444" },
	// From 5551
	{ "reinterpret_5551_to_565", "self_reinterpret_5551", "reinterpret_5551_to_4444" },
	// From 4444
	{ "reinterpret_4444_to_565", "reinterpret_4444_to_5551", "self_reinterpret_4444" },
};
542
ReinterpretFramebuffer(VirtualFramebuffer * vfb,GEBufferFormat oldFormat,GEBufferFormat newFormat)543 void FramebufferManagerCommon::ReinterpretFramebuffer(VirtualFramebuffer *vfb, GEBufferFormat oldFormat, GEBufferFormat newFormat) {
544 if (!useBufferedRendering_ || !vfb->fbo) {
545 return;
546 }
547
548 _assert_(newFormat != oldFormat);
549 // The caller is responsible for updating the format.
550 _assert_(newFormat == vfb->format);
551
552 ShaderLanguage lang = draw_->GetShaderLanguageDesc().shaderLanguage;
553
554 bool doReinterpret = PSP_CoreParameter().compat.flags().ReinterpretFramebuffers &&
555 (lang == HLSL_D3D11 || lang == GLSL_VULKAN || lang == GLSL_3xx);
556 // Copy image required for now.
557 if (!gstate_c.Supports(GPU_SUPPORTS_COPY_IMAGE))
558 doReinterpret = false;
559 if (!doReinterpret) {
560 // Fake reinterpret - just clear the way we always did on Vulkan. Just clear color and stencil.
561 if (oldFormat == GE_FORMAT_565) {
562 // We have to bind here instead of clear, since it can be that no framebuffer is bound.
563 // The backend can sometimes directly optimize it to a clear.
564
565 // Games that are marked as doing reinterpret just ignore this - better to keep the data than to clear.
566 // Fixes #13717.
567 if (!PSP_CoreParameter().compat.flags().ReinterpretFramebuffers) {
568 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "FakeReinterpret");
569 // Need to dirty anything that has command buffer dynamic state, in case we started a new pass above.
570 // Should find a way to feed that information back, maybe... Or simply correct the issue in the rendermanager.
571 gstate_c.Dirty(DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE);
572
573 if (currentRenderVfb_ != vfb) {
574 // In case ReinterpretFramebuffer was called from the texture manager.
575 draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After FakeReinterpret");
576 }
577 }
578 }
579 return;
580 }
581
582 // We only reinterpret between 16 - bit formats, for now.
583 if (!IsGeBufferFormat16BitColor(oldFormat) || !IsGeBufferFormat16BitColor(newFormat)) {
584 // 16->32 and 32->16 will require some more specialized shaders.
585 return;
586 }
587
588 char *vsCode = nullptr;
589 char *fsCode = nullptr;
590
591 if (!reinterpretVS_) {
592 vsCode = new char[4000];
593 const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
594 GenerateReinterpretVertexShader(vsCode, shaderLanguageDesc);
595 reinterpretVS_ = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "reinterpret_vs");
596 _assert_(reinterpretVS_);
597 }
598
599 if (!reinterpretSampler_) {
600 Draw::SamplerStateDesc samplerDesc{};
601 samplerDesc.magFilter = Draw::TextureFilter::LINEAR;
602 samplerDesc.minFilter = Draw::TextureFilter::LINEAR;
603 reinterpretSampler_ = draw_->CreateSamplerState(samplerDesc);
604 }
605
606 if (!reinterpretVBuf_) {
607 reinterpretVBuf_ = draw_->CreateBuffer(12 * 3, Draw::BufferUsageFlag::DYNAMIC | Draw::BufferUsageFlag::VERTEXDATA);
608 }
609
610 // See if we need to create a new pipeline.
611
612 Draw::Pipeline *pipeline = reinterpretFromTo_[(int)oldFormat][(int)newFormat];
613 if (!pipeline) {
614 fsCode = new char[4000];
615 const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
616 GenerateReinterpretFragmentShader(fsCode, oldFormat, newFormat, shaderLanguageDesc);
617 Draw::ShaderModule *reinterpretFS = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "reinterpret_fs");
618 _assert_(reinterpretFS);
619
620 std::vector<Draw::ShaderModule *> shaders;
621 shaders.push_back(reinterpretVS_);
622 shaders.push_back(reinterpretFS);
623
624 using namespace Draw;
625 Draw::PipelineDesc desc{};
626 // We use a "fullscreen triangle".
627 // TODO: clear the stencil buffer. Hard to actually initialize it with the new alpha, though possible - let's see if
628 // we need it.
629 DepthStencilState *depth = draw_->CreateDepthStencilState({ false, false, Comparison::LESS });
630 BlendState *blendstateOff = draw_->CreateBlendState({ false, 0xF });
631 RasterState *rasterNoCull = draw_->CreateRasterState({});
632
633 // No uniforms for these, only a single texture input.
634 PipelineDesc pipelineDesc{ Primitive::TRIANGLE_LIST, shaders, nullptr, depth, blendstateOff, rasterNoCull, nullptr };
635 pipeline = draw_->CreateGraphicsPipeline(pipelineDesc);
636 _assert_(pipeline != nullptr);
637 reinterpretFromTo_[(int)oldFormat][(int)newFormat] = pipeline;
638
639 depth->Release();
640 blendstateOff->Release();
641 rasterNoCull->Release();
642 reinterpretFS->Release();
643 }
644
645 // Copy to a temp framebuffer.
646 Draw::Framebuffer *temp = GetTempFBO(TempFBO::REINTERPRET, vfb->renderWidth, vfb->renderHeight);
647
648 draw_->InvalidateCachedState();
649 draw_->CopyFramebufferImage(vfb->fbo, 0, 0, 0, 0, temp, 0, 0, 0, 0, vfb->renderWidth, vfb->renderHeight, 1, Draw::FBChannel::FB_COLOR_BIT, "reinterpret_prep");
650 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, reinterpretStrings[(int)oldFormat][(int)newFormat]);
651 draw_->BindPipeline(pipeline);
652 draw_->BindFramebufferAsTexture(temp, 0, Draw::FBChannel::FB_COLOR_BIT, 0);
653 draw_->BindSamplerStates(0, 1, &reinterpretSampler_);
654 draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
655 Draw::Viewport vp = Draw::Viewport{ 0.0f, 0.0f, (float)vfb->renderWidth, (float)vfb->renderHeight, 0.0f, 1.0f };
656 draw_->SetViewports(1, &vp);
657 // Vertex buffer not used - vertices generated in shader.
658 // TODO: Switch to a vertex buffer for GLES2/D3D9 compat.
659 draw_->BindVertexBuffers(0, 1, &reinterpretVBuf_, nullptr);
660 draw_->Draw(3, 0);
661 draw_->InvalidateCachedState();
662
663 // Unbind.
664 draw_->BindTexture(0, nullptr);
665
666 shaderManager_->DirtyLastShader();
667 textureCache_->ForgetLastTexture();
668
669 gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE);
670
671 if (currentRenderVfb_ != vfb) {
672 // In case ReinterpretFramebuffer was called from the texture manager.
673 draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After reinterpret");
674 }
675 delete[] vsCode;
676 delete[] fsCode;
677 }
678
NotifyRenderFramebufferSwitched(VirtualFramebuffer * prevVfb,VirtualFramebuffer * vfb,bool isClearingDepth)679 void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
680 if (ShouldDownloadFramebuffer(vfb) && !vfb->memoryUpdated) {
681 ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height);
682 vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
683 vfb->firstFrameSaved = true;
684 } else {
685 DownloadFramebufferOnSwitch(prevVfb);
686 }
687 textureCache_->ForgetLastTexture();
688 shaderManager_->DirtyLastShader();
689
690 if (prevVfb) {
691 // Copy depth value from the previously bound framebuffer to the current one.
692 // We check that the address is the same within BlitFramebufferDepth before actually blitting.
693
694 bool hasNewerDepth = prevVfb->last_frame_depth_render != 0 && prevVfb->last_frame_depth_render >= vfb->last_frame_depth_updated;
695 if (!prevVfb->fbo || !vfb->fbo || !useBufferedRendering_ || !hasNewerDepth || isClearingDepth) {
696 // If depth wasn't updated, then we're at least "two degrees" away from the data.
697 // This is an optimization: it probably doesn't need to be copied in this case.
698 } else {
699 bool matchingDepthBuffer = prevVfb->z_address == vfb->z_address && prevVfb->z_stride != 0 && vfb->z_stride != 0;
700 bool matchingSize = prevVfb->width == vfb->width && prevVfb->height == vfb->height;
701 if (matchingDepthBuffer && matchingSize) {
702 BlitFramebufferDepth(prevVfb, vfb);
703 }
704 }
705 }
706
707 if (vfb->drawnFormat != vfb->format) {
708 ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format);
709 }
710
711 if (useBufferedRendering_) {
712 if (vfb->fbo) {
713 shaderManager_->DirtyLastShader();
714 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "FramebufferSwitch");
715 } else {
716 // This should only happen very briefly when toggling useBufferedRendering_.
717 ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
718 }
719 } else {
720 if (vfb->fbo) {
721 // This should only happen very briefly when toggling useBufferedRendering_.
722 textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
723 vfb->fbo->Release();
724 vfb->fbo = nullptr;
725 }
726
727 // Let's ignore rendering to targets that have not (yet) been displayed.
728 if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) {
729 gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
730 } else {
731 gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
732 }
733 }
734 textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);
735
736 // ugly... is all this needed?
737 if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
738 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
739 }
740 if (gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight) {
741 gstate_c.Dirty(DIRTY_PROJMATRIX);
742 gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
743 }
744 }
745
NotifyVideoUpload(u32 addr,int size,int width,GEBufferFormat fmt)746 void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt) {
747 // Note: UpdateFromMemory() is still called later.
748 // This is a special case where we have extra information prior to the invalidation.
749
750 // TODO: Could possibly be an offset...
751 VirtualFramebuffer *vfb = GetVFBAt(addr);
752 if (vfb) {
753 if (vfb->format != fmt || vfb->drawnFormat != fmt) {
754 DEBUG_LOG(ME, "Changing format for %08x from %d to %d", addr, vfb->drawnFormat, fmt);
755 vfb->format = fmt;
756 vfb->drawnFormat = fmt;
757
758 // Let's count this as a "render". This will also force us to use the correct format.
759 vfb->last_frame_render = gpuStats.numFlips;
760 }
761
762 if (vfb->fb_stride < width) {
763 DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
764 const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;
765 ResizeFramebufFBO(vfb, width, size / (bpp * width));
766 // Resizing may change the viewport/etc.
767 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
768 vfb->fb_stride = width;
769 // This might be a bit wider than necessary, but we'll redetect on next render.
770 vfb->width = width;
771 }
772 }
773 }
774
UpdateFromMemory(u32 addr,int size,bool safe)775 void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size, bool safe) {
776 // Take off the uncached flag from the address. Not to be confused with the start of VRAM.
777 addr &= 0x3FFFFFFF;
778 // TODO: Could go through all FBOs, but probably not important?
779 // TODO: Could also check for inner changes, but video is most important.
780 bool isDisplayBuf = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
781 if (isDisplayBuf || safe) {
782 // TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
783 if (!Memory::IsValidAddress(displayFramebufPtr_))
784 return;
785
786 for (size_t i = 0; i < vfbs_.size(); ++i) {
787 VirtualFramebuffer *vfb = vfbs_[i];
788 if (vfb->fb_address == addr) {
789 FlushBeforeCopy();
790
791 if (useBufferedRendering_ && vfb->fbo) {
792 GEBufferFormat fmt = vfb->format;
793 if (vfb->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf) {
794 // If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
795 fmt = displayFormat_;
796 }
797 DrawPixels(vfb, 0, 0, Memory::GetPointer(addr), fmt, vfb->fb_stride, vfb->width, vfb->height);
798 SetColorUpdated(vfb, gstate_c.skipDrawReason);
799 } else {
800 INFO_LOG(FRAMEBUF, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
801 DestroyFramebuf(vfb);
802 vfbs_.erase(vfbs_.begin() + i--);
803 }
804 }
805 }
806
807 RebindFramebuffer("RebindFramebuffer - UpdateFromMemory");
808 }
809 // TODO: Necessary?
810 gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
811 }
812
DrawPixels(VirtualFramebuffer * vfb,int dstX,int dstY,const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride,int width,int height)813 void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
814 textureCache_->ForgetLastTexture();
815 shaderManager_->DirtyLastShader(); // On GL, important that this is BEFORE drawing
816 float u0 = 0.0f, u1 = 1.0f;
817 float v0 = 0.0f, v1 = 1.0f;
818
819 DrawTextureFlags flags;
820 if (useBufferedRendering_ && vfb && vfb->fbo) {
821 flags = DRAWTEX_LINEAR;
822 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "DrawPixels");
823 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
824 SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
825 draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
826 } else {
827 // We are drawing directly to the back buffer so need to flip.
828 // Should more of this be handled by the presentation engine?
829 if (needBackBufferYSwap_)
830 std::swap(v0, v1);
831 flags = g_Config.iBufFilter == SCALE_LINEAR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
832 flags = flags | DRAWTEX_TO_BACKBUFFER;
833 FRect frame = GetScreenFrame(pixelWidth_, pixelHeight_);
834 FRect rc;
835 CenterDisplayOutputRect(&rc, 480.0f, 272.0f, frame, ROTATION_LOCKED_HORIZONTAL);
836 SetViewport2D(rc.x, rc.y, rc.w, rc.h);
837 draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
838 }
839
840 Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height, u1, v1);
841 if (pixelsTex) {
842 draw_->BindTextures(0, 1, &pixelsTex);
843 Bind2DShader();
844 DrawActiveTexture(dstX, dstY, width, height, vfb->bufferWidth, vfb->bufferHeight, u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);
845 gpuStats.numUploads++;
846 pixelsTex->Release();
847 draw_->InvalidateCachedState();
848
849 gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
850 }
851 }
852
BindFramebufferAsColorTexture(int stage,VirtualFramebuffer * framebuffer,int flags)853 bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags) {
854 if (!framebuffer->fbo || !useBufferedRendering_) {
855 draw_->BindTexture(stage, nullptr);
856 gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
857 return false;
858 }
859
860 // currentRenderVfb_ will always be set when this is called, except from the GE debugger.
861 // Let's just not bother with the copy in that case.
862 bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY) || GPUStepping::IsStepping();
863
864 // Currently rendering to this framebuffer. Need to make a copy.
865 if (!skipCopy && framebuffer == currentRenderVfb_) {
866 // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
867 Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
868 if (renderCopy) {
869 VirtualFramebuffer copyInfo = *framebuffer;
870 copyInfo.fbo = renderCopy;
871 CopyFramebufferForColorTexture(©Info, framebuffer, flags);
872 RebindFramebuffer("After BindFramebufferAsColorTexture");
873 draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, 0);
874 } else {
875 draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
876 }
877 return true;
878 } else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) {
879 draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, 0);
880 return true;
881 } else {
882 ERROR_LOG_REPORT_ONCE(vulkanSelfTexture, G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
883 // To do this safely in Vulkan, we need to use input attachments.
884 // Actually if the texture region and render regions don't overlap, this is safe, but we need
885 // to transition to GENERAL image layout which will take some trickery.
886 // Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
887 draw_->BindTexture(stage, nullptr);
888 gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
889 return false;
890 }
891 }
892
CopyFramebufferForColorTexture(VirtualFramebuffer * dst,VirtualFramebuffer * src,int flags)893 void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags) {
894 int x = 0;
895 int y = 0;
896 int w = src->drawnWidth;
897 int h = src->drawnHeight;
898
899 // If max is not > min, we probably could not detect it. Skip.
900 // See the vertex decoder, where this is updated.
901 if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
902 x = std::max(gstate_c.vertBounds.minU, (u16)0);
903 y = std::max(gstate_c.vertBounds.minV, (u16)0);
904 w = std::min(gstate_c.vertBounds.maxU, src->drawnWidth) - x;
905 h = std::min(gstate_c.vertBounds.maxV, src->drawnHeight) - y;
906
907 // If we bound a framebuffer, apply the byte offset as pixels to the copy too.
908 if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
909 x += gstate_c.curTextureXOffset;
910 y += gstate_c.curTextureYOffset;
911 }
912
913 // We'll have to reapply these next time since we cropped to UV.
914 gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
915 }
916
917 if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
918 BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, "Blit_CopyFramebufferForColorTexture");
919 }
920 }
921
MakePixelTexture(const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride,int width,int height,float & u1,float & v1)922 Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, float &u1, float &v1) {
923 // TODO: We can just change the texture format and flip some bits around instead of this.
924 // Could share code with the texture cache perhaps.
925 auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
926 for (int y = 0; y < height; y++) {
927 const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
928 const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
929 u32 *dst = (u32 *)(data + byteStride * y);
930 switch (srcPixelFormat) {
931 case GE_FORMAT_565:
932 if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
933 ConvertRGB565ToBGRA8888(dst, src16, width);
934 else
935 ConvertRGB565ToRGBA8888(dst, src16, width);
936 break;
937
938 case GE_FORMAT_5551:
939 if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
940 ConvertRGBA5551ToBGRA8888(dst, src16, width);
941 else
942 ConvertRGBA5551ToRGBA8888(dst, src16, width);
943 break;
944
945 case GE_FORMAT_4444:
946 if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
947 ConvertRGBA4444ToBGRA8888(dst, src16, width);
948 else
949 ConvertRGBA4444ToRGBA8888(dst, src16, width);
950 break;
951
952 case GE_FORMAT_8888:
953 if (preferredPixelsFormat_ == Draw::DataFormat::B8G8R8A8_UNORM)
954 ConvertRGBA8888ToBGRA8888(dst, src32, width);
955 // This means use original pointer as-is. May avoid or optimize a copy.
956 else if (srcStride == width)
957 return false;
958 else
959 memcpy(dst, src32, width * 4);
960 break;
961
962 case GE_FORMAT_INVALID:
963 case GE_FORMAT_DEPTH16:
964 _dbg_assert_msg_(false, "Invalid pixelFormat passed to DrawPixels().");
965 break;
966 }
967 }
968 return true;
969 };
970
971 Draw::TextureDesc desc{
972 Draw::TextureType::LINEAR2D,
973 preferredPixelsFormat_,
974 width,
975 height,
976 1,
977 1,
978 false,
979 "DrawPixels",
980 { (uint8_t *)srcPixels },
981 generateTexture,
982 };
983 // TODO: On Vulkan, use a custom allocator? Important to use an allocator:
984 // Hot Shot Golf (#12355) does tons of these in a frame in some situations! So actually,
985 // we do use an allocator. In fact, I've now banned allocator-less textures.
986 Draw::Texture *tex = draw_->CreateTexture(desc);
987 if (!tex)
988 ERROR_LOG(G3D, "Failed to create drawpixels texture");
989 return tex;
990 }
991
DrawFramebufferToOutput(const u8 * srcPixels,GEBufferFormat srcPixelFormat,int srcStride)992 void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride) {
993 textureCache_->ForgetLastTexture();
994 shaderManager_->DirtyLastShader();
995
996 float u0 = 0.0f, u1 = 480.0f / 512.0f;
997 float v0 = 0.0f, v1 = 1.0f;
998 Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272, u1, v1);
999 if (!pixelsTex)
1000 return;
1001
1002 int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1003 OutputFlags flags = g_Config.iBufFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1004 if (needBackBufferYSwap_) {
1005 flags |= OutputFlags::BACKBUFFER_FLIPPED;
1006 }
1007 // DrawActiveTexture reverses these, probably to match "up".
1008 if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
1009 flags |= OutputFlags::POSITION_FLIPPED;
1010 }
1011
1012 presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1013 presentation_->SourceTexture(pixelsTex, 512, 272);
1014 presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1015 pixelsTex->Release();
1016
1017 // PresentationCommon sets all kinds of state, we can't rely on anything.
1018 gstate_c.Dirty(DIRTY_ALL);
1019
1020 currentRenderVfb_ = nullptr;
1021 }
1022
DownloadFramebufferOnSwitch(VirtualFramebuffer * vfb)1023 void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
1024 if (vfb && vfb->safeWidth > 0 && vfb->safeHeight > 0 && !vfb->firstFrameSaved && !vfb->memoryUpdated) {
1025 // Some games will draw to some memory once, and use it as a render-to-texture later.
1026 // To support this, we save the first frame to memory when we have a safe w/h.
1027 // Saving each frame would be slow.
1028 if (!g_Config.bDisableSlowFramebufEffects && !PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) {
1029 ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight);
1030 vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1031 vfb->firstFrameSaved = true;
1032 vfb->safeWidth = 0;
1033 vfb->safeHeight = 0;
1034 }
1035 }
1036 }
1037
SetViewport2D(int x,int y,int w,int h)1038 void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) {
1039 Draw::Viewport vp{ (float)x, (float)y, (float)w, (float)h, 0.0f, 1.0f };
1040 draw_->SetViewports(1, &vp);
1041 }
1042
// Presents the currently displayed framebuffer to the output.
//
// reallyDirty: when false, the previously displayed address is reused instead of
//              displayFramebufPtr_ (we're probably frameskipping).
//
// Resolution order for the source image:
//  1. A virtual framebuffer exactly at the display address.
//  2. A virtual framebuffer that contains the display address with room for 480x272.
//  3. Raw memory at the display address (drawn via DrawFramebufferToOutput).
//  4. Nothing usable: the back buffer is cleared to black (buffered rendering only).
// Always resets currentRenderVfb_, and dirties GPU state before returning.
void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
	// Give the target we were rendering to a chance to do its first frame readback.
	DownloadFramebufferOnSwitch(currentRenderVfb_);
	shaderManager_->DirtyLastShader();

	currentRenderVfb_ = nullptr;

	if (displayFramebufPtr_ == 0) {
		if (Core_IsStepping())
			VERBOSE_LOG(FRAMEBUF, "Display disabled, displaying only black");
		else
			DEBUG_LOG(FRAMEBUF, "Display disabled, displaying only black");
		// No framebuffer to display! Clear to black.
		if (useBufferedRendering_) {
			draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput");
		}
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
		return;
	}

	// Pixel offset of the display address inside the chosen framebuffer (0,0 on exact match).
	u32 offsetX = 0;
	u32 offsetY = 0;

	// If it's not really dirty, we're probably frameskipping.  Use the last working one.
	u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
	prevDisplayFramebufPtr_ = fbaddr;

	VirtualFramebuffer *vfb = GetVFBAt(fbaddr);
	if (!vfb) {
		// Let's search for a framebuf within this range. Note that we also look for
		// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
		// and uncached bits of the address when comparing.
		const u32 addr = fbaddr & 0x3FFFFFFF;
		for (size_t i = 0; i < vfbs_.size(); ++i) {
			VirtualFramebuffer *v = vfbs_[i];
			const u32 v_addr = v->fb_address & 0x3FFFFFFF;
			const u32 v_size = ColorBufferByteSize(v);
			if (addr >= v_addr && addr < v_addr + v_size) {
				// Convert the byte distance into an x/y pixel position using the buffer's stride.
				const u32 dstBpp = v->format == GE_FORMAT_8888 ? 4 : 2;
				const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
				const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
				// We have enough space there for the display, right?
				if (v_offsetX + 480 > (u32)v->fb_stride || v->bufferHeight < v_offsetY + 272) {
					continue;
				}
				// Check for the closest one.
				if (offsetY == 0 || offsetY > v_offsetY) {
					offsetX = v_offsetX;
					offsetY = v_offsetY;
					vfb = v;
				}
			}
		}

		if (vfb) {
			// Okay, we found one above.
			// Log should be "Displaying from framebuf" but not worth changing the report.
			INFO_LOG_REPORT_ONCE(displayoffset, FRAMEBUF, "Rendering from framebuf with offset %08x -> %08x+%dx%d", addr, vfb->fb_address, offsetX, offsetY);
		}
	}

	if (vfb && vfb->format != displayFormat_) {
		if (vfb->last_frame_render + FBO_OLD_AGE < gpuStats.numFlips) {
			// The game probably switched formats on us.
			vfb->format = displayFormat_;
		} else {
			// Recently rendered in a different format: don't display it, fall through to the
			// memory / clear handling below.
			vfb = 0;
		}
	}

	if (!vfb) {
		if (Memory::IsValidAddress(fbaddr)) {
			// The game is displaying something directly from RAM.  In GTA, it's decoded video.
			// Note: the inner check repeats the outer one, so this always draws and returns.
			if (!vfb) {
				DrawFramebufferToOutput(Memory::GetPointer(fbaddr), displayFormat_, displayStride_);
				return;
			}
		} else {
			DEBUG_LOG(FRAMEBUF, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
			// No framebuffer to display! Clear to black.
			if (useBufferedRendering_) {
				// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
				draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_NoFBO");
			}
			gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
			return;
		}
	}

	// From here on vfb is non-null: both branches above return when it is null.
	vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
	vfb->last_frame_displayed = gpuStats.numFlips;
	vfb->dirtyAfterDisplay = false;
	vfb->reallyDirtyAfterDisplay = false;

	// Shift the display history (current -> prev -> prevPrev) before recording the new one.
	if (prevDisplayFramebuf_ != displayFramebuf_) {
		prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
	}
	if (displayFramebuf_ != vfb) {
		prevDisplayFramebuf_ = displayFramebuf_;
	}
	displayFramebuf_ = vfb;

	if (vfb->fbo) {
		if (Core_IsStepping())
			VERBOSE_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);
		else
			DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address);

		// TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame.

		// Normalized texture coordinates of the 480x272 display window inside the buffer.
		float u0 = offsetX / (float)vfb->bufferWidth;
		float v0 = offsetY / (float)vfb->bufferHeight;
		float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
		float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;

		textureCache_->ForgetLastTexture();

		int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
		OutputFlags flags = g_Config.iBufFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
		if (needBackBufferYSwap_) {
			flags |= OutputFlags::BACKBUFFER_FLIPPED;
		}
		// DrawActiveTexture reverses these, probably to match "up".
		if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
			flags |= OutputFlags::POSITION_FLIPPED;
		}

		// Buffer size scaled by the ratio of render size to logical size.
		int actualWidth = (vfb->bufferWidth * vfb->renderWidth) / vfb->width;
		int actualHeight = (vfb->bufferHeight * vfb->renderHeight) / vfb->height;
		presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
		presentation_->SourceFramebuffer(vfb->fbo, actualWidth, actualHeight);
		presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
	} else if (useBufferedRendering_) {
		WARN_LOG(FRAMEBUF, "Current VFB lacks an FBO: %08x", vfb->fb_address);
	}

	// This may get called mid-draw if the game uses an immediate flip.
	// PresentationCommon sets all kinds of state, we can't rely on anything.
	gstate_c.Dirty(DIRTY_ALL);
}
1182
DecimateFBOs()1183 void FramebufferManagerCommon::DecimateFBOs() {
1184 currentRenderVfb_ = nullptr;
1185
1186 for (auto iter : fbosToDelete_) {
1187 iter->Release();
1188 }
1189 fbosToDelete_.clear();
1190
1191 for (size_t i = 0; i < vfbs_.size(); ++i) {
1192 VirtualFramebuffer *vfb = vfbs_[i];
1193 int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
1194
1195 if (ShouldDownloadFramebuffer(vfb) && age == 0 && !vfb->memoryUpdated) {
1196 ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height);
1197 vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1198 vfb->firstFrameSaved = true;
1199 }
1200
1201 // Let's also "decimate" the usageFlags.
1202 UpdateFramebufUsage(vfb);
1203
1204 if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
1205 if (age > FBO_OLD_AGE) {
1206 INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
1207 DestroyFramebuf(vfb);
1208 vfbs_.erase(vfbs_.begin() + i--);
1209 }
1210 }
1211 }
1212
1213 for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
1214 int age = frameLastFramebufUsed_ - it->second.last_frame_used;
1215 if (age > FBO_OLD_AGE) {
1216 it->second.fbo->Release();
1217 it = tempFBOs_.erase(it);
1218 } else {
1219 ++it;
1220 }
1221 }
1222
1223 // Do the same for ReadFramebuffersToMemory's VFBs
1224 for (size_t i = 0; i < bvfbs_.size(); ++i) {
1225 VirtualFramebuffer *vfb = bvfbs_[i];
1226 int age = frameLastFramebufUsed_ - vfb->last_frame_render;
1227 if (age > FBO_OLD_AGE) {
1228 INFO_LOG(FRAMEBUF, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age);
1229 DestroyFramebuf(vfb);
1230 bvfbs_.erase(bvfbs_.begin() + i--);
1231 }
1232 }
1233 }
1234
1235 // Requires width/height to be set already.
ResizeFramebufFBO(VirtualFramebuffer * vfb,int w,int h,bool force,bool skipCopy)1236 void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force, bool skipCopy) {
1237 _dbg_assert_(w > 0);
1238 _dbg_assert_(h > 0);
1239 VirtualFramebuffer old = *vfb;
1240
1241 int oldWidth = vfb->bufferWidth;
1242 int oldHeight = vfb->bufferHeight;
1243
1244 if (force) {
1245 vfb->bufferWidth = w;
1246 vfb->bufferHeight = h;
1247 } else {
1248 if (vfb->bufferWidth >= w && vfb->bufferHeight >= h) {
1249 return;
1250 }
1251
1252 // In case it gets thin and wide, don't resize down either side.
1253 vfb->bufferWidth = std::max((int)vfb->bufferWidth, w);
1254 vfb->bufferHeight = std::max((int)vfb->bufferHeight, h);
1255 }
1256
1257 bool force1x = false;
1258 switch (bloomHack_) {
1259 case 1:
1260 force1x = vfb->bufferWidth <= 128 || vfb->bufferHeight <= 64;
1261 break;
1262 case 2:
1263 force1x = vfb->bufferWidth <= 256 || vfb->bufferHeight <= 128;
1264 break;
1265 case 3:
1266 force1x = vfb->bufferWidth < 480 || vfb->bufferWidth > 800 || vfb->bufferHeight < 272; // GOW uses 864x272
1267 break;
1268 }
1269
1270 if (PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000) {
1271 force1x = true;
1272 }
1273
1274 if (force1x && g_Config.iInternalResolution != 1) {
1275 vfb->renderScaleFactor = 1.0f;
1276 vfb->renderWidth = vfb->bufferWidth;
1277 vfb->renderHeight = vfb->bufferHeight;
1278 } else {
1279 vfb->renderScaleFactor = renderScaleFactor_;
1280 vfb->renderWidth = (u16)(vfb->bufferWidth * renderScaleFactor_);
1281 vfb->renderHeight = (u16)(vfb->bufferHeight * renderScaleFactor_);
1282 }
1283
1284 // During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
1285 // It's not worth the trouble trying to support lower bit-depth rendering, just
1286 // more cases to test that nobody will ever use.
1287
1288 textureCache_->ForgetLastTexture();
1289
1290 if (!useBufferedRendering_) {
1291 if (vfb->fbo) {
1292 vfb->fbo->Release();
1293 vfb->fbo = nullptr;
1294 }
1295 return;
1296 }
1297 if (!old.fbo && vfb->last_frame_failed != 0 && vfb->last_frame_failed - gpuStats.numFlips < 63) {
1298 // Don't constantly retry FBOs which failed to create.
1299 return;
1300 }
1301
1302 shaderManager_->DirtyLastShader();
1303 char tag[128];
1304 size_t len = snprintf(tag, sizeof(tag), "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
1305 vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, tag });
1306 if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
1307 NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
1308 }
1309 if (Memory::IsVRAMAddress(vfb->z_address) && vfb->z_stride != 0) {
1310 char buf[128];
1311 size_t len = snprintf(buf, sizeof(buf), "Z_%s", tag);
1312 NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->fb_stride * vfb->height * sizeof(uint16_t), buf, len);
1313 }
1314 if (old.fbo) {
1315 INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->format));
1316 if (vfb->fbo) {
1317 // TODO: Swap the order of the below? That way we can avoid the needGLESRebinds_ check below I think.
1318 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1319 if (!skipCopy) {
1320 BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, "Blit_ResizeFramebufFBO");
1321 }
1322 }
1323 fbosToDelete_.push_back(old.fbo);
1324 if (needGLESRebinds_) {
1325 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "ResizeFramebufFBO");
1326 }
1327 } else {
1328 draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1329 }
1330
1331 if (!vfb->fbo) {
1332 ERROR_LOG(FRAMEBUF, "Error creating FBO during resize! %dx%d", vfb->renderWidth, vfb->renderHeight);
1333 vfb->last_frame_failed = gpuStats.numFlips;
1334 }
1335 }
1336
1337 // This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers.
1338 // MotoGP goes this path so we need to catch those copies here.
// Reacts to a memcpy/memset of size bytes from src to dst that may touch framebuffers.
//
// src, dst:        addresses of the copy; the upper two address bits are masked off.
// size:            byte count; 0 is ignored.
// isMemset:        true when the operation is a memset (src is then not treated as a copy source).
// skipDrawReason:  forwarded to SetColorUpdated for the destination buffer.
//
// Depending on which side matches a known framebuffer this performs a GPU blit
// (both sides), an upload via DrawPixels (dst only), or a readback (src only).
// Every path in this implementation returns false.
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset, u32 skipDrawReason) {
	if (size == 0) {
		return false;
	}

	dst &= 0x3FFFFFFF;
	src &= 0x3FFFFFFF;

	// Best matches so far. The Y offsets start at the maximum u32 value so that any real
	// match compares lower; the match with the smallest row offset wins.
	VirtualFramebuffer *dstBuffer = 0;
	VirtualFramebuffer *srcBuffer = 0;
	u32 dstY = (u32)-1;
	u32 dstH = 0;
	u32 srcY = (u32)-1;
	u32 srcH = 0;
	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		// A zero stride would make the row math below divide by zero.
		if (vfb->fb_stride == 0) {
			continue;
		}

		// We only remove the kernel and uncached bits when comparing.
		const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
		const u32 vfb_size = ColorBufferByteSize(vfb);
		const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
		const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
		const int vfb_byteWidth = vfb->width * vfb_bpp;

		// Destination: the range must start inside this buffer and either fit in it or start exactly at its base.
		if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
			const u32 offset = dst - vfb_address;
			const u32 yOffset = offset / vfb_byteStride;
			// Accept only row-aligned copies of exactly one row's width or a whole number of strides.
			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) {
				dstBuffer = vfb;
				dstY = yOffset;
				dstH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
			}
		}

		// Source: same containment test, with two extra fallbacks below.
		if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
			const u32 offset = src - vfb_address;
			const u32 yOffset = offset / vfb_byteStride;
			if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) {
				srcBuffer = vfb;
				srcY = yOffset;
				srcH = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
			} else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride && yOffset < srcY) {
				// Valkyrie Profile reads 512 bytes at a time, rather than 2048.  So, let's whitelist fb_stride also.
				srcBuffer = vfb;
				srcY = yOffset;
				srcH = 1;
			} else if (yOffset == 0 && yOffset < srcY) {
				// Okay, last try - it might be a clut.
				if (vfb->usageFlags & FB_USAGE_CLUT) {
					srcBuffer = vfb;
					srcY = yOffset;
					srcH = 1;
				}
			}
		}
	}

	if (!useBufferedRendering_) {
		// If we're copying into a recently used display buf, it's probably destined for the screen.
		// Anything else (or any copy out of a framebuffer) is ignored in non-buffered mode.
		if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
			return false;
		}
	}

	if (!dstBuffer && srcBuffer) {
		// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
		// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB) {
			dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->format);
			dstY = 0;
		}
	}
	if (dstBuffer) {
		// Mark the destination as used this frame.
		dstBuffer->last_frame_used = gpuStats.numFlips;
	}

	if (dstBuffer && srcBuffer && !isMemset) {
		// Framebuffer to framebuffer: handled as a blit between the two.
		if (srcBuffer == dstBuffer) {
			WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
		} else {
			WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
			// Just do the blit!
			BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, "Blit_InterBufferMemcpy");
			SetColorUpdated(dstBuffer, skipDrawReason);
			RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
		}
		return false;
	} else if (dstBuffer) {
		// Memory to framebuffer: upload the source bytes into the destination buffer.
		if (isMemset) {
			gpuStats.numClears++;
		}
		WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
		FlushBeforeCopy();
		const u8 *srcBase = Memory::GetPointerUnchecked(src);
		DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
		SetColorUpdated(dstBuffer, skipDrawReason);
		RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
		// This is a memcpy, let's still copy just in case.
		return false;
	} else if (srcBuffer) {
		// Framebuffer to memory: read the affected rows back, if allowed by settings.
		WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
		FlushBeforeCopy();
		if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
			WARN_LOG_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
		} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated && !PSP_CoreParameter().compat.flags().DisableReadbacks) {
			ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH);
			srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
		}
		return false;
	} else {
		return false;
	}
}
1455
1456 // Can't be const, in case it has to create a vfb unfortunately.
FindTransferFramebuffers(VirtualFramebuffer * & dstBuffer,VirtualFramebuffer * & srcBuffer,u32 dstBasePtr,int dstStride,int & dstX,int & dstY,u32 srcBasePtr,int srcStride,int & srcX,int & srcY,int & srcWidth,int & srcHeight,int & dstWidth,int & dstHeight,int bpp)1457 void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) {
1458 u32 dstYOffset = -1;
1459 u32 dstXOffset = -1;
1460 u32 srcYOffset = -1;
1461 u32 srcXOffset = -1;
1462 int width = srcWidth;
1463 int height = srcHeight;
1464
1465 dstBasePtr &= 0x3FFFFFFF;
1466 srcBasePtr &= 0x3FFFFFFF;
1467
1468 for (size_t i = 0; i < vfbs_.size(); ++i) {
1469 VirtualFramebuffer *vfb = vfbs_[i];
1470 const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
1471 const u32 vfb_size = ColorBufferByteSize(vfb);
1472 const u32 vfb_bpp = vfb->format == GE_FORMAT_8888 ? 4 : 2;
1473 const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
1474 const u32 vfb_byteWidth = vfb->width * vfb_bpp;
1475
1476 // These heuristics are a bit annoying.
1477 // The goal is to avoid using GPU block transfers for things that ought to be memory.
1478 // Maybe we should even check for textures at these places instead?
1479
1480 if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
1481 const u32 byteOffset = dstBasePtr - vfb_address;
1482 const u32 byteStride = dstStride * bpp;
1483 const u32 yOffset = byteOffset / byteStride;
1484
1485 // Some games use mismatching bitdepths. But make sure the stride matches.
1486 // If it doesn't, generally this means we detected the framebuffer with too large a height.
1487 // Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
1488 bool match = yOffset < dstYOffset && (int)yOffset <= (int)vfb->bufferHeight - dstHeight;
1489 if (match && vfb_byteStride != byteStride) {
1490 // Grand Knights History copies with a mismatching stride but a full line at a time.
1491 // Makes it hard to detect the wrong transfers in e.g. God of War.
1492 if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
1493 // However, some other games write cluts to framebuffers.
1494 // Let's catch this and upload. Otherwise reject the match.
1495 match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
1496 if (match) {
1497 dstWidth = byteStride * height / vfb_bpp;
1498 dstHeight = 1;
1499 }
1500 } else {
1501 dstWidth = byteStride * height / vfb_bpp;
1502 dstHeight = 1;
1503 }
1504 } else if (match) {
1505 dstWidth = width;
1506 dstHeight = height;
1507 }
1508 if (match) {
1509 dstYOffset = yOffset;
1510 dstXOffset = dstStride == 0 ? 0 : (byteOffset / bpp) % dstStride;
1511 dstBuffer = vfb;
1512 }
1513 }
1514 if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
1515 const u32 byteOffset = srcBasePtr - vfb_address;
1516 const u32 byteStride = srcStride * bpp;
1517 const u32 yOffset = byteOffset / byteStride;
1518 bool match = yOffset < srcYOffset && (int)yOffset <= (int)vfb->bufferHeight - srcHeight;
1519 if (match && vfb_byteStride != byteStride) {
1520 if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
1521 match = false;
1522 } else {
1523 srcWidth = byteStride * height / vfb_bpp;
1524 srcHeight = 1;
1525 }
1526 } else if (match) {
1527 srcWidth = width;
1528 srcHeight = height;
1529 }
1530 if (match) {
1531 srcYOffset = yOffset;
1532 srcXOffset = srcStride == 0 ? 0 : (byteOffset / bpp) % srcStride;
1533 srcBuffer = vfb;
1534 }
1535 }
1536 }
1537
1538 if (srcBuffer && !dstBuffer) {
1539 if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
1540 (PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
1541 Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
1542 GEBufferFormat ramFormat;
1543 // Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
1544 if (bpp == 4) {
1545 // Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
1546 ramFormat = GE_FORMAT_8888;
1547 } else if (srcBuffer->format != GE_FORMAT_8888) {
1548 // We guess that the game will interpret the data the same as it was in the source of the copy.
1549 // Seems like a likely good guess, and works in Test Drive Unlimited.
1550 ramFormat = srcBuffer->format;
1551 } else {
1552 // No info left - just fall back to something. But this is definitely split pixel tricks.
1553 ramFormat = GE_FORMAT_5551;
1554 }
1555 dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
1556 }
1557 }
1558
1559 if (dstBuffer)
1560 dstBuffer->last_frame_used = gpuStats.numFlips;
1561
1562 if (dstYOffset != (u32)-1) {
1563 dstY += dstYOffset;
1564 dstX += dstXOffset;
1565 }
1566 if (srcYOffset != (u32)-1) {
1567 srcY += srcYOffset;
1568 srcX += srcXOffset;
1569 }
1570 }
1571
CreateRAMFramebuffer(uint32_t fbAddress,int width,int height,int stride,GEBufferFormat format)1572 VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
1573 INFO_LOG(G3D, "Creating RAM framebuffer at %08x (%dx%d, stride %d, format %d)", fbAddress, width, height, stride, format);
1574
1575 // A target for the destination is missing - so just create one!
1576 // Make sure this one would be found by the algorithm above so we wouldn't
1577 // create a new one each frame.
1578 VirtualFramebuffer *vfb = new VirtualFramebuffer{};
1579 vfb->fbo = nullptr;
1580 vfb->fb_address = fbAddress; // NOTE - not necessarily in VRAM!
1581 vfb->fb_stride = stride;
1582 vfb->z_address = 0; // marks that if anyone tries to render to this framebuffer, it should be dropped and recreated.
1583 vfb->z_stride = 0;
1584 vfb->width = std::max(width, stride);
1585 vfb->height = height;
1586 vfb->newWidth = vfb->width;
1587 vfb->newHeight = vfb->height;
1588 vfb->lastFrameNewSize = gpuStats.numFlips;
1589 vfb->renderScaleFactor = renderScaleFactor_;
1590 vfb->renderWidth = (u16)(vfb->width * renderScaleFactor_);
1591 vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
1592 vfb->bufferWidth = vfb->width;
1593 vfb->bufferHeight = vfb->height;
1594 vfb->format = format;
1595 vfb->drawnFormat = GE_FORMAT_8888;
1596 vfb->usageFlags = FB_USAGE_RENDERTARGET;
1597 SetColorUpdated(vfb, 0);
1598 char name[64];
1599 snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
1600 textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
1601 vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, name });
1602 vfbs_.push_back(vfb);
1603
1604 u32 byteSize = ColorBufferByteSize(vfb);
1605 if (fbAddress + byteSize > framebufRangeEnd_) {
1606 framebufRangeEnd_ = fbAddress + byteSize;
1607 }
1608
1609 return vfb;
1610 }
1611
1612 // 1:1 pixel sides buffers, we resize buffers to these before we read them back.
FindDownloadTempBuffer(VirtualFramebuffer * vfb)1613 VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb) {
1614 // For now we'll keep these on the same struct as the ones that can get displayed
1615 // (and blatantly copy work already done above while at it).
1616 VirtualFramebuffer *nvfb = nullptr;
1617
1618 // We maintain a separate vector of framebuffer objects for blitting.
1619 for (VirtualFramebuffer *v : bvfbs_) {
1620 if (v->fb_address == vfb->fb_address && v->format == vfb->format) {
1621 if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
1622 nvfb = v;
1623 v->fb_stride = vfb->fb_stride;
1624 v->width = vfb->width;
1625 v->height = vfb->height;
1626 break;
1627 }
1628 }
1629 }
1630
1631 // Create a new fbo if none was found for the size
1632 if (!nvfb) {
1633 nvfb = new VirtualFramebuffer();
1634 memset(nvfb, 0, sizeof(VirtualFramebuffer));
1635 nvfb->fbo = nullptr;
1636 nvfb->fb_address = vfb->fb_address;
1637 nvfb->fb_stride = vfb->fb_stride;
1638 nvfb->z_address = vfb->z_address;
1639 nvfb->z_stride = vfb->z_stride;
1640 nvfb->width = vfb->width;
1641 nvfb->height = vfb->height;
1642 nvfb->renderWidth = vfb->bufferWidth;
1643 nvfb->renderHeight = vfb->bufferHeight;
1644 nvfb->renderScaleFactor = 1.0f; // For readbacks we resize to the original size, of course.
1645 nvfb->bufferWidth = vfb->bufferWidth;
1646 nvfb->bufferHeight = vfb->bufferHeight;
1647 nvfb->format = vfb->format;
1648 nvfb->drawnWidth = vfb->drawnWidth;
1649 nvfb->drawnHeight = vfb->drawnHeight;
1650 nvfb->drawnFormat = vfb->format;
1651
1652 char name[64];
1653 snprintf(name, sizeof(name), "download_temp");
1654 nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, false, name });
1655 if (!nvfb->fbo) {
1656 ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
1657 return nullptr;
1658 }
1659
1660 bvfbs_.push_back(nvfb);
1661 } else {
1662 UpdateDownloadTempBuffer(nvfb);
1663 }
1664
1665 nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
1666 nvfb->last_frame_render = gpuStats.numFlips;
1667 nvfb->dirtyAfterDisplay = true;
1668
1669 return nvfb;
1670 }
1671
ApplyClearToMemory(int x1,int y1,int x2,int y2,u32 clearColor)1672 void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) {
1673 if (currentRenderVfb_) {
1674 if ((currentRenderVfb_->usageFlags & FB_USAGE_DOWNLOAD_CLEAR) != 0) {
1675 // Already zeroed in memory.
1676 return;
1677 }
1678 }
1679 if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) {
1680 return;
1681 }
1682
1683 u8 *addr = Memory::GetPointerUnchecked(gstate.getFrameBufAddress());
1684 const int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2;
1685
1686 u32 clearBits = clearColor;
1687 if (bpp == 2) {
1688 u16 clear16 = 0;
1689 switch (gstate.FrameBufFormat()) {
1690 case GE_FORMAT_565: ConvertRGBA8888ToRGB565(&clear16, &clearColor, 1); break;
1691 case GE_FORMAT_5551: ConvertRGBA8888ToRGBA5551(&clear16, &clearColor, 1); break;
1692 case GE_FORMAT_4444: ConvertRGBA8888ToRGBA4444(&clear16, &clearColor, 1); break;
1693 default: _dbg_assert_(0); break;
1694 }
1695 clearBits = clear16 | (clear16 << 16);
1696 }
1697
1698 const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF);
1699 const int stride = gstate.FrameBufStride();
1700 const int width = x2 - x1;
1701
1702 const int byteStride = stride * bpp;
1703 const int byteWidth = width * bpp;
1704 for (int y = y1; y < y2; ++y) {
1705 NotifyMemInfo(MemBlockFlags::WRITE, gstate.getFrameBufAddress() + x1 * bpp + y * byteStride, byteWidth, "FramebufferClear");
1706 }
1707
1708 // Can use memset for simple cases. Often alpha is different and gums up the works.
1709 if (singleByteClear) {
1710 addr += x1 * bpp;
1711 for (int y = y1; y < y2; ++y) {
1712 memset(addr + y * byteStride, clearBits, byteWidth);
1713 }
1714 } else {
1715 // This will most often be true - rarely is the width not aligned.
1716 // TODO: We should really use non-temporal stores here to avoid the cache,
1717 // as it's unlikely that these bytes will be read.
1718 if ((width & 3) == 0 && (x1 & 3) == 0) {
1719 u64 val64 = clearBits | ((u64)clearBits << 32);
1720 int xstride = 8 / bpp;
1721
1722 u64 *addr64 = (u64 *)addr;
1723 const int stride64 = stride / xstride;
1724 const int x1_64 = x1 / xstride;
1725 const int x2_64 = x2 / xstride;
1726 for (int y = y1; y < y2; ++y) {
1727 for (int x = x1_64; x < x2_64; ++x) {
1728 addr64[y * stride64 + x] = val64;
1729 }
1730 }
1731 } else if (bpp == 4) {
1732 u32 *addr32 = (u32 *)addr;
1733 for (int y = y1; y < y2; ++y) {
1734 for (int x = x1; x < x2; ++x) {
1735 addr32[y * stride + x] = clearBits;
1736 }
1737 }
1738 } else if (bpp == 2) {
1739 u16 *addr16 = (u16 *)addr;
1740 for (int y = y1; y < y2; ++y) {
1741 for (int x = x1; x < x2; ++x) {
1742 addr16[y * stride + x] = (u16)clearBits;
1743 }
1744 }
1745 }
1746 }
1747
1748 if (currentRenderVfb_) {
1749 // The current content is in memory now, so update the flag.
1750 if (x1 == 0 && y1 == 0 && x2 >= currentRenderVfb_->width && y2 >= currentRenderVfb_->height) {
1751 currentRenderVfb_->usageFlags |= FB_USAGE_DOWNLOAD_CLEAR;
1752 currentRenderVfb_->memoryUpdated = true;
1753 }
1754 }
1755 }
1756
NotifyBlockTransferBefore(u32 dstBasePtr,int dstStride,int dstX,int dstY,u32 srcBasePtr,int srcStride,int srcX,int srcY,int width,int height,int bpp,u32 skipDrawReason)1757 bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
1758 if (!useBufferedRendering_) {
1759 return false;
1760 }
1761
1762 // Skip checking if there's no framebuffers in that area.
1763 if (!MayIntersectFramebuffer(srcBasePtr) && !MayIntersectFramebuffer(dstBasePtr)) {
1764 return false;
1765 }
1766
1767 VirtualFramebuffer *dstBuffer = 0;
1768 VirtualFramebuffer *srcBuffer = 0;
1769 int srcWidth = width;
1770 int srcHeight = height;
1771 int dstWidth = width;
1772 int dstHeight = height;
1773 FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
1774
1775 if (dstBuffer && srcBuffer) {
1776 if (srcBuffer == dstBuffer) {
1777 if (srcX != dstX || srcY != dstY) {
1778 WARN_LOG_N_TIMES(dstsrc, 100, G3D, "Intra-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1779 width, height, bpp,
1780 srcBasePtr, srcX, srcY, srcStride,
1781 dstBasePtr, dstX, dstY, dstStride);
1782 FlushBeforeCopy();
1783 // Some backends can handle blitting within a framebuffer. Others will just have to deal with it or ignore it, apparently.
1784 BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp, "Blit_IntraBufferBlockTransfer");
1785 RebindFramebuffer("rebind after intra block transfer");
1786 SetColorUpdated(dstBuffer, skipDrawReason);
1787 return true; // Skip the memory copy.
1788 } else {
1789 // Ignore, nothing to do. Tales of Phantasia X does this by accident.
1790 return true; // Skip the memory copy.
1791 }
1792 } else {
1793 WARN_LOG_N_TIMES(dstnotsrc, 100, G3D, "Inter-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1794 width, height, bpp,
1795 srcBasePtr, srcX, srcY, srcStride,
1796 dstBasePtr, dstX, dstY, dstStride);
1797 // Straightforward blit between two framebuffers.
1798 FlushBeforeCopy();
1799 BlitFramebuffer(dstBuffer, dstX, dstY, srcBuffer, srcX, srcY, dstWidth, dstHeight, bpp, "Blit_InterBufferBlockTransfer");
1800 RebindFramebuffer("RebindFramebuffer - Inter-buffer block transfer");
1801 SetColorUpdated(dstBuffer, skipDrawReason);
1802 return true; // No need to actually do the memory copy behind, probably.
1803 }
1804 return false;
1805 } else if (dstBuffer) {
1806 // Here we should just draw the pixels into the buffer. Copy first.
1807 return false;
1808 } else if (srcBuffer) {
1809 WARN_LOG_N_TIMES(btd, 100, G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
1810 width, height, bpp,
1811 srcBasePtr, srcX, srcY, srcStride,
1812 dstBasePtr, dstX, dstY, dstStride);
1813 FlushBeforeCopy();
1814 if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
1815 const int srcBpp = srcBuffer->format == GE_FORMAT_8888 ? 4 : 2;
1816 const float srcXFactor = (float)bpp / srcBpp;
1817 const bool tooTall = srcY + srcHeight > srcBuffer->bufferHeight;
1818 if (srcHeight <= 0 || (tooTall && srcY != 0)) {
1819 WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
1820 } else {
1821 if (tooTall) {
1822 WARN_LOG_ONCE(btdheight, G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcY, srcHeight, srcBuffer->bufferHeight);
1823 }
1824 ReadFramebufferToMemory(srcBuffer, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcWidth * srcXFactor), srcHeight);
1825 srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
1826 }
1827 }
1828 return false; // Let the bit copy happen
1829 } else {
1830 return false;
1831 }
1832 }
1833
NotifyBlockTransferAfter(u32 dstBasePtr,int dstStride,int dstX,int dstY,u32 srcBasePtr,int srcStride,int srcX,int srcY,int width,int height,int bpp,u32 skipDrawReason)1834 void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
1835 // If it's a block transfer direct to the screen, and we're not using buffers, draw immediately.
1836 // We may still do a partial block draw below if this doesn't pass.
1837 if (!useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272) {
1838 bool isPrevDisplayBuffer = PrevDisplayFramebufAddr() == dstBasePtr;
1839 bool isDisplayBuffer = DisplayFramebufAddr() == dstBasePtr;
1840 if (isPrevDisplayBuffer || isDisplayBuffer) {
1841 FlushBeforeCopy();
1842 DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), displayFormat_, dstStride);
1843 return;
1844 }
1845 }
1846
1847 if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
1848 VirtualFramebuffer *dstBuffer = 0;
1849 VirtualFramebuffer *srcBuffer = 0;
1850 int srcWidth = width;
1851 int srcHeight = height;
1852 int dstWidth = width;
1853 int dstHeight = height;
1854 FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
1855
1856 // A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
1857 // the backbuffer. Detect this and have the framebuffermanager draw the pixels.
1858 if (!useBufferedRendering_ && currentRenderVfb_ != dstBuffer) {
1859 return;
1860 }
1861
1862 if (dstBuffer && !srcBuffer) {
1863 WARN_LOG_ONCE(btu, G3D, "Block transfer upload %08x -> %08x", srcBasePtr, dstBasePtr);
1864 FlushBeforeCopy();
1865 const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
1866 int dstBpp = dstBuffer->format == GE_FORMAT_8888 ? 4 : 2;
1867 float dstXFactor = (float)bpp / dstBpp;
1868 if (dstWidth > dstBuffer->width || dstHeight > dstBuffer->height) {
1869 // The buffer isn't big enough, and we have a clear hint of size. Resize.
1870 // This happens in Valkyrie Profile when uploading video at the ending.
1871 ResizeFramebufFBO(dstBuffer, dstWidth, dstHeight, false, true);
1872 // Make sure we don't flop back and forth.
1873 dstBuffer->newWidth = std::max(dstWidth, (int)dstBuffer->width);
1874 dstBuffer->newHeight = std::max(dstHeight, (int)dstBuffer->height);
1875 dstBuffer->lastFrameNewSize = gpuStats.numFlips;
1876 // Resizing may change the viewport/etc.
1877 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
1878 }
1879 DrawPixels(dstBuffer, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstBuffer->format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(dstWidth * dstXFactor), dstHeight);
1880 SetColorUpdated(dstBuffer, skipDrawReason);
1881 RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
1882 }
1883 }
1884 }
1885
SetSafeSize(u16 w,u16 h)1886 void FramebufferManagerCommon::SetSafeSize(u16 w, u16 h) {
1887 VirtualFramebuffer *vfb = currentRenderVfb_;
1888 if (vfb) {
1889 vfb->safeWidth = std::min(vfb->bufferWidth, std::max(vfb->safeWidth, w));
1890 vfb->safeHeight = std::min(vfb->bufferHeight, std::max(vfb->safeHeight, h));
1891 }
1892 }
1893
Resized()1894 void FramebufferManagerCommon::Resized() {
1895 gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
1896
1897 int w, h, scaleFactor;
1898 presentation_->CalculateRenderResolution(&w, &h, &scaleFactor, &postShaderIsUpscalingFilter_, &postShaderIsSupersampling_);
1899 PSP_CoreParameter().renderWidth = w;
1900 PSP_CoreParameter().renderHeight = h;
1901 PSP_CoreParameter().renderScaleFactor = scaleFactor;
1902
1903 if (UpdateSize()) {
1904 DestroyAllFBOs();
1905 }
1906
1907 // Might have a new post shader - let's compile it.
1908 presentation_->UpdatePostShader();
1909
1910 #ifdef _WIN32
1911 // Seems related - if you're ok with numbers all the time, show some more :)
1912 if (g_Config.iShowFPSCounter != 0) {
1913 ShowScreenResolution();
1914 }
1915 #endif
1916 }
1917
DestroyAllFBOs()1918 void FramebufferManagerCommon::DestroyAllFBOs() {
1919 currentRenderVfb_ = nullptr;
1920 displayFramebuf_ = nullptr;
1921 prevDisplayFramebuf_ = nullptr;
1922 prevPrevDisplayFramebuf_ = nullptr;
1923
1924 for (VirtualFramebuffer *vfb : vfbs_) {
1925 INFO_LOG(FRAMEBUF, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
1926 DestroyFramebuf(vfb);
1927 }
1928 vfbs_.clear();
1929
1930 for (VirtualFramebuffer *vfb : bvfbs_) {
1931 DestroyFramebuf(vfb);
1932 }
1933 bvfbs_.clear();
1934
1935 for (auto &tempFB : tempFBOs_) {
1936 tempFB.second.fbo->Release();
1937 }
1938 tempFBOs_.clear();
1939
1940 for (auto iter : fbosToDelete_) {
1941 iter->Release();
1942 }
1943 fbosToDelete_.clear();
1944 }
1945
GetTempFBO(TempFBO reason,u16 w,u16 h)1946 Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
1947 u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;
1948 auto it = tempFBOs_.find(key);
1949 if (it != tempFBOs_.end()) {
1950 it->second.last_frame_used = gpuStats.numFlips;
1951 return it->second.fbo;
1952 }
1953
1954 bool z_stencil = reason == TempFBO::STENCIL;
1955 char name[128];
1956 snprintf(name, sizeof(name), "temp_fbo_%dx%d%s", w, h, z_stencil ? "_depth" : "");
1957 Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, z_stencil, name });
1958 if (!fbo) {
1959 return nullptr;
1960 }
1961
1962 const TempFBOInfo info = { fbo, gpuStats.numFlips };
1963 tempFBOs_[key] = info;
1964 return fbo;
1965 }
1966
UpdateFramebufUsage(VirtualFramebuffer * vfb)1967 void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) {
1968 auto checkFlag = [&](u16 flag, int last_frame) {
1969 if (vfb->usageFlags & flag) {
1970 const int age = frameLastFramebufUsed_ - last_frame;
1971 if (age > FBO_OLD_USAGE_FLAG) {
1972 vfb->usageFlags &= ~flag;
1973 }
1974 }
1975 };
1976
1977 checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
1978 checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used);
1979 checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render);
1980 checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut);
1981 }
1982
ShowScreenResolution()1983 void FramebufferManagerCommon::ShowScreenResolution() {
1984 auto gr = GetI18NCategory("Graphics");
1985
1986 std::ostringstream messageStream;
1987 messageStream << gr->T("Internal Resolution") << ": ";
1988 messageStream << PSP_CoreParameter().renderWidth << "x" << PSP_CoreParameter().renderHeight << " ";
1989 if (postShaderIsUpscalingFilter_) {
1990 messageStream << gr->T("(upscaling)") << " ";
1991 } else if (postShaderIsSupersampling_) {
1992 messageStream << gr->T("(supersampling)") << " ";
1993 }
1994 messageStream << gr->T("Window Size") << ": ";
1995 messageStream << PSP_CoreParameter().pixelWidth << "x" << PSP_CoreParameter().pixelHeight;
1996
1997 host->NotifyUserMessage(messageStream.str(), 2.0f, 0xFFFFFF, "resize");
1998 INFO_LOG(SYSTEM, "%s", messageStream.str().c_str());
1999 }
2000
2001 // We might also want to implement an asynchronous callback-style version of this. Would probably
2002 // only be possible to implement optimally on Vulkan, but on GL and D3D11 we could do pixel buffers
2003 // and read on the next frame, then call the callback. PackFramebufferAsync_ on OpenGL already does something similar.
2004 //
2005 // The main use cases for this are:
2006 // * GE debugging(in practice async will not matter because it will stall anyway.)
2007 // * Video file recording(would probably be great if it was async.)
2008 // * Screenshots(benefit slightly from async.)
2009 // * Save state screenshots(could probably be async but need to manage the stall.)
GetFramebuffer(u32 fb_address,int fb_stride,GEBufferFormat format,GPUDebugBuffer & buffer,int maxRes)2010 bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes) {
2011 VirtualFramebuffer *vfb = currentRenderVfb_;
2012 if (!vfb) {
2013 vfb = GetVFBAt(fb_address);
2014 }
2015
2016 if (!vfb) {
2017 if (!Memory::IsValidAddress(fb_address))
2018 return false;
2019 // If there's no vfb and we're drawing there, must be memory?
2020 buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, format);
2021 return true;
2022 }
2023
2024 int w = vfb->renderWidth, h = vfb->renderHeight;
2025
2026 Draw::Framebuffer *bound = nullptr;
2027
2028 if (vfb->fbo) {
2029 if (maxRes > 0 && vfb->renderWidth > vfb->width * maxRes) {
2030 w = vfb->width * maxRes;
2031 h = vfb->height * maxRes;
2032
2033 Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, w, h);
2034 VirtualFramebuffer tempVfb = *vfb;
2035 tempVfb.fbo = tempFBO;
2036 tempVfb.bufferWidth = vfb->width;
2037 tempVfb.bufferHeight = vfb->height;
2038 tempVfb.renderWidth = w;
2039 tempVfb.renderHeight = h;
2040 tempVfb.renderScaleFactor = (float)maxRes;
2041 BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, "Blit_GetFramebuffer");
2042
2043 bound = tempFBO;
2044 } else {
2045 bound = vfb->fbo;
2046 }
2047 }
2048
2049 if (!useBufferedRendering_) {
2050 // Safety check.
2051 w = std::min(w, PSP_CoreParameter().pixelWidth);
2052 h = std::min(h, PSP_CoreParameter().pixelHeight);
2053 }
2054
2055 // TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow?
2056 bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2057 buffer.Allocate(w, h, GE_FORMAT_8888, flipY);
2058 bool retval = draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, "GetFramebuffer");
2059 gpuStats.numReadbacks++;
2060 // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
2061 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
2062 // We may have blitted to a temp FBO.
2063 RebindFramebuffer("RebindFramebuffer - GetFramebuffer");
2064 return retval;
2065 }
2066
GetDepthbuffer(u32 fb_address,int fb_stride,u32 z_address,int z_stride,GPUDebugBuffer & buffer)2067 bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
2068 VirtualFramebuffer *vfb = currentRenderVfb_;
2069 if (!vfb) {
2070 vfb = GetVFBAt(fb_address);
2071 }
2072
2073 if (!vfb) {
2074 if (!Memory::IsValidAddress(z_address))
2075 return false;
2076 // If there's no vfb and we're drawing there, must be memory?
2077 buffer = GPUDebugBuffer(Memory::GetPointer(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
2078 return true;
2079 }
2080
2081 int w = vfb->renderWidth;
2082 int h = vfb->renderHeight;
2083 if (!useBufferedRendering_) {
2084 // Safety check.
2085 w = std::min(w, PSP_CoreParameter().pixelWidth);
2086 h = std::min(h, PSP_CoreParameter().pixelHeight);
2087 }
2088
2089 bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2090 if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
2091 buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT_DIV_256, flipY);
2092 } else {
2093 buffer.Allocate(w, h, GPU_DBG_FORMAT_FLOAT, flipY);
2094 }
2095 // No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
2096 bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer");
2097 // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
2098 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
2099 // That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2100 RebindFramebuffer("RebindFramebuffer - GetDepthbuffer");
2101 return retval;
2102 }
2103
GetStencilbuffer(u32 fb_address,int fb_stride,GPUDebugBuffer & buffer)2104 bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
2105 VirtualFramebuffer *vfb = currentRenderVfb_;
2106 if (!vfb) {
2107 vfb = GetVFBAt(fb_address);
2108 }
2109
2110 if (!vfb) {
2111 if (!Memory::IsValidAddress(fb_address))
2112 return false;
2113 // If there's no vfb and we're drawing there, must be memory?
2114 // TODO: Actually get the stencil.
2115 buffer = GPUDebugBuffer(Memory::GetPointer(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
2116 return true;
2117 }
2118
2119 int w = vfb->renderWidth;
2120 int h = vfb->renderHeight;
2121 if (!useBufferedRendering_) {
2122 // Safety check.
2123 w = std::min(w, PSP_CoreParameter().pixelWidth);
2124 h = std::min(h, PSP_CoreParameter().pixelHeight);
2125 }
2126
2127 bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
2128 // No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
2129 buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);
2130 bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, "GetStencilbuffer");
2131 // That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2132 RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
2133 return retval;
2134 }
2135
GetOutputFramebuffer(GPUDebugBuffer & buffer)2136 bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
2137 int w, h;
2138 draw_->GetFramebufferDimensions(nullptr, &w, &h);
2139 Draw::DataFormat fmt = draw_->PreferredFramebufferReadbackFormat(nullptr);
2140 // Ignore preferred formats other than BGRA.
2141 if (fmt != Draw::DataFormat::B8G8R8A8_UNORM)
2142 fmt = Draw::DataFormat::R8G8B8A8_UNORM;
2143 buffer.Allocate(w, h, fmt == Draw::DataFormat::R8G8B8A8_UNORM ? GPU_DBG_FORMAT_8888 : GPU_DBG_FORMAT_8888_BGRA, false);
2144 bool retval = draw_->CopyFramebufferToMemorySync(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, "GetOutputFramebuffer");
2145 // That may have unbound the framebuffer, rebind to avoid crashes when debugging.
2146 RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer");
2147 return retval;
2148 }
2149
2150 // This function takes an already correctly-sized framebuffer and packs it into RAM.
2151 // Does not need to account for scaling.
2152 // Color conversion is currently done on CPU but should theoretically be done on GPU.
2153 // (Except using the GPU might cause problems because of various implementations'
2154 // dithering behavior and games that expect exact colors like Danganronpa, so we
2155 // can't entirely be rid of the CPU path.) -- unknown
PackFramebufferSync_(VirtualFramebuffer * vfb,int x,int y,int w,int h)2156 void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
2157 if (!vfb->fbo) {
2158 ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "PackFramebufferSync_: vfb->fbo == 0");
2159 return;
2160 }
2161
2162 if (w <= 0 || h <= 0) {
2163 ERROR_LOG(G3D, "Bad inputs to PackFramebufferSync_: %d %d %d %d", x, y, w, h);
2164 return;
2165 }
2166
2167 const u32 fb_address = vfb->fb_address & 0x3FFFFFFF;
2168
2169 Draw::DataFormat destFormat = GEFormatToThin3D(vfb->format);
2170 const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
2171
2172 const int dstByteOffset = (y * vfb->fb_stride + x) * dstBpp;
2173 const int dstSize = (h * vfb->fb_stride + w - 1) * dstBpp;
2174
2175 if (!Memory::IsValidRange(fb_address + dstByteOffset, dstSize)) {
2176 ERROR_LOG_REPORT(G3D, "PackFramebufferSync_ would write outside of memory, ignoring");
2177 return;
2178 }
2179
2180 u8 *destPtr = Memory::GetPointer(fb_address + dstByteOffset);
2181
2182 // We always need to convert from the framebuffer native format.
2183 // Right now that's always 8888.
2184 DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
2185
2186 if (destPtr) {
2187 draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync_");
2188 char tag[128];
2189 size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->format));
2190 NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
2191 } else {
2192 ERROR_LOG(G3D, "PackFramebufferSync_: Tried to readback to bad address %08x (stride = %d)", fb_address + dstByteOffset, vfb->fb_stride);
2193 }
2194
2195 gpuStats.numReadbacks++;
2196 }
2197
ReadFramebufferToMemory(VirtualFramebuffer * vfb,int x,int y,int w,int h)2198 void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
2199 // Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
2200 if (x + w >= vfb->bufferWidth) {
2201 w = vfb->bufferWidth - x;
2202 }
2203 if (vfb && vfb->fbo) {
2204 // We'll pseudo-blit framebuffers here to get a resized version of vfb.
2205 if (gameUsesSequentialCopies_) {
2206 // Ignore the x/y/etc., read the entire thing.
2207 x = 0;
2208 y = 0;
2209 w = vfb->width;
2210 h = vfb->height;
2211 vfb->memoryUpdated = true;
2212 vfb->usageFlags |= FB_USAGE_DOWNLOAD;
2213 } else if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
2214 // Mark it as fully downloaded until next render to it.
2215 vfb->memoryUpdated = true;
2216 vfb->usageFlags |= FB_USAGE_DOWNLOAD;
2217 } else {
2218 // Let's try to set the flag eventually, if the game copies a lot.
2219 // Some games copy subranges very frequently.
2220 const static int FREQUENT_SEQUENTIAL_COPIES = 3;
2221 static int frameLastCopy = 0;
2222 static u32 bufferLastCopy = 0;
2223 static int copiesThisFrame = 0;
2224 if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
2225 frameLastCopy = gpuStats.numFlips;
2226 bufferLastCopy = vfb->fb_address;
2227 copiesThisFrame = 0;
2228 }
2229 if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
2230 gameUsesSequentialCopies_ = true;
2231 }
2232 }
2233
2234 if (vfb->renderWidth == vfb->width && vfb->renderHeight == vfb->height) {
2235 // No need to blit
2236 PackFramebufferSync_(vfb, x, y, w, h);
2237 } else {
2238 VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
2239 if (nvfb) {
2240 BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, "Blit_ReadFramebufferToMemory");
2241 PackFramebufferSync_(nvfb, x, y, w, h);
2242 }
2243 }
2244
2245 textureCache_->ForgetLastTexture();
2246 RebindFramebuffer("RebindFramebuffer - ReadFramebufferToMemory");
2247 }
2248 }
2249
FlushBeforeCopy()2250 void FramebufferManagerCommon::FlushBeforeCopy() {
2251 // Flush anything not yet drawn before blitting, downloading, or uploading.
2252 // This might be a stalled list, or unflushed before a block transfer, etc.
2253
2254 // TODO: It's really bad that we are calling SetRenderFramebuffer here with
2255 // all the irrelevant state checking it'll use to decide what to do. Should
2256 // do something more focused here.
2257 SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
2258 drawEngine_->DispatchFlush();
2259 }
2260
DownloadFramebufferForClut(u32 fb_address,u32 loadBytes)2261 void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
2262 VirtualFramebuffer *vfb = GetVFBAt(fb_address);
2263 if (vfb && vfb->fb_stride != 0) {
2264 const u32 bpp = vfb->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
2265 int x = 0;
2266 int y = 0;
2267 int pixels = loadBytes / bpp;
2268 // The height will be 1 for each stride or part thereof.
2269 int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
2270 int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
2271
2272 // We might still have a pending draw to the fb in question, flush if so.
2273 FlushBeforeCopy();
2274
2275 // No need to download if we already have it.
2276 if (w > 0 && h > 0 && !vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) {
2277 // We intentionally don't try to optimize into a full download here - we don't want to over download.
2278
2279 // CLUT framebuffers are often incorrectly estimated in size.
2280 if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
2281 vfb->memoryUpdated = true;
2282 }
2283 vfb->clutUpdatedBytes = loadBytes;
2284
2285 // We'll pseudo-blit framebuffers here to get a resized version of vfb.
2286 VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb);
2287 if (nvfb) {
2288 BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, "Blit_DownloadFramebufferForClut");
2289 PackFramebufferSync_(nvfb, x, y, w, h);
2290 }
2291
2292 textureCache_->ForgetLastTexture();
2293 RebindFramebuffer("RebindFramebuffer - DownloadFramebufferForClut");
2294 }
2295 }
2296 }
2297
RebindFramebuffer(const char * tag)2298 void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
2299 shaderManager_->DirtyLastShader();
2300 if (currentRenderVfb_ && currentRenderVfb_->fbo) {
2301 draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
2302 } else {
2303 // Should this even happen? It could while debugging, but maybe we can just skip binding at all.
2304 draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "RebindFramebuffer_Bad");
2305 }
2306 }
2307
GetFramebufferList()2308 std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() {
2309 std::vector<FramebufferInfo> list;
2310
2311 for (size_t i = 0; i < vfbs_.size(); ++i) {
2312 VirtualFramebuffer *vfb = vfbs_[i];
2313
2314 FramebufferInfo info;
2315 info.fb_address = vfb->fb_address;
2316 info.z_address = vfb->z_address;
2317 info.format = vfb->format;
2318 info.width = vfb->width;
2319 info.height = vfb->height;
2320 info.fbo = vfb->fbo;
2321 list.push_back(info);
2322 }
2323
2324 return list;
2325 }
2326
// Calls Release() on obj if it is non-null, and leaves obj set to nullptr either way.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
2333
DeviceLost()2334 void FramebufferManagerCommon::DeviceLost() {
2335 DestroyAllFBOs();
2336 for (int i = 0; i < 3; i++) {
2337 for (int j = 0; j < 3; j++) {
2338 DoRelease(reinterpretFromTo_[i][j]);
2339 }
2340 }
2341 DoRelease(reinterpretVBuf_);
2342 DoRelease(reinterpretSampler_);
2343 DoRelease(reinterpretVS_);
2344 presentation_->DeviceLost();
2345 draw_ = nullptr;
2346 }
2347
DeviceRestore(Draw::DrawContext * draw)2348 void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
2349 draw_ = draw;
2350 presentation_->DeviceRestore(draw);
2351 }
2352