1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include <algorithm>
19 #include <cstring>
20
21 #include "ext/xxhash.h"
22 #include "Common/Data/Convert/ColorConv.h"
23 #include "Common/Data/Text/I18n.h"
24 #include "Common/Math/math_util.h"
25 #include "Common/Profiler/Profiler.h"
26 #include "Common/GPU/OpenGL/GLRenderManager.h"
27
28 #include "Core/Config.h"
29 #include "Core/Host.h"
30 #include "Core/MemMap.h"
31 #include "Core/Reporting.h"
32 #include "GPU/ge_constants.h"
33 #include "GPU/GPUState.h"
34 #include "GPU/GLES/TextureCacheGLES.h"
35 #include "GPU/GLES/FramebufferManagerGLES.h"
36 #include "GPU/Common/FragmentShaderGenerator.h"
37 #include "GPU/GLES/DepalettizeShaderGLES.h"
38 #include "GPU/GLES/ShaderManagerGLES.h"
39 #include "GPU/GLES/DrawEngineGLES.h"
40 #include "GPU/Common/TextureDecoder.h"
41
42 #ifdef _M_SSE
43 #include <emmintrin.h>
44 #endif
45
TextureCacheGLES(Draw::DrawContext * draw)46 TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw)
47 : TextureCacheCommon(draw) {
48 render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
49
50 SetupTextureDecoder();
51
52 nextTexture_ = nullptr;
53
54 std::vector<GLRInputLayout::Entry> entries;
55 entries.push_back({ 0, 3, GL_FLOAT, GL_FALSE, 20, 0 });
56 entries.push_back({ 1, 2, GL_FLOAT, GL_FALSE, 20, 12 });
57 shadeInputLayout_ = render_->CreateInputLayout(entries);
58 }
59
~TextureCacheGLES()60 TextureCacheGLES::~TextureCacheGLES() {
61 if (shadeInputLayout_) {
62 render_->DeleteInputLayout(shadeInputLayout_);
63 }
64 Clear(true);
65 }
66
SetFramebufferManager(FramebufferManagerGLES * fbManager)67 void TextureCacheGLES::SetFramebufferManager(FramebufferManagerGLES *fbManager) {
68 framebufferManagerGL_ = fbManager;
69 framebufferManager_ = fbManager;
70 }
71
ReleaseTexture(TexCacheEntry * entry,bool delete_them)72 void TextureCacheGLES::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
73 if (delete_them) {
74 if (entry->textureName) {
75 render_->DeleteTexture(entry->textureName);
76 }
77 }
78 entry->textureName = nullptr;
79 }
80
Clear(bool delete_them)81 void TextureCacheGLES::Clear(bool delete_them) {
82 TextureCacheCommon::Clear(delete_them);
83 }
84
getClutDestFormat(GEPaletteFormat format)85 Draw::DataFormat getClutDestFormat(GEPaletteFormat format) {
86 switch (format) {
87 case GE_CMODE_16BIT_ABGR4444:
88 return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
89 case GE_CMODE_16BIT_ABGR5551:
90 return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
91 case GE_CMODE_16BIT_BGR5650:
92 return Draw::DataFormat::R5G6B5_UNORM_PACK16;
93 case GE_CMODE_32BIT_ABGR8888:
94 return Draw::DataFormat::R8G8B8A8_UNORM;
95 }
96 return Draw::DataFormat::UNDEFINED;;
97 }
98
99 static const GLuint MinFiltGL[8] = {
100 GL_NEAREST,
101 GL_LINEAR,
102 GL_NEAREST,
103 GL_LINEAR,
104 GL_NEAREST_MIPMAP_NEAREST,
105 GL_LINEAR_MIPMAP_NEAREST,
106 GL_NEAREST_MIPMAP_LINEAR,
107 GL_LINEAR_MIPMAP_LINEAR,
108 };
109
110 static const GLuint MagFiltGL[2] = {
111 GL_NEAREST,
112 GL_LINEAR
113 };
114
ApplySamplingParams(const SamplerCacheKey & key)115 void TextureCacheGLES::ApplySamplingParams(const SamplerCacheKey &key) {
116 if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
117 float minLod = (float)key.minLevel / 256.0f;
118 float maxLod = (float)key.maxLevel / 256.0f;
119 float lodBias = (float)key.lodBias / 256.0f;
120 render_->SetTextureLod(0, minLod, maxLod, lodBias);
121 }
122
123 float aniso = 0.0f;
124 int minKey = ((int)key.mipEnable << 2) | ((int)key.mipFilt << 1) | ((int)key.minFilt);
125 render_->SetTextureSampler(0,
126 key.sClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, key.tClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT,
127 key.magFilt ? GL_LINEAR : GL_NEAREST, MinFiltGL[minKey], aniso);
128 }
129
ConvertColors(void * dstBuf,const void * srcBuf,Draw::DataFormat dstFmt,int numPixels)130 static void ConvertColors(void *dstBuf, const void *srcBuf, Draw::DataFormat dstFmt, int numPixels) {
131 const u32 *src = (const u32 *)srcBuf;
132 u32 *dst = (u32 *)dstBuf;
133 switch (dstFmt) {
134 case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
135 ConvertRGBA4444ToABGR4444((u16 *)dst, (const u16 *)src, numPixels);
136 break;
137 // Final Fantasy 2 uses this heavily in animated textures.
138 case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
139 ConvertRGBA5551ToABGR1555((u16 *)dst, (const u16 *)src, numPixels);
140 break;
141 case Draw::DataFormat::R5G6B5_UNORM_PACK16:
142 ConvertRGB565ToBGR565((u16 *)dst, (const u16 *)src, numPixels);
143 break;
144 default:
145 // No need to convert RGBA8888, right order already
146 if (dst != src)
147 memcpy(dst, src, numPixels * sizeof(u32));
148 break;
149 }
150 }
151
StartFrame()152 void TextureCacheGLES::StartFrame() {
153 InvalidateLastTexture();
154 timesInvalidatedAllThisFrame_ = 0;
155
156 GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
157 if (!lowMemoryMode_ && renderManager->SawOutOfMemory()) {
158 lowMemoryMode_ = true;
159 decimationCounter_ = 0;
160
161 auto err = GetI18NCategory("Error");
162 if (standardScaleFactor_ > 1) {
163 host->NotifyUserMessage(err->T("Warning: Video memory FULL, reducing upscaling and switching to slow caching mode"), 2.0f);
164 } else {
165 host->NotifyUserMessage(err->T("Warning: Video memory FULL, switching to slow caching mode"), 2.0f);
166 }
167 }
168
169 if (texelsScaledThisFrame_) {
170 VERBOSE_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
171 }
172 texelsScaledThisFrame_ = 0;
173 if (clearCacheNextFrame_) {
174 Clear(true);
175 clearCacheNextFrame_ = false;
176 } else {
177 Decimate();
178 }
179 }
180
UpdateCurrentClut(GEPaletteFormat clutFormat,u32 clutBase,bool clutIndexIsSimple)181 void TextureCacheGLES::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
182 const u32 clutBaseBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutBase * sizeof(u32)) : (clutBase * sizeof(u16));
183 // Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
184 // If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
185 //
186 // TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data.
187 // clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash
188 // unrelated old entries for small palettes.
189 // Adding clutBaseBytes may just be mitigating this for some usage patterns.
190 const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
191
192 if (replacer_.Enabled())
193 clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
194 else
195 clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
196
197 // Avoid a copy when we don't need to convert colors.
198 if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
199 const int numColors = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutMaxBytes_ / sizeof(u32)) : (clutMaxBytes_ / sizeof(u16));
200 ConvertColors(clutBufConverted_, clutBufRaw_, getClutDestFormat(clutFormat), numColors);
201 clutBuf_ = clutBufConverted_;
202 } else {
203 clutBuf_ = clutBufRaw_;
204 }
205
206 // Special optimization: fonts typically draw clut4 with just alpha values in a single color.
207 clutAlphaLinear_ = false;
208 clutAlphaLinearColor_ = 0;
209 if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {
210 const u16_le *clut = GetCurrentClut<u16_le>();
211 clutAlphaLinear_ = true;
212 clutAlphaLinearColor_ = clut[15] & 0xFFF0;
213 for (int i = 0; i < 16; ++i) {
214 u16 step = clutAlphaLinearColor_ | i;
215 if (clut[i] != step) {
216 clutAlphaLinear_ = false;
217 break;
218 }
219 }
220 }
221
222 clutLastFormat_ = gstate.clutformat;
223 }
224
BindTexture(TexCacheEntry * entry)225 void TextureCacheGLES::BindTexture(TexCacheEntry *entry) {
226 if (entry->textureName != lastBoundTexture) {
227 render_->BindTexture(0, entry->textureName);
228 lastBoundTexture = entry->textureName;
229 }
230 int maxLevel = (entry->status & TexCacheEntry::STATUS_BAD_MIPS) ? 0 : entry->maxLevel;
231 SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
232 ApplySamplingParams(samplerKey);
233 gstate_c.SetUseShaderDepal(false);
234 }
235
Unbind()236 void TextureCacheGLES::Unbind() {
237 render_->BindTexture(TEX_SLOT_PSP_TEXTURE, nullptr);
238 InvalidateLastTexture();
239 }
240
241 class TextureShaderApplier {
242 public:
243 struct Pos {
244 float x;
245 float y;
246 float z;
247 };
248 struct UV {
249 float u;
250 float v;
251 };
252
TextureShaderApplier(DepalShader * shader,float bufferW,float bufferH,int renderW,int renderH)253 TextureShaderApplier(DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH)
254 : shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
255 static const Pos pos[4] = {
256 {-1, -1, -1},
257 { 1, -1, -1},
258 { 1, 1, -1},
259 {-1, 1, -1},
260 };
261 memcpy(pos_, pos, sizeof(pos_));
262
263 static const UV uv[4] = {
264 {0, 0},
265 {1, 0},
266 {1, 1},
267 {0, 1},
268 };
269 memcpy(uv_, uv, sizeof(uv_));
270 }
271
ApplyBounds(const KnownVertexBounds & bounds,u32 uoff,u32 voff)272 void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
273 // If min is not < max, then we don't have values (wasn't set during decode.)
274 if (bounds.minV < bounds.maxV) {
275 const float invWidth = 1.0f / bufferW_;
276 const float invHeight = 1.0f / bufferH_;
277 // Inverse of half = double.
278 const float invHalfWidth = invWidth * 2.0f;
279 const float invHalfHeight = invHeight * 2.0f;
280
281 const int u1 = bounds.minU + uoff;
282 const int v1 = bounds.minV + voff;
283 const int u2 = bounds.maxU + uoff;
284 const int v2 = bounds.maxV + voff;
285
286 const float left = u1 * invHalfWidth - 1.0f;
287 const float right = u2 * invHalfWidth - 1.0f;
288 const float top = v1 * invHalfHeight - 1.0f;
289 const float bottom = v2 * invHalfHeight - 1.0f;
290 // Points are: BL, BR, TR, TL.
291 pos_[0] = Pos{ left, bottom, -1.0f };
292 pos_[1] = Pos{ right, bottom, -1.0f };
293 pos_[2] = Pos{ right, top, -1.0f };
294 pos_[3] = Pos{ left, top, -1.0f };
295
296 // And also the UVs, same order.
297 const float uvleft = u1 * invWidth;
298 const float uvright = u2 * invWidth;
299 const float uvtop = v1 * invHeight;
300 const float uvbottom = v2 * invHeight;
301 uv_[0] = UV{ uvleft, uvbottom };
302 uv_[1] = UV{ uvright, uvbottom };
303 uv_[2] = UV{ uvright, uvtop };
304 uv_[3] = UV{ uvleft, uvtop };
305
306 // We need to reapply the texture next time since we cropped UV.
307 gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
308 }
309 }
310
Use(GLRenderManager * render,DrawEngineGLES * transformDraw,GLRInputLayout * inputLayout)311 void Use(GLRenderManager *render, DrawEngineGLES *transformDraw, GLRInputLayout *inputLayout) {
312 render->BindProgram(shader_->program);
313 struct SimpleVertex {
314 float pos[3];
315 float uv[2];
316 };
317 uint32_t bindOffset;
318 GLRBuffer *bindBuffer;
319 SimpleVertex *verts = (SimpleVertex *)transformDraw->GetPushVertexBuffer()->Push(sizeof(SimpleVertex) * 4, &bindOffset, &bindBuffer);
320 int order[4] = { 0 ,1, 3, 2 };
321 for (int i = 0; i < 4; i++) {
322 memcpy(verts[i].pos, &pos_[order[i]], sizeof(Pos));
323 memcpy(verts[i].uv, &uv_[order[i]], sizeof(UV));
324 }
325 render->BindVertexBuffer(inputLayout, bindBuffer, bindOffset);
326 }
327
Shade(GLRenderManager * render)328 void Shade(GLRenderManager *render) {
329 render->SetViewport(GLRViewport{ 0, 0, (float)renderW_, (float)renderH_, 0.0f, 1.0f });
330 render->Draw(GL_TRIANGLE_STRIP, 0, 4);
331 }
332
333 protected:
334 DepalShader *shader_;
335 Pos pos_[4];
336 UV uv_[4];
337 float bufferW_;
338 float bufferH_;
339 int renderW_;
340 int renderH_;
341 };
342
ApplyTextureFramebuffer(VirtualFramebuffer * framebuffer,GETextureFormat texFormat,FramebufferNotificationChannel channel)343 void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
344 DepalShader *depalShader = nullptr;
345 uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
346 bool need_depalettize = IsClutFormat(texFormat);
347
348 bool depth = channel == NOTIFY_FB_DEPTH;
349 bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)) && !depth;
350 if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) {
351 useShaderDepal = false;
352 depth = false; // Can't support this
353 }
354
355 if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
356 if (useShaderDepal) {
357 const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
358 GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);
359 render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
360 render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
361 framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
362 SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
363 samplerKey.magFilt = false;
364 samplerKey.minFilt = false;
365 samplerKey.mipEnable = false;
366 ApplySamplingParams(samplerKey);
367 InvalidateLastTexture();
368
369 // Since we started/ended render passes, might need these.
370 gstate_c.Dirty(DIRTY_DEPAL);
371 gstate_c.SetUseShaderDepal(true);
372 gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
373 const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
374 const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
375 TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
376 gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
377 return;
378 }
379
380 depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
381 gstate_c.SetUseShaderDepal(false);
382 }
383 if (depalShader) {
384 shaderManager_->DirtyLastShader();
385
386 const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
387 GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);
388 Draw::Framebuffer *depalFBO = framebufferManagerGL_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
389 draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");
390
391 render_->SetScissor(GLRect2D{ 0, 0, (int)framebuffer->renderWidth, (int)framebuffer->renderHeight });
392 render_->SetViewport(GLRViewport{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f });
393 TextureShaderApplier shaderApply(depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
394 shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
395 shaderApply.Use(render_, drawEngine_, shadeInputLayout_);
396
397 draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
398
399 render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
400 render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
401
402 shaderApply.Shade(render_);
403
404 draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
405
406 const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
407 const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
408
409 TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
410 gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
411 } else {
412 framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
413
414 gstate_c.SetUseShaderDepal(false);
415 gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
416 }
417
418 framebufferManagerGL_->RebindFramebuffer("ApplyTextureFramebuffer");
419
420 SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
421 ApplySamplingParams(samplerKey);
422
423 // Since we started/ended render passes, might need these.
424 gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
425 }
426
FromDataFormat(Draw::DataFormat fmt)427 ReplacedTextureFormat FromDataFormat(Draw::DataFormat fmt) {
428 // TODO: 16-bit formats are incorrect, since swizzled.
429 switch (fmt) {
430 case Draw::DataFormat::R5G6B5_UNORM_PACK16: return ReplacedTextureFormat::F_0565_ABGR;
431 case Draw::DataFormat::R5G5B5A1_UNORM_PACK16: return ReplacedTextureFormat::F_1555_ABGR;
432 case Draw::DataFormat::R4G4B4A4_UNORM_PACK16: return ReplacedTextureFormat::F_4444_ABGR;
433 case Draw::DataFormat::R8G8B8A8_UNORM: default: return ReplacedTextureFormat::F_8888;
434 }
435 }
436
ToDataFormat(ReplacedTextureFormat fmt)437 Draw::DataFormat ToDataFormat(ReplacedTextureFormat fmt) {
438 switch (fmt) {
439 case ReplacedTextureFormat::F_5650: return Draw::DataFormat::R5G6B5_UNORM_PACK16;
440 case ReplacedTextureFormat::F_5551: return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
441 case ReplacedTextureFormat::F_4444: return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
442 case ReplacedTextureFormat::F_8888: default: return Draw::DataFormat::R8G8B8A8_UNORM;
443 }
444 }
445
BuildTexture(TexCacheEntry * const entry)446 void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
447 entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK;
448
449 // For the estimate, we assume cluts always point to 8888 for simplicity.
450 cacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
451
452 if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) {
453 ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff);
454 // Proceeding here can cause a crash.
455 return;
456 }
457
458 // Adjust maxLevel to actually present levels..
459 bool badMipSizes = false;
460 bool canAutoGen = false;
461 int maxLevel = entry->maxLevel;
462 for (int i = 0; i <= maxLevel; i++) {
463 // If encountering levels pointing to nothing, adjust max level.
464 u32 levelTexaddr = gstate.getTextureAddress(i);
465 if (!Memory::IsValidAddress(levelTexaddr)) {
466 maxLevel = i - 1;
467 break;
468 }
469
470 // If size reaches 1, stop, and override maxlevel.
471 int tw = gstate.getTextureWidth(i);
472 int th = gstate.getTextureHeight(i);
473 if (tw == 1 || th == 1) {
474 maxLevel = i;
475 break;
476 }
477
478 if (i > 0) {
479 int lastW = gstate.getTextureWidth(i - 1);
480 int lastH = gstate.getTextureHeight(i - 1);
481
482 if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
483 if (tw != 1 && tw != (lastW >> 1))
484 badMipSizes = true;
485 else if (th != 1 && th != (lastH >> 1))
486 badMipSizes = true;
487 }
488
489 if (lastW > tw || lastH > th)
490 canAutoGen = true;
491 }
492 }
493
494 // If GLES3 is available, we can preallocate the storage, which makes texture loading more efficient.
495 Draw::DataFormat dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());
496
497 int scaleFactor = standardScaleFactor_;
498
499 // Rachet down scale factor in low-memory mode.
500 if (lowMemoryMode_) {
501 // Keep it even, though, just in case of npot troubles.
502 scaleFactor = scaleFactor > 4 ? 4 : (scaleFactor > 2 ? 2 : 1);
503 }
504
505 u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
506 int w = gstate.getTextureWidth(0);
507 int h = gstate.getTextureHeight(0);
508 ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
509 if (replaced.GetSize(0, w, h)) {
510 // We're replacing, so we won't scale.
511 scaleFactor = 1;
512 entry->status |= TexCacheEntry::STATUS_IS_SCALED;
513 maxLevel = replaced.MaxLevel();
514 badMipSizes = false;
515 }
516
517 // Don't scale the PPGe texture.
518 if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
519 scaleFactor = 1;
520
521 if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) {
522 // Remember for later that we /wanted/ to scale this texture.
523 entry->status |= TexCacheEntry::STATUS_TO_SCALE;
524 scaleFactor = 1;
525 }
526
527 if (scaleFactor != 1) {
528 if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
529 entry->status |= TexCacheEntry::STATUS_TO_SCALE;
530 scaleFactor = 1;
531 } else {
532 entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
533 entry->status |= TexCacheEntry::STATUS_IS_SCALED;
534 texelsScaledThisFrame_ += w * h;
535 }
536 }
537
538 // GLES2 doesn't have support for a "Max lod" which is critical as PSP games often
539 // don't specify mips all the way down. As a result, we either need to manually generate
540 // the bottom few levels or rely on OpenGL's autogen mipmaps instead, which might not
541 // be as good quality as the game's own (might even be better in some cases though).
542
543 // Always load base level texture here
544 if (IsFakeMipmapChange()) {
545 // NOTE: Since the level is not part of the cache key, we assume it never changes.
546 u8 level = std::max(0, gstate.getTexLevelOffset16() / 16);
547 LoadTextureLevel(*entry, replaced, level, scaleFactor, dstFmt);
548 } else {
549 LoadTextureLevel(*entry, replaced, 0, scaleFactor, dstFmt);
550 }
551
552 // Mipmapping is only enabled when texture scaling is disabled.
553 int texMaxLevel = 0;
554 bool genMips = false;
555 if (maxLevel > 0 && scaleFactor == 1) {
556 if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
557 if (badMipSizes) {
558 // WARN_LOG(G3D, "Bad mipmap for texture sized %dx%dx%d - autogenerating", w, h, (int)format);
559 if (canAutoGen) {
560 genMips = true;
561 } else {
562 texMaxLevel = 0;
563 maxLevel = 0;
564 }
565 } else {
566 for (int i = 1; i <= maxLevel; i++) {
567 LoadTextureLevel(*entry, replaced, i, scaleFactor, dstFmt);
568 }
569 texMaxLevel = maxLevel;
570 }
571 } else {
572 // Avoid PowerVR driver bug
573 if (canAutoGen && w > 1 && h > 1 && !(h > w && draw_->GetBugs().Has(Draw::Bugs::PVR_GENMIPMAP_HEIGHT_GREATER))) { // Really! only seems to fail if height > width
574 // NOTICE_LOG(G3D, "Generating mipmap for texture sized %dx%d%d", w, h, (int)format);
575 genMips = true;
576 } else {
577 maxLevel = 0;
578 }
579 }
580 } else if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
581 texMaxLevel = 0;
582 }
583
584 if (maxLevel == 0) {
585 entry->status |= TexCacheEntry::STATUS_BAD_MIPS;
586 } else {
587 entry->status &= ~TexCacheEntry::STATUS_BAD_MIPS;
588 }
589 if (replaced.Valid()) {
590 entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
591 }
592
593 render_->FinalizeTexture(entry->textureName, texMaxLevel, genMips);
594 }
595
GetDestFormat(GETextureFormat format,GEPaletteFormat clutFormat) const596 Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
597 switch (format) {
598 case GE_TFMT_CLUT4:
599 case GE_TFMT_CLUT8:
600 case GE_TFMT_CLUT16:
601 case GE_TFMT_CLUT32:
602 return getClutDestFormat(clutFormat);
603 case GE_TFMT_4444:
604 return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
605 case GE_TFMT_5551:
606 return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
607 case GE_TFMT_5650:
608 return Draw::DataFormat::R5G6B5_UNORM_PACK16;
609 case GE_TFMT_8888:
610 case GE_TFMT_DXT1:
611 case GE_TFMT_DXT3:
612 case GE_TFMT_DXT5:
613 default:
614 return Draw::DataFormat::R8G8B8A8_UNORM;
615 }
616 }
617
CheckAlpha(const uint8_t * pixelData,Draw::DataFormat dstFmt,int stride,int w,int h)618 TexCacheEntry::TexStatus TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h) {
619 CheckAlphaResult res;
620 switch (dstFmt) {
621 case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
622 res = CheckAlphaABGR4444Basic((const uint32_t *)pixelData, stride, w, h);
623 break;
624 case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
625 res = CheckAlphaABGR1555Basic((const uint32_t *)pixelData, stride, w, h);
626 break;
627 case Draw::DataFormat::R5G6B5_UNORM_PACK16:
628 // Never has any alpha.
629 res = CHECKALPHA_FULL;
630 break;
631 default:
632 res = CheckAlphaRGBA8888Basic((const uint32_t *)pixelData, stride, w, h);
633 break;
634 }
635
636 return (TexCacheEntry::TexStatus)res;
637 }
638
LoadTextureLevel(TexCacheEntry & entry,ReplacedTexture & replaced,int level,int scaleFactor,Draw::DataFormat dstFmt)639 void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt) {
640 int w = gstate.getTextureWidth(level);
641 int h = gstate.getTextureHeight(level);
642 uint8_t *pixelData;
643 int decPitch = 0;
644
645 gpuStats.numTexturesDecoded++;
646
647 if (!entry.textureName) {
648 // TODO: Actually pass in correct size here. The size here is not yet used for anything else
649 // than determining if we can wrap this texture size, that is, it's pow2 or not on very old hardware, else true.
650 // This will be easy after .. well, yet another refactoring, where I hoist the size calculation out of LoadTextureLevel
651 // and unify BuildTexture.
652 entry.textureName = render_->CreateTexture(GL_TEXTURE_2D, 16, 16, 1);
653 }
654
655 if (replaced.GetSize(level, w, h)) {
656 PROFILE_THIS_SCOPE("replacetex");
657
658 int bpp = replaced.Format(level) == ReplacedTextureFormat::F_8888 ? 4 : 2;
659 decPitch = w * bpp;
660 uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(decPitch * h, 16);
661 replaced.Load(level, rearrange, decPitch);
662 pixelData = rearrange;
663
664 dstFmt = ToDataFormat(replaced.Format(level));
665 } else {
666 PROFILE_THIS_SCOPE("decodetex");
667
668 GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
669 u32 texaddr = gstate.getTextureAddress(level);
670 int bufw = GetTextureBufw(level, texaddr, GETextureFormat(entry.format));
671
672 int pixelSize = dstFmt == Draw::DataFormat::R8G8B8A8_UNORM ? 4 : 2;
673 // We leave GL_UNPACK_ALIGNMENT at 4, so this must be at least 4.
674 decPitch = std::max(w * pixelSize, 4);
675
676 pixelData = (uint8_t *)AllocateAlignedMemory(decPitch * h * pixelSize, 16);
677 DecodeTextureLevel(pixelData, decPitch, GETextureFormat(entry.format), clutformat, texaddr, level, bufw, true, false, false);
678
679 // We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
680 if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
681 TexCacheEntry::TexStatus alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / pixelSize, w, h);
682 entry.SetAlphaStatus(alphaStatus, level);
683 } else {
684 entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
685 }
686
687 if (scaleFactor > 1) {
688 uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(w * scaleFactor * h * scaleFactor * 4, 16);
689 u32 dFmt = (u32)dstFmt;
690 scaler.ScaleAlways((u32 *)rearrange, (u32 *)pixelData, dFmt, w, h, scaleFactor);
691 dstFmt = (Draw::DataFormat)dFmt;
692 FreeAlignedMemory(pixelData);
693 pixelData = rearrange;
694 decPitch = w * 4;
695 }
696
697 if (replacer_.Enabled()) {
698 ReplacedTextureDecodeInfo replacedInfo;
699 replacedInfo.cachekey = entry.CacheKey();
700 replacedInfo.hash = entry.fullhash;
701 replacedInfo.addr = entry.addr;
702 replacedInfo.isVideo = IsVideo(entry.addr);
703 replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0;
704 replacedInfo.scaleFactor = scaleFactor;
705 replacedInfo.fmt = FromDataFormat(dstFmt);
706
707 replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h);
708 }
709 }
710
711 PROFILE_THIS_SCOPE("loadtex");
712 if (IsFakeMipmapChange())
713 render_->TextureImage(entry.textureName, 0, w, h, dstFmt, pixelData, GLRAllocType::ALIGNED);
714 else
715 render_->TextureImage(entry.textureName, level, w, h, dstFmt, pixelData, GLRAllocType::ALIGNED);
716 }
717
GetCurrentTextureDebug(GPUDebugBuffer & buffer,int level)718 bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
719 GPUgstate saved;
720 if (level != 0) {
721 saved = gstate;
722
723 // The way we set textures is a bit complex. Let's just override level 0.
724 gstate.texsize[0] = gstate.texsize[level];
725 gstate.texaddr[0] = gstate.texaddr[level];
726 gstate.texbufwidth[0] = gstate.texbufwidth[level];
727 }
728
729 InvalidateLastTexture();
730 SetTexture();
731
732 if (!nextTexture_) {
733 if (nextFramebufferTexture_) {
734 VirtualFramebuffer *vfb = nextFramebufferTexture_;
735 buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false);
736 bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug");
737 // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf.
738 // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends.
739 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
740 // We may have blitted to a temp FBO.
741 framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
742 if (!retval)
743 ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed");
744 return retval;
745 } else {
746 ERROR_LOG(G3D, "Failed to get debug texture: no texture set");
747 return false;
748 }
749 }
750
751 // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer.
752 TexCacheEntry *entry = nextTexture_;
753 // We might need a render pass to set the sampling params, unfortunately. Otherwise BuildTexture may crash.
754 framebufferManagerGL_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
755 ApplyTexture();
756
757 GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
758
759 // Not a framebuffer, so let's assume these are right.
760 // TODO: But they may definitely not be, if the texture was scaled.
761 int w = gstate.getTextureWidth(level);
762 int h = gstate.getTextureHeight(level);
763
764 if (level != 0) {
765 gstate = saved;
766 }
767
768 bool result = entry->textureName != nullptr;
769 if (result) {
770 buffer.Allocate(w, h, GE_FORMAT_8888, false);
771 renderManager->CopyImageToMemorySync(entry->textureName, level, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, (uint8_t *)buffer.GetData(), w, "GetCurrentTextureDebug");
772 } else {
773 ERROR_LOG(G3D, "Failed to get debug texture: texture is null");
774 }
775 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
776 framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
777
778 return result;
779 }
780
DeviceLost()781 void TextureCacheGLES::DeviceLost() {
782 if (shadeInputLayout_) {
783 render_->DeleteInputLayout(shadeInputLayout_);
784 shadeInputLayout_ = nullptr;
785 }
786 Clear(false);
787 draw_ = nullptr;
788 render_ = nullptr;
789 }
790
DeviceRestore(Draw::DrawContext * draw)791 void TextureCacheGLES::DeviceRestore(Draw::DrawContext *draw) {
792 draw_ = draw;
793 render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
794 if (!shadeInputLayout_) {
795 std::vector<GLRInputLayout::Entry> entries;
796 entries.push_back({ 0, 3, GL_FLOAT, GL_FALSE, 20, 0 });
797 entries.push_back({ 1, 2, GL_FLOAT, GL_FALSE, 20, 12 });
798 shadeInputLayout_ = render_->CreateInputLayout(entries);
799 }
800 }
801