1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 #include <cstring>
20 
21 #include "ext/xxhash.h"
22 #include "Common/Data/Convert/ColorConv.h"
23 #include "Common/Data/Text/I18n.h"
24 #include "Common/Math/math_util.h"
25 #include "Common/Profiler/Profiler.h"
26 #include "Common/GPU/OpenGL/GLRenderManager.h"
27 
28 #include "Core/Config.h"
29 #include "Core/Host.h"
30 #include "Core/MemMap.h"
31 #include "Core/Reporting.h"
32 #include "GPU/ge_constants.h"
33 #include "GPU/GPUState.h"
34 #include "GPU/GLES/TextureCacheGLES.h"
35 #include "GPU/GLES/FramebufferManagerGLES.h"
36 #include "GPU/Common/FragmentShaderGenerator.h"
37 #include "GPU/GLES/DepalettizeShaderGLES.h"
38 #include "GPU/GLES/ShaderManagerGLES.h"
39 #include "GPU/GLES/DrawEngineGLES.h"
40 #include "GPU/Common/TextureDecoder.h"
41 
42 #ifdef _M_SSE
43 #include <emmintrin.h>
44 #endif
45 
TextureCacheGLES(Draw::DrawContext * draw)46 TextureCacheGLES::TextureCacheGLES(Draw::DrawContext *draw)
47 	: TextureCacheCommon(draw) {
48 	render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
49 
50 	SetupTextureDecoder();
51 
52 	nextTexture_ = nullptr;
53 
54 	std::vector<GLRInputLayout::Entry> entries;
55 	entries.push_back({ 0, 3, GL_FLOAT, GL_FALSE, 20, 0 });
56 	entries.push_back({ 1, 2, GL_FLOAT, GL_FALSE, 20, 12 });
57 	shadeInputLayout_ = render_->CreateInputLayout(entries);
58 }
59 
~TextureCacheGLES()60 TextureCacheGLES::~TextureCacheGLES() {
61 	if (shadeInputLayout_) {
62 		render_->DeleteInputLayout(shadeInputLayout_);
63 	}
64 	Clear(true);
65 }
66 
SetFramebufferManager(FramebufferManagerGLES * fbManager)67 void TextureCacheGLES::SetFramebufferManager(FramebufferManagerGLES *fbManager) {
68 	framebufferManagerGL_ = fbManager;
69 	framebufferManager_ = fbManager;
70 }
71 
ReleaseTexture(TexCacheEntry * entry,bool delete_them)72 void TextureCacheGLES::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
73 	if (delete_them) {
74 		if (entry->textureName) {
75 			render_->DeleteTexture(entry->textureName);
76 		}
77 	}
78 	entry->textureName = nullptr;
79 }
80 
Clear(bool delete_them)81 void TextureCacheGLES::Clear(bool delete_them) {
82 	TextureCacheCommon::Clear(delete_them);
83 }
84 
getClutDestFormat(GEPaletteFormat format)85 Draw::DataFormat getClutDestFormat(GEPaletteFormat format) {
86 	switch (format) {
87 	case GE_CMODE_16BIT_ABGR4444:
88 		return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
89 	case GE_CMODE_16BIT_ABGR5551:
90 		return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
91 	case GE_CMODE_16BIT_BGR5650:
92 		return Draw::DataFormat::R5G6B5_UNORM_PACK16;
93 	case GE_CMODE_32BIT_ABGR8888:
94 		return Draw::DataFormat::R8G8B8A8_UNORM;
95 	}
96 	return Draw::DataFormat::UNDEFINED;;
97 }
98 
99 static const GLuint MinFiltGL[8] = {
100 	GL_NEAREST,
101 	GL_LINEAR,
102 	GL_NEAREST,
103 	GL_LINEAR,
104 	GL_NEAREST_MIPMAP_NEAREST,
105 	GL_LINEAR_MIPMAP_NEAREST,
106 	GL_NEAREST_MIPMAP_LINEAR,
107 	GL_LINEAR_MIPMAP_LINEAR,
108 };
109 
110 static const GLuint MagFiltGL[2] = {
111 	GL_NEAREST,
112 	GL_LINEAR
113 };
114 
ApplySamplingParams(const SamplerCacheKey & key)115 void TextureCacheGLES::ApplySamplingParams(const SamplerCacheKey &key) {
116 	if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
117 		float minLod = (float)key.minLevel / 256.0f;
118 		float maxLod = (float)key.maxLevel / 256.0f;
119 		float lodBias = (float)key.lodBias / 256.0f;
120 		render_->SetTextureLod(0, minLod, maxLod, lodBias);
121 	}
122 
123 	float aniso = 0.0f;
124 	int minKey = ((int)key.mipEnable << 2) | ((int)key.mipFilt << 1) | ((int)key.minFilt);
125 	render_->SetTextureSampler(0,
126 		key.sClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, key.tClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT,
127 		key.magFilt ? GL_LINEAR : GL_NEAREST, MinFiltGL[minKey], aniso);
128 }
129 
ConvertColors(void * dstBuf,const void * srcBuf,Draw::DataFormat dstFmt,int numPixels)130 static void ConvertColors(void *dstBuf, const void *srcBuf, Draw::DataFormat dstFmt, int numPixels) {
131 	const u32 *src = (const u32 *)srcBuf;
132 	u32 *dst = (u32 *)dstBuf;
133 	switch (dstFmt) {
134 	case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
135 		ConvertRGBA4444ToABGR4444((u16 *)dst, (const u16 *)src, numPixels);
136 		break;
137 	// Final Fantasy 2 uses this heavily in animated textures.
138 	case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
139 		ConvertRGBA5551ToABGR1555((u16 *)dst, (const u16 *)src, numPixels);
140 		break;
141 	case Draw::DataFormat::R5G6B5_UNORM_PACK16:
142 		ConvertRGB565ToBGR565((u16 *)dst, (const u16 *)src, numPixels);
143 		break;
144 	default:
145 		// No need to convert RGBA8888, right order already
146 		if (dst != src)
147 			memcpy(dst, src, numPixels * sizeof(u32));
148 		break;
149 	}
150 }
151 
StartFrame()152 void TextureCacheGLES::StartFrame() {
153 	InvalidateLastTexture();
154 	timesInvalidatedAllThisFrame_ = 0;
155 
156 	GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
157 	if (!lowMemoryMode_ && renderManager->SawOutOfMemory()) {
158 		lowMemoryMode_ = true;
159 		decimationCounter_ = 0;
160 
161 		auto err = GetI18NCategory("Error");
162 		if (standardScaleFactor_ > 1) {
163 			host->NotifyUserMessage(err->T("Warning: Video memory FULL, reducing upscaling and switching to slow caching mode"), 2.0f);
164 		} else {
165 			host->NotifyUserMessage(err->T("Warning: Video memory FULL, switching to slow caching mode"), 2.0f);
166 		}
167 	}
168 
169 	if (texelsScaledThisFrame_) {
170 		VERBOSE_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
171 	}
172 	texelsScaledThisFrame_ = 0;
173 	if (clearCacheNextFrame_) {
174 		Clear(true);
175 		clearCacheNextFrame_ = false;
176 	} else {
177 		Decimate();
178 	}
179 }
180 
UpdateCurrentClut(GEPaletteFormat clutFormat,u32 clutBase,bool clutIndexIsSimple)181 void TextureCacheGLES::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
182 	const u32 clutBaseBytes = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutBase * sizeof(u32)) : (clutBase * sizeof(u16));
183 	// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
184 	// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
185 	//
186 	// TODO: Actually, this seems like a hack.  The game can upload part of a CLUT and reference other data.
187 	// clutTotalBytes_ is the last amount uploaded.  We should hash clutMaxBytes_, but this will often hash
188 	// unrelated old entries for small palettes.
189 	// Adding clutBaseBytes may just be mitigating this for some usage patterns.
190 	const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
191 
192 	if (replacer_.Enabled())
193 		clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
194 	else
195 		clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
196 
197 	// Avoid a copy when we don't need to convert colors.
198 	if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
199 		const int numColors = clutFormat == GE_CMODE_32BIT_ABGR8888 ? (clutMaxBytes_ / sizeof(u32)) : (clutMaxBytes_ / sizeof(u16));
200 		ConvertColors(clutBufConverted_, clutBufRaw_, getClutDestFormat(clutFormat), numColors);
201 		clutBuf_ = clutBufConverted_;
202 	} else {
203 		clutBuf_ = clutBufRaw_;
204 	}
205 
206 	// Special optimization: fonts typically draw clut4 with just alpha values in a single color.
207 	clutAlphaLinear_ = false;
208 	clutAlphaLinearColor_ = 0;
209 	if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {
210 		const u16_le *clut = GetCurrentClut<u16_le>();
211 		clutAlphaLinear_ = true;
212 		clutAlphaLinearColor_ = clut[15] & 0xFFF0;
213 		for (int i = 0; i < 16; ++i) {
214 			u16 step = clutAlphaLinearColor_ | i;
215 			if (clut[i] != step) {
216 				clutAlphaLinear_ = false;
217 				break;
218 			}
219 		}
220 	}
221 
222 	clutLastFormat_ = gstate.clutformat;
223 }
224 
BindTexture(TexCacheEntry * entry)225 void TextureCacheGLES::BindTexture(TexCacheEntry *entry) {
226 	if (entry->textureName != lastBoundTexture) {
227 		render_->BindTexture(0, entry->textureName);
228 		lastBoundTexture = entry->textureName;
229 	}
230 	int maxLevel = (entry->status & TexCacheEntry::STATUS_BAD_MIPS) ? 0 : entry->maxLevel;
231 	SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
232 	ApplySamplingParams(samplerKey);
233 	gstate_c.SetUseShaderDepal(false);
234 }
235 
Unbind()236 void TextureCacheGLES::Unbind() {
237 	render_->BindTexture(TEX_SLOT_PSP_TEXTURE, nullptr);
238 	InvalidateLastTexture();
239 }
240 
241 class TextureShaderApplier {
242 public:
243 	struct Pos {
244 		float x;
245 		float y;
246 		float z;
247 	};
248 	struct UV {
249 		float u;
250 		float v;
251 	};
252 
TextureShaderApplier(DepalShader * shader,float bufferW,float bufferH,int renderW,int renderH)253 	TextureShaderApplier(DepalShader *shader, float bufferW, float bufferH, int renderW, int renderH)
254 		: shader_(shader), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
255 		static const Pos pos[4] = {
256 			{-1, -1, -1},
257 			{ 1, -1, -1},
258 			{ 1,  1, -1},
259 			{-1,  1, -1},
260 		};
261 		memcpy(pos_, pos, sizeof(pos_));
262 
263 		static const UV uv[4] = {
264 			{0, 0},
265 			{1, 0},
266 			{1, 1},
267 			{0, 1},
268 		};
269 		memcpy(uv_, uv, sizeof(uv_));
270 	}
271 
ApplyBounds(const KnownVertexBounds & bounds,u32 uoff,u32 voff)272 	void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff) {
273 		// If min is not < max, then we don't have values (wasn't set during decode.)
274 		if (bounds.minV < bounds.maxV) {
275 			const float invWidth = 1.0f / bufferW_;
276 			const float invHeight = 1.0f / bufferH_;
277 			// Inverse of half = double.
278 			const float invHalfWidth = invWidth * 2.0f;
279 			const float invHalfHeight = invHeight * 2.0f;
280 
281 			const int u1 = bounds.minU + uoff;
282 			const int v1 = bounds.minV + voff;
283 			const int u2 = bounds.maxU + uoff;
284 			const int v2 = bounds.maxV + voff;
285 
286 			const float left = u1 * invHalfWidth - 1.0f;
287 			const float right = u2 * invHalfWidth - 1.0f;
288 			const float top = v1 * invHalfHeight - 1.0f;
289 			const float bottom = v2 * invHalfHeight - 1.0f;
290 			// Points are: BL, BR, TR, TL.
291 			pos_[0] = Pos{ left, bottom, -1.0f };
292 			pos_[1] = Pos{ right, bottom, -1.0f };
293 			pos_[2] = Pos{ right, top, -1.0f };
294 			pos_[3] = Pos{ left, top, -1.0f };
295 
296 			// And also the UVs, same order.
297 			const float uvleft = u1 * invWidth;
298 			const float uvright = u2 * invWidth;
299 			const float uvtop = v1 * invHeight;
300 			const float uvbottom = v2 * invHeight;
301 			uv_[0] = UV{ uvleft, uvbottom };
302 			uv_[1] = UV{ uvright, uvbottom };
303 			uv_[2] = UV{ uvright, uvtop };
304 			uv_[3] = UV{ uvleft, uvtop };
305 
306 			// We need to reapply the texture next time since we cropped UV.
307 			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
308 		}
309 	}
310 
Use(GLRenderManager * render,DrawEngineGLES * transformDraw,GLRInputLayout * inputLayout)311 	void Use(GLRenderManager *render, DrawEngineGLES *transformDraw, GLRInputLayout *inputLayout) {
312 		render->BindProgram(shader_->program);
313 		struct SimpleVertex {
314 			float pos[3];
315 			float uv[2];
316 		};
317 		uint32_t bindOffset;
318 		GLRBuffer *bindBuffer;
319 		SimpleVertex *verts = (SimpleVertex *)transformDraw->GetPushVertexBuffer()->Push(sizeof(SimpleVertex) * 4, &bindOffset, &bindBuffer);
320 		int order[4] = { 0 ,1, 3, 2 };
321 		for (int i = 0; i < 4; i++) {
322 			memcpy(verts[i].pos, &pos_[order[i]], sizeof(Pos));
323 			memcpy(verts[i].uv, &uv_[order[i]], sizeof(UV));
324 		}
325 		render->BindVertexBuffer(inputLayout, bindBuffer, bindOffset);
326 	}
327 
Shade(GLRenderManager * render)328 	void Shade(GLRenderManager *render) {
329 		render->SetViewport(GLRViewport{ 0, 0, (float)renderW_, (float)renderH_, 0.0f, 1.0f });
330 		render->Draw(GL_TRIANGLE_STRIP, 0, 4);
331 	}
332 
333 protected:
334 	DepalShader *shader_;
335 	Pos pos_[4];
336 	UV uv_[4];
337 	float bufferW_;
338 	float bufferH_;
339 	int renderW_;
340 	int renderH_;
341 };
342 
ApplyTextureFramebuffer(VirtualFramebuffer * framebuffer,GETextureFormat texFormat,FramebufferNotificationChannel channel)343 void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
344 	DepalShader *depalShader = nullptr;
345 	uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
346 	bool need_depalettize = IsClutFormat(texFormat);
347 
348 	bool depth = channel == NOTIFY_FB_DEPTH;
349 	bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) || gstate_c.Supports(GPU_SUPPORTS_GLSL_330)) && !depth;
350 	if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) {
351 		useShaderDepal = false;
352 		depth = false;  // Can't support this
353 	}
354 
355 	if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
356 		if (useShaderDepal) {
357 			const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
358 			GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);
359 			render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
360 			render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
361 			framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
362 			SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
363 			samplerKey.magFilt = false;
364 			samplerKey.minFilt = false;
365 			samplerKey.mipEnable = false;
366 			ApplySamplingParams(samplerKey);
367 			InvalidateLastTexture();
368 
369 			// Since we started/ended render passes, might need these.
370 			gstate_c.Dirty(DIRTY_DEPAL);
371 			gstate_c.SetUseShaderDepal(true);
372 			gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
373 			const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
374 			const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
375 			TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
376 			gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
377 			return;
378 		}
379 
380 		depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
381 		gstate_c.SetUseShaderDepal(false);
382 	}
383 	if (depalShader) {
384 		shaderManager_->DirtyLastShader();
385 
386 		const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
387 		GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_);
388 		Draw::Framebuffer *depalFBO = framebufferManagerGL_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
389 		draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");
390 
391 		render_->SetScissor(GLRect2D{ 0, 0, (int)framebuffer->renderWidth, (int)framebuffer->renderHeight });
392 		render_->SetViewport(GLRViewport{ 0.0f, 0.0f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f });
393 		TextureShaderApplier shaderApply(depalShader, framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight);
394 		shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset);
395 		shaderApply.Use(render_, drawEngine_, shadeInputLayout_);
396 
397 		draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
398 
399 		render_->BindTexture(TEX_SLOT_CLUT, clutTexture);
400 		render_->SetTextureSampler(TEX_SLOT_CLUT, GL_REPEAT, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
401 
402 		shaderApply.Shade(render_);
403 
404 		draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
405 
406 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
407 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
408 
409 		TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
410 		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
411 	} else {
412 		framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
413 
414 		gstate_c.SetUseShaderDepal(false);
415 		gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
416 	}
417 
418 	framebufferManagerGL_->RebindFramebuffer("ApplyTextureFramebuffer");
419 
420 	SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
421 	ApplySamplingParams(samplerKey);
422 
423 	// Since we started/ended render passes, might need these.
424 	gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
425 }
426 
FromDataFormat(Draw::DataFormat fmt)427 ReplacedTextureFormat FromDataFormat(Draw::DataFormat fmt) {
428 	// TODO: 16-bit formats are incorrect, since swizzled.
429 	switch (fmt) {
430 	case Draw::DataFormat::R5G6B5_UNORM_PACK16: return ReplacedTextureFormat::F_0565_ABGR;
431 	case Draw::DataFormat::R5G5B5A1_UNORM_PACK16: return ReplacedTextureFormat::F_1555_ABGR;
432 	case Draw::DataFormat::R4G4B4A4_UNORM_PACK16: return ReplacedTextureFormat::F_4444_ABGR;
433 	case Draw::DataFormat::R8G8B8A8_UNORM: default: return ReplacedTextureFormat::F_8888;
434 	}
435 }
436 
ToDataFormat(ReplacedTextureFormat fmt)437 Draw::DataFormat ToDataFormat(ReplacedTextureFormat fmt) {
438 	switch (fmt) {
439 	case ReplacedTextureFormat::F_5650: return Draw::DataFormat::R5G6B5_UNORM_PACK16;
440 	case ReplacedTextureFormat::F_5551: return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
441 	case ReplacedTextureFormat::F_4444: return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
442 	case ReplacedTextureFormat::F_8888: default: return Draw::DataFormat::R8G8B8A8_UNORM;
443 	}
444 }
445 
BuildTexture(TexCacheEntry * const entry)446 void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
447 	entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK;
448 
449 	// For the estimate, we assume cluts always point to 8888 for simplicity.
450 	cacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
451 
452 	if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) {
453 		ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff);
454 		// Proceeding here can cause a crash.
455 		return;
456 	}
457 
458 	// Adjust maxLevel to actually present levels..
459 	bool badMipSizes = false;
460 	bool canAutoGen = false;
461 	int maxLevel = entry->maxLevel;
462 	for (int i = 0; i <= maxLevel; i++) {
463 		// If encountering levels pointing to nothing, adjust max level.
464 		u32 levelTexaddr = gstate.getTextureAddress(i);
465 		if (!Memory::IsValidAddress(levelTexaddr)) {
466 			maxLevel = i - 1;
467 			break;
468 		}
469 
470 		// If size reaches 1, stop, and override maxlevel.
471 		int tw = gstate.getTextureWidth(i);
472 		int th = gstate.getTextureHeight(i);
473 		if (tw == 1 || th == 1) {
474 			maxLevel = i;
475 			break;
476 		}
477 
478 		if (i > 0) {
479 			int lastW = gstate.getTextureWidth(i - 1);
480 			int lastH = gstate.getTextureHeight(i - 1);
481 
482 			if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
483 				if (tw != 1 && tw != (lastW >> 1))
484 					badMipSizes = true;
485 				else if (th != 1 && th != (lastH >> 1))
486 					badMipSizes = true;
487 			}
488 
489 			if (lastW > tw || lastH > th)
490 				canAutoGen = true;
491 		}
492 	}
493 
494 	// If GLES3 is available, we can preallocate the storage, which makes texture loading more efficient.
495 	Draw::DataFormat dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());
496 
497 	int scaleFactor = standardScaleFactor_;
498 
499 	// Rachet down scale factor in low-memory mode.
500 	if (lowMemoryMode_) {
501 		// Keep it even, though, just in case of npot troubles.
502 		scaleFactor = scaleFactor > 4 ? 4 : (scaleFactor > 2 ? 2 : 1);
503 	}
504 
505 	u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
506 	int w = gstate.getTextureWidth(0);
507 	int h = gstate.getTextureHeight(0);
508 	ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
509 	if (replaced.GetSize(0, w, h)) {
510 		// We're replacing, so we won't scale.
511 		scaleFactor = 1;
512 		entry->status |= TexCacheEntry::STATUS_IS_SCALED;
513 		maxLevel = replaced.MaxLevel();
514 		badMipSizes = false;
515 	}
516 
517 	// Don't scale the PPGe texture.
518 	if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
519 		scaleFactor = 1;
520 
521 	if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) {
522 		// Remember for later that we /wanted/ to scale this texture.
523 		entry->status |= TexCacheEntry::STATUS_TO_SCALE;
524 		scaleFactor = 1;
525 	}
526 
527 	if (scaleFactor != 1) {
528 		if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
529 			entry->status |= TexCacheEntry::STATUS_TO_SCALE;
530 			scaleFactor = 1;
531 		} else {
532 			entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
533 			entry->status |= TexCacheEntry::STATUS_IS_SCALED;
534 			texelsScaledThisFrame_ += w * h;
535 		}
536 	}
537 
538 	// GLES2 doesn't have support for a "Max lod" which is critical as PSP games often
539 	// don't specify mips all the way down. As a result, we either need to manually generate
540 	// the bottom few levels or rely on OpenGL's autogen mipmaps instead, which might not
541 	// be as good quality as the game's own (might even be better in some cases though).
542 
543 	// Always load base level texture here
544 	if (IsFakeMipmapChange()) {
545 		// NOTE: Since the level is not part of the cache key, we assume it never changes.
546 		u8 level = std::max(0, gstate.getTexLevelOffset16() / 16);
547 		LoadTextureLevel(*entry, replaced, level, scaleFactor, dstFmt);
548 	} else {
549 		LoadTextureLevel(*entry, replaced, 0, scaleFactor, dstFmt);
550 	}
551 
552 	// Mipmapping is only enabled when texture scaling is disabled.
553 	int texMaxLevel = 0;
554 	bool genMips = false;
555 	if (maxLevel > 0 && scaleFactor == 1) {
556 		if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
557 			if (badMipSizes) {
558 				// WARN_LOG(G3D, "Bad mipmap for texture sized %dx%dx%d - autogenerating", w, h, (int)format);
559 				if (canAutoGen) {
560 					genMips = true;
561 				} else {
562 					texMaxLevel = 0;
563 					maxLevel = 0;
564 				}
565 			} else {
566 				for (int i = 1; i <= maxLevel; i++) {
567 					LoadTextureLevel(*entry, replaced, i, scaleFactor, dstFmt);
568 				}
569 				texMaxLevel = maxLevel;
570 			}
571 		} else {
572 			// Avoid PowerVR driver bug
573 			if (canAutoGen && w > 1 && h > 1 && !(h > w && draw_->GetBugs().Has(Draw::Bugs::PVR_GENMIPMAP_HEIGHT_GREATER))) {  // Really! only seems to fail if height > width
574 				// NOTICE_LOG(G3D, "Generating mipmap for texture sized %dx%d%d", w, h, (int)format);
575 				genMips = true;
576 			} else {
577 				maxLevel = 0;
578 			}
579 		}
580 	} else if (gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
581 		texMaxLevel = 0;
582 	}
583 
584 	if (maxLevel == 0) {
585 		entry->status |= TexCacheEntry::STATUS_BAD_MIPS;
586 	} else {
587 		entry->status &= ~TexCacheEntry::STATUS_BAD_MIPS;
588 	}
589 	if (replaced.Valid()) {
590 		entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
591 	}
592 
593 	render_->FinalizeTexture(entry->textureName, texMaxLevel, genMips);
594 }
595 
GetDestFormat(GETextureFormat format,GEPaletteFormat clutFormat) const596 Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
597 	switch (format) {
598 	case GE_TFMT_CLUT4:
599 	case GE_TFMT_CLUT8:
600 	case GE_TFMT_CLUT16:
601 	case GE_TFMT_CLUT32:
602 		return getClutDestFormat(clutFormat);
603 	case GE_TFMT_4444:
604 		return Draw::DataFormat::R4G4B4A4_UNORM_PACK16;
605 	case GE_TFMT_5551:
606 		return Draw::DataFormat::R5G5B5A1_UNORM_PACK16;
607 	case GE_TFMT_5650:
608 		return Draw::DataFormat::R5G6B5_UNORM_PACK16;
609 	case GE_TFMT_8888:
610 	case GE_TFMT_DXT1:
611 	case GE_TFMT_DXT3:
612 	case GE_TFMT_DXT5:
613 	default:
614 		return Draw::DataFormat::R8G8B8A8_UNORM;
615 	}
616 }
617 
CheckAlpha(const uint8_t * pixelData,Draw::DataFormat dstFmt,int stride,int w,int h)618 TexCacheEntry::TexStatus TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h) {
619 	CheckAlphaResult res;
620 	switch (dstFmt) {
621 	case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
622 		res = CheckAlphaABGR4444Basic((const uint32_t *)pixelData, stride, w, h);
623 		break;
624 	case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
625 		res = CheckAlphaABGR1555Basic((const uint32_t *)pixelData, stride, w, h);
626 		break;
627 	case Draw::DataFormat::R5G6B5_UNORM_PACK16:
628 		// Never has any alpha.
629 		res = CHECKALPHA_FULL;
630 		break;
631 	default:
632 		res = CheckAlphaRGBA8888Basic((const uint32_t *)pixelData, stride, w, h);
633 		break;
634 	}
635 
636 	return (TexCacheEntry::TexStatus)res;
637 }
638 
LoadTextureLevel(TexCacheEntry & entry,ReplacedTexture & replaced,int level,int scaleFactor,Draw::DataFormat dstFmt)639 void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt) {
640 	int w = gstate.getTextureWidth(level);
641 	int h = gstate.getTextureHeight(level);
642 	uint8_t *pixelData;
643 	int decPitch = 0;
644 
645 	gpuStats.numTexturesDecoded++;
646 
647 	if (!entry.textureName) {
648 		// TODO: Actually pass in correct size here. The size here is not yet used for anything else
649 		// than determining if we can wrap this texture size, that is, it's pow2 or not on very old hardware, else true.
650 		// This will be easy after .. well, yet another refactoring, where I hoist the size calculation out of LoadTextureLevel
651 		// and unify BuildTexture.
652 		entry.textureName = render_->CreateTexture(GL_TEXTURE_2D, 16, 16, 1);
653 	}
654 
655 	if (replaced.GetSize(level, w, h)) {
656 		PROFILE_THIS_SCOPE("replacetex");
657 
658 		int bpp = replaced.Format(level) == ReplacedTextureFormat::F_8888 ? 4 : 2;
659 		decPitch = w * bpp;
660 		uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(decPitch * h, 16);
661 		replaced.Load(level, rearrange, decPitch);
662 		pixelData = rearrange;
663 
664 		dstFmt = ToDataFormat(replaced.Format(level));
665 	} else {
666 		PROFILE_THIS_SCOPE("decodetex");
667 
668 		GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
669 		u32 texaddr = gstate.getTextureAddress(level);
670 		int bufw = GetTextureBufw(level, texaddr, GETextureFormat(entry.format));
671 
672 		int pixelSize = dstFmt == Draw::DataFormat::R8G8B8A8_UNORM ? 4 : 2;
673 		// We leave GL_UNPACK_ALIGNMENT at 4, so this must be at least 4.
674 		decPitch = std::max(w * pixelSize, 4);
675 
676 		pixelData = (uint8_t *)AllocateAlignedMemory(decPitch * h * pixelSize, 16);
677 		DecodeTextureLevel(pixelData, decPitch, GETextureFormat(entry.format), clutformat, texaddr, level, bufw, true, false, false);
678 
679 		// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
680 		if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
681 			TexCacheEntry::TexStatus alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / pixelSize, w, h);
682 			entry.SetAlphaStatus(alphaStatus, level);
683 		} else {
684 			entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
685 		}
686 
687 		if (scaleFactor > 1) {
688 			uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(w * scaleFactor * h * scaleFactor * 4, 16);
689 			u32 dFmt = (u32)dstFmt;
690 			scaler.ScaleAlways((u32 *)rearrange, (u32 *)pixelData, dFmt, w, h, scaleFactor);
691 			dstFmt = (Draw::DataFormat)dFmt;
692 			FreeAlignedMemory(pixelData);
693 			pixelData = rearrange;
694 			decPitch = w * 4;
695 		}
696 
697 		if (replacer_.Enabled()) {
698 			ReplacedTextureDecodeInfo replacedInfo;
699 			replacedInfo.cachekey = entry.CacheKey();
700 			replacedInfo.hash = entry.fullhash;
701 			replacedInfo.addr = entry.addr;
702 			replacedInfo.isVideo = IsVideo(entry.addr);
703 			replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0;
704 			replacedInfo.scaleFactor = scaleFactor;
705 			replacedInfo.fmt = FromDataFormat(dstFmt);
706 
707 			replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h);
708 		}
709 	}
710 
711 	PROFILE_THIS_SCOPE("loadtex");
712 	if (IsFakeMipmapChange())
713 		render_->TextureImage(entry.textureName, 0, w, h, dstFmt, pixelData, GLRAllocType::ALIGNED);
714 	else
715 		render_->TextureImage(entry.textureName, level, w, h, dstFmt, pixelData, GLRAllocType::ALIGNED);
716 }
717 
GetCurrentTextureDebug(GPUDebugBuffer & buffer,int level)718 bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
719 	GPUgstate saved;
720 	if (level != 0) {
721 		saved = gstate;
722 
723 		// The way we set textures is a bit complex.  Let's just override level 0.
724 		gstate.texsize[0] = gstate.texsize[level];
725 		gstate.texaddr[0] = gstate.texaddr[level];
726 		gstate.texbufwidth[0] = gstate.texbufwidth[level];
727 	}
728 
729 	InvalidateLastTexture();
730 	SetTexture();
731 
732 	if (!nextTexture_) {
733 		if (nextFramebufferTexture_) {
734 			VirtualFramebuffer *vfb = nextFramebufferTexture_;
735 			buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false);
736 			bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug");
737 			// Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf.
738 			// So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends.
739 			gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
740 			// We may have blitted to a temp FBO.
741 			framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
742 			if (!retval)
743 				ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed");
744 			return retval;
745 		} else {
746 			ERROR_LOG(G3D, "Failed to get debug texture: no texture set");
747 			return false;
748 		}
749 	}
750 
751 	// Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer.
752 	TexCacheEntry *entry = nextTexture_;
753 	// We might need a render pass to set the sampling params, unfortunately.  Otherwise BuildTexture may crash.
754 	framebufferManagerGL_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
755 	ApplyTexture();
756 
757 	GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
758 
759 	// Not a framebuffer, so let's assume these are right.
760 	// TODO: But they may definitely not be, if the texture was scaled.
761 	int w = gstate.getTextureWidth(level);
762 	int h = gstate.getTextureHeight(level);
763 
764 	if (level != 0) {
765 		gstate = saved;
766 	}
767 
768 	bool result = entry->textureName != nullptr;
769 	if (result) {
770 		buffer.Allocate(w, h, GE_FORMAT_8888, false);
771 		renderManager->CopyImageToMemorySync(entry->textureName, level, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, (uint8_t *)buffer.GetData(), w, "GetCurrentTextureDebug");
772 	} else {
773 		ERROR_LOG(G3D, "Failed to get debug texture: texture is null");
774 	}
775 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
776 	framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
777 
778 	return result;
779 }
780 
DeviceLost()781 void TextureCacheGLES::DeviceLost() {
782 	if (shadeInputLayout_) {
783 		render_->DeleteInputLayout(shadeInputLayout_);
784 		shadeInputLayout_ = nullptr;
785 	}
786 	Clear(false);
787 	draw_ = nullptr;
788 	render_ = nullptr;
789 }
790 
DeviceRestore(Draw::DrawContext * draw)791 void TextureCacheGLES::DeviceRestore(Draw::DrawContext *draw) {
792 	draw_ = draw;
793 	render_ = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
794 	if (!shadeInputLayout_) {
795 		std::vector<GLRInputLayout::Entry> entries;
796 		entries.push_back({ 0, 3, GL_FLOAT, GL_FALSE, 20, 0 });
797 		entries.push_back({ 1, 2, GL_FLOAT, GL_FALSE, 20, 12 });
798 		shadeInputLayout_ = render_->CreateInputLayout(entries);
799 	}
800 }
801