1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #pragma once
19 
20 #include <cstring>
21 
22 #include "ppsspp_config.h"
23 
24 #include "Common/Common.h"
25 #include "Common/Data/Collections/Hashmaps.h"
26 #include "Common/Log.h"
27 #include "Core/Reporting.h"
28 #include "GPU/ge_constants.h"
29 #include "GPU/Common/ShaderCommon.h"
30 #include "GPU/GPUCommon.h"
31 
32 #if PPSSPP_ARCH(ARM)
33 #include "Common/ArmEmitter.h"
34 #elif PPSSPP_ARCH(ARM64)
35 #include "Common/Arm64Emitter.h"
36 #elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
37 #include "Common/x64Emitter.h"
38 #else
39 #include "Common/FakeEmitter.h"
40 #endif
41 
42 // DecVtxFormat - vertex formats for PC
43 // Kind of like a D3D VertexDeclaration.
44 // Can write code to easily bind these using OpenGL, or read these manually.
45 // No morph support, that is taken care of by the VertexDecoder.
46 
// Component format codes stored in the *fmt fields of DecVtxFormat.
// Keep this in 4 bits (enforced by the static_assert below).
// The values within each family are consecutive on purpose: VertexReader::ReadWeights
// derives the number of float weights from (fmt - DEC_FLOAT_1).
enum {
	DEC_NONE = 0,      // Component not present.
	DEC_FLOAT_1 = 1,
	DEC_FLOAT_2 = 2,
	DEC_FLOAT_3 = 3,
	DEC_FLOAT_4 = 4,
	DEC_S8_3 = 5,
	DEC_S16_3 = 6,
	DEC_U8_1 = 7,
	DEC_U8_2 = 8,
	DEC_U8_3 = 9,
	DEC_U8_4 = 10,
	DEC_U16_1 = 11,
	DEC_U16_2 = 12,
	DEC_U16_3 = 13,
	DEC_U16_4 = 14,
};
static_assert(DEC_U16_4 < 16, "DEC_* format codes must fit in 4 bits");
65 
// Size lookup for a DEC_* component format code. Implemented out of line.
// NOTE(review): presumably returns the size in bytes of one component stored in that format - confirm against the definition.
int DecFmtSize(u8 fmt);
67 
68 struct DecVtxFormat {
69 	u8 w0fmt; u8 w0off;  // first 4 weights
70 	u8 w1fmt; u8 w1off;  // second 4 weights
71 	u8 uvfmt; u8 uvoff;
72 	u8 c0fmt; u8 c0off;  // First color
73 	u8 c1fmt; u8 c1off;
74 	u8 nrmfmt; u8 nrmoff;
75 	u8 posfmt; u8 posoff;
76 	u8 stride;
77 
78 	uint32_t id;
79 	void ComputeID();
80 	void InitializeFromID(uint32_t id);
81 };
82 
// Implemented out of line.
// NOTE(review): presumably scans `count` indices (format taken from vertType) and writes the smallest and
// largest index found to *indexLowerBound / *indexUpperBound - confirm against the definition.
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
84 
// Rounds x up to the next multiple of 4 (values that already are a multiple of 4 are
// returned unchanged). constexpr, so it can also be used for array sizes and static_asserts.
// x + 3 must not overflow int.
inline constexpr int RoundUp4(int x) {
	return (x + 3) & ~3;
}
88 
89 class IndexConverter {
90 private:
91 	union {
92 		const void *indices;
93 		const u8 *indices8;
94 		const u16_le *indices16;
95 		const u32_le *indices32;
96 	};
97 	u32 indexType;
98 
99 public:
IndexConverter(u32 vertType,const void * indices)100 	IndexConverter(u32 vertType, const void *indices)
101 		: indices(indices), indexType(vertType & GE_VTYPE_IDX_MASK) {
102 	}
103 
operator()104 	u32 operator() (u32 index) const {
105 		switch (indexType) {
106 		case GE_VTYPE_IDX_8BIT:
107 			return indices8[index];
108 		case GE_VTYPE_IDX_16BIT:
109 			return indices16[index];
110 		case GE_VTYPE_IDX_32BIT:
111 			return indices32[index];
112 		default:
113 			return index;
114 		}
115 	}
116 };
117 
118 // Reads decoded vertex formats in a convenient way. For software transform and debugging.
119 class VertexReader {
120 public:
VertexReader(u8 * base,const DecVtxFormat & decFmt,int vtype)121 	VertexReader(u8 *base, const DecVtxFormat &decFmt, int vtype) : base_(base), data_(base), decFmt_(decFmt), vtype_(vtype) {}
122 
ReadPos(float pos[3])123 	void ReadPos(float pos[3]) const {
124 		switch (decFmt_.posfmt) {
125 		case DEC_FLOAT_3:
126 			{
127 				const float *f = (const float *)(data_ + decFmt_.posoff);
128 				memcpy(pos, f, 12);
129 				if (isThrough()) {
130 					// Integer value passed in a float. Clamped to 0, 65535.
131 					const float z = (int)pos[2] * (1.0f / 65535.0f);
132 					pos[2] = z > 1.0f ? 1.0f : (z < 0.0f ? 0.0f : z);
133 				}
134 			}
135 			break;
136 		case DEC_S16_3:
137 			{
138 				// X and Y are signed 16 bit, Z is unsigned 16 bit
139 				const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
140 				const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
141 				if (isThrough()) {
142 					for (int i = 0; i < 2; i++)
143 						pos[i] = s[i];
144 					pos[2] = u[2] * (1.0f / 65535.0f);
145 				} else {
146 					for (int i = 0; i < 3; i++)
147 						pos[i] = s[i] * (1.0f / 32768.0f);
148 				}
149 			}
150 			break;
151 		case DEC_S8_3:
152 			{
153 				// X and Y are signed 8 bit, Z is unsigned 8 bit
154 				const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
155 				const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
156 				if (isThrough()) {
157 					for (int i = 0; i < 2; i++)
158 						pos[i] = b[i];
159 					pos[2] = u[2] * (1.0f / 255.0f);
160 				} else {
161 					for (int i = 0; i < 3; i++)
162 						pos[i] = b[i] * (1.0f / 128.0f);
163 				}
164 			}
165 			break;
166 		default:
167 			ERROR_LOG_REPORT_ONCE(fmtpos, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
168 			memset(pos, 0, sizeof(float) * 3);
169 			break;
170 		}
171 	}
172 
ReadPosThroughZ16(float pos[3])173 	void ReadPosThroughZ16(float pos[3]) const {
174 		switch (decFmt_.posfmt) {
175 		case DEC_FLOAT_3:
176 			{
177 				const float *f = (const float *)(data_ + decFmt_.posoff);
178 				memcpy(pos, f, 12);
179 				if (isThrough()) {
180 					// Integer value passed in a float. Clamped to 0, 65535.
181 					const float z = (int)pos[2];
182 					pos[2] = z > 65535.0f ? 65535.0f : (z < 0.0f ? 0.0f : z);
183 				}
184 			}
185 			break;
186 		case DEC_S16_3:
187 			{
188 				// X and Y are signed 16 bit, Z is unsigned 16 bit
189 				const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
190 				const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
191 				if (isThrough()) {
192 					for (int i = 0; i < 2; i++)
193 						pos[i] = s[i];
194 					pos[2] = u[2];
195 				} else {
196 					for (int i = 0; i < 3; i++)
197 						pos[i] = s[i] * (1.0f / 32768.0f);
198 				}
199 			}
200 			break;
201 		case DEC_S8_3:
202 			{
203 				// X and Y are signed 8 bit, Z is unsigned 8 bit
204 				const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
205 				const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
206 				if (isThrough()) {
207 					for (int i = 0; i < 2; i++)
208 						pos[i] = b[i];
209 					pos[2] = u[2];
210 				} else {
211 					for (int i = 0; i < 3; i++)
212 						pos[i] = b[i] * (1.0f / 128.0f);
213 				}
214 			}
215 			break;
216 		default:
217 			ERROR_LOG_REPORT_ONCE(fmtz16, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
218 			memset(pos, 0, sizeof(float) * 3);
219 			break;
220 		}
221 	}
222 
ReadNrm(float nrm[3])223 	void ReadNrm(float nrm[3]) const {
224 		switch (decFmt_.nrmfmt) {
225 		case DEC_FLOAT_3:
226 			//memcpy(nrm, data_ + decFmt_.nrmoff, 12);
227 			{
228 				const float *f = (const float *)(data_ + decFmt_.nrmoff);
229 				for (int i = 0; i < 3; i++)
230 					nrm[i] = f[i];
231 			}
232 			break;
233 		case DEC_S16_3:
234 			{
235 				const s16 *s = (const s16 *)(data_ + decFmt_.nrmoff);
236 				for (int i = 0; i < 3; i++)
237 					nrm[i] = s[i] * (1.f / 32767.f);
238 			}
239 			break;
240 		case DEC_S8_3:
241 			{
242 				const s8 *b = (const s8 *)(data_ + decFmt_.nrmoff);
243 				for (int i = 0; i < 3; i++)
244 					nrm[i] = b[i] * (1.f / 127.f);
245 			}
246 			break;
247 		default:
248 			ERROR_LOG_REPORT_ONCE(fmtnrm, G3D, "Reader: Unsupported Nrm Format %d", decFmt_.nrmfmt);
249 			memset(nrm, 0, sizeof(float) * 3);
250 			break;
251 		}
252 	}
253 
ReadUV(float uv[2])254 	void ReadUV(float uv[2]) const {
255 		switch (decFmt_.uvfmt) {
256 		case DEC_U8_2:
257 			{
258 				const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
259 				uv[0] = b[0] * (1.f / 128.f);
260 				uv[1] = b[1] * (1.f / 128.f);
261 			}
262 			break;
263 
264 		case DEC_U16_2:
265 			{
266 				const u16 *s = (const u16 *)(data_ + decFmt_.uvoff);
267 				uv[0] = s[0] * (1.f / 32768.f);
268 				uv[1] = s[1] * (1.f / 32768.f);
269 			}
270 			break;
271 
272 		case DEC_FLOAT_2:
273 			{
274 				const float *f = (const float *)(data_ + decFmt_.uvoff);
275 				uv[0] = f[0];
276 				uv[1] = f[1];
277 			}
278 			break;
279 
280 		default:
281 			ERROR_LOG_REPORT_ONCE(fmtuv, G3D, "Reader: Unsupported UV Format %d", decFmt_.uvfmt);
282 			memset(uv, 0, sizeof(float) * 2);
283 			break;
284 		}
285 	}
286 
ReadColor0(float color[4])287 	void ReadColor0(float color[4]) const {
288 		switch (decFmt_.c0fmt) {
289 		case DEC_U8_4:
290 			{
291 				const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
292 				for (int i = 0; i < 4; i++)
293 					color[i] = b[i] * (1.f / 255.f);
294 			}
295 			break;
296 		case DEC_FLOAT_4:
297 			memcpy(color, data_ + decFmt_.c0off, 16);
298 			break;
299 		default:
300 			ERROR_LOG_REPORT_ONCE(fmtc0, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
301 			memset(color, 0, sizeof(float) * 4);
302 			break;
303 		}
304 	}
305 
ReadColor0_8888(u8 color[4])306 	void ReadColor0_8888(u8 color[4]) const {
307 		switch (decFmt_.c0fmt) {
308 		case DEC_U8_4:
309 			{
310 				const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
311 				for (int i = 0; i < 4; i++)
312 					color[i] = b[i];
313 			}
314 			break;
315 		case DEC_FLOAT_4:
316 			{
317 				const float *f = (const float *)(data_ + decFmt_.c0off);
318 				for (int i = 0; i < 4; i++)
319 					color[i] = f[i] * 255.0f;
320 			}
321 			break;
322 		default:
323 			ERROR_LOG_REPORT_ONCE(fmtc0_8888, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
324 			memset(color, 0, sizeof(u8) * 4);
325 			break;
326 		}
327 	}
328 
329 
ReadColor1(float color[3])330 	void ReadColor1(float color[3]) const {
331 		switch (decFmt_.c1fmt) {
332 		case DEC_U8_4:
333 			{
334 				const u8 *b = (const u8 *)(data_ + decFmt_.c1off);
335 				for (int i = 0; i < 3; i++)
336 					color[i] = b[i] * (1.f / 255.f);
337 			}
338 			break;
339 		case DEC_FLOAT_4:
340 			memcpy(color, data_ + decFmt_.c1off, 12);
341 			break;
342 		default:
343 			ERROR_LOG_REPORT_ONCE(fmtc1, G3D, "Reader: Unsupported C1 Format %d", decFmt_.c1fmt);
344 			memset(color, 0, sizeof(float) * 3);
345 			break;
346 		}
347 	}
348 
ReadWeights(float weights[8])349 	void ReadWeights(float weights[8]) const {
350 		const float *f = (const float *)(data_ + decFmt_.w0off);
351 		const u8 *b = (const u8 *)(data_ + decFmt_.w0off);
352 		const u16 *s = (const u16 *)(data_ + decFmt_.w0off);
353 		switch (decFmt_.w0fmt) {
354 		case DEC_FLOAT_1:
355 		case DEC_FLOAT_2:
356 		case DEC_FLOAT_3:
357 		case DEC_FLOAT_4:
358 			for (int i = 0; i <= decFmt_.w0fmt - DEC_FLOAT_1; i++)
359 				weights[i] = f[i];
360 			break;
361 		case DEC_U8_1: weights[0] = b[0] * (1.f / 128.f); break;
362 		case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i] = b[i] * (1.f / 128.f); break;
363 		case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i] = b[i] * (1.f / 128.f); break;
364 		case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i] = b[i] * (1.f / 128.f); break;
365 		case DEC_U16_1: weights[0] = s[0] * (1.f / 32768.f); break;
366 		case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i] = s[i] * (1.f / 32768.f); break;
367 		case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i] = s[i] * (1.f / 32768.f); break;
368 		case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i] = s[i] * (1.f / 32768.f); break;
369 		default:
370 			ERROR_LOG_REPORT_ONCE(fmtw0, G3D, "Reader: Unsupported W0 Format %d", decFmt_.w0fmt);
371 			memset(weights, 0, sizeof(float) * 8);
372 			break;
373 		}
374 
375 		f = (const float *)(data_ + decFmt_.w1off);
376 		b = (const u8 *)(data_ + decFmt_.w1off);
377 		s = (const u16 *)(data_ + decFmt_.w1off);
378 		switch (decFmt_.w1fmt) {
379 		case 0:
380 			// It's fine for there to be w0 weights but not w1.
381 			break;
382 		case DEC_FLOAT_1:
383 		case DEC_FLOAT_2:
384 		case DEC_FLOAT_3:
385 		case DEC_FLOAT_4:
386 			for (int i = 0; i <= decFmt_.w1fmt - DEC_FLOAT_1; i++)
387 				weights[i+4] = f[i];
388 			break;
389 		case DEC_U8_1: weights[4] = b[0] * (1.f / 128.f); break;
390 		case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
391 		case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
392 		case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
393 		case DEC_U16_1: weights[4] = s[0] * (1.f / 32768.f); break;
394 		case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
395 		case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
396 		case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i+4] = s[i]  * (1.f / 32768.f); break;
397 		default:
398 			ERROR_LOG_REPORT_ONCE(fmtw1, G3D, "Reader: Unsupported W1 Format %d", decFmt_.w1fmt);
399 			memset(weights + 4, 0, sizeof(float) * 4);
400 			break;
401 		}
402 	}
403 
hasColor0()404 	bool hasColor0() const { return decFmt_.c0fmt != 0; }
hasColor1()405 	bool hasColor1() const { return decFmt_.c1fmt != 0; }
hasNormal()406 	bool hasNormal() const { return decFmt_.nrmfmt != 0; }
hasUV()407 	bool hasUV() const { return decFmt_.uvfmt != 0; }
isThrough()408 	bool isThrough() const { return (vtype_ & GE_VTYPE_THROUGH) != 0; }
Goto(int index)409 	void Goto(int index) {
410 		data_ = base_ + index * decFmt_.stride;
411 	}
412 
413 private:
414 	u8 *base_;
415 	u8 *data_;
416 	DecVtxFormat decFmt_;
417 	int vtype_;
418 };
// Debugging utilities
// Implemented out of line.
// NOTE(review): presumably logs the components of the vertex the reader currently points at - confirm against the definition.
void PrintDecodedVertex(VertexReader &vtx);
421 
422 
class VertexDecoder;
class VertexDecoderJitCache;

// Pointer to a const, argument-less member function of VertexDecoder
// (the type of the entries in VertexDecoder::steps_).
using StepFunction = void (VertexDecoder::*)() const;
// Pointer to an argument-less member function of VertexDecoderJitCache.
using JitStepFunction = void (VertexDecoderJitCache::*)();

// Holds one StepFunction together with one JitStepFunction.
// NOTE(review): presumably used to find the JIT routine that corresponds to a
// given interpreter step; the lookup table itself is not part of this header.
struct JitLookup {
	StepFunction func;
	JitStepFunction jitFunc;
};
433 
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
// Implemented out of line.
// NOTE(review): presumably maps a bone count to the bone count actually used for shader selection - confirm against the definition.
int TranslateNumBones(int bones);
436 
437 typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
438 
// Options handed to VertexDecoder::SetVertexType().
// NOTE(review): going by the names, these presumably make the decoder output all weights /
// 8-bit normals as floats - confirm in the SetVertexType implementation.
struct VertexDecoderOptions {
	bool expandAllWeightsToFloat;
	bool expand8BitNormalsToFloat;
};
443 
444 class VertexDecoder {
445 public:
446 	VertexDecoder();
447 
448 	// A jit cache is not mandatory.
449 	void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = nullptr);
450 
VertexType()451 	u32 VertexType() const { return fmt_; }
452 
GetDecVtxFmt()453 	const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
454 
455 	void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
456 
hasColor()457 	bool hasColor() const { return col != 0; }
hasTexcoord()458 	bool hasTexcoord() const { return tc != 0; }
VertexSize()459 	int VertexSize() const { return size; }  // PSP format size
460 
461 	std::string GetString(DebugShaderStringType stringType);
462 
463 	void Step_WeightsU8() const;
464 	void Step_WeightsU16() const;
465 	void Step_WeightsU8ToFloat() const;
466 	void Step_WeightsU16ToFloat() const;
467 	void Step_WeightsFloat() const;
468 
469 	void ComputeSkinMatrix(const float weights[8]) const;
470 
471 	void Step_WeightsU8Skin() const;
472 	void Step_WeightsU16Skin() const;
473 	void Step_WeightsFloatSkin() const;
474 
475 	void Step_TcU8ToFloat() const;
476 	void Step_TcU16ToFloat() const;
477 	void Step_TcFloat() const;
478 
479 	void Step_TcU8Prescale() const;
480 	void Step_TcU16Prescale() const;
481 	void Step_TcU16DoublePrescale() const;
482 	void Step_TcFloatPrescale() const;
483 
484 	void Step_TcU16DoubleToFloat() const;
485 	void Step_TcU16ThroughToFloat() const;
486 	void Step_TcU16ThroughDoubleToFloat() const;
487 	void Step_TcFloatThrough() const;
488 
489 	void Step_TcU8MorphToFloat() const;
490 	void Step_TcU16MorphToFloat() const;
491 	void Step_TcU16DoubleMorphToFloat() const;
492 	void Step_TcFloatMorph() const;
493 	void Step_TcU8PrescaleMorph() const;
494 	void Step_TcU16PrescaleMorph() const;
495 	void Step_TcU16DoublePrescaleMorph() const;
496 	void Step_TcFloatPrescaleMorph() const;
497 
498 	void Step_ColorInvalid() const;
499 	void Step_Color4444() const;
500 	void Step_Color565() const;
501 	void Step_Color5551() const;
502 	void Step_Color8888() const;
503 
504 	void Step_Color4444Morph() const;
505 	void Step_Color565Morph() const;
506 	void Step_Color5551Morph() const;
507 	void Step_Color8888Morph() const;
508 
509 	void Step_NormalS8() const;
510 	void Step_NormalS8ToFloat() const;
511 	void Step_NormalS16() const;
512 	void Step_NormalFloat() const;
513 
514 	void Step_NormalS8Skin() const;
515 	void Step_NormalS16Skin() const;
516 	void Step_NormalFloatSkin() const;
517 
518 	void Step_NormalS8Morph() const;
519 	void Step_NormalS16Morph() const;
520 	void Step_NormalFloatMorph() const;
521 
522 	void Step_NormalS8MorphSkin() const;
523 	void Step_NormalS16MorphSkin() const;
524 	void Step_NormalFloatMorphSkin() const;
525 
526 	void Step_PosS8() const;
527 	void Step_PosS16() const;
528 	void Step_PosFloat() const;
529 
530 	void Step_PosS8Skin() const;
531 	void Step_PosS16Skin() const;
532 	void Step_PosFloatSkin() const;
533 
534 	void Step_PosS8Morph() const;
535 	void Step_PosS16Morph() const;
536 	void Step_PosFloatMorph() const;
537 
538 	void Step_PosS8MorphSkin() const;
539 	void Step_PosS16MorphSkin() const;
540 	void Step_PosFloatMorphSkin() const;
541 
542 	void Step_PosS8Through() const;
543 	void Step_PosS16Through() const;
544 	void Step_PosFloatThrough() const;
545 
546 	// output must be big for safety.
547 	// Returns number of chars written.
548 	// Ugly for speed.
549 	int ToString(char *output) const;
550 
551 	// Mutable decoder state
552 	mutable u8 *decoded_;
553 	mutable const u8 *ptr_;
554 
555 	JittedVertexDecoder jitted_;
556 	int32_t jittedSize_;
557 
558 	// "Immutable" state, set at startup
559 
560 	// The decoding steps. Never more than 5.
561 	StepFunction steps_[5];
562 	int numSteps_;
563 
564 	u32 fmt_;
565 	DecVtxFormat decFmt;
566 
567 	bool throughmode;
568 	u8 size;
569 	u8 onesize_;
570 
571 	u8 weightoff;
572 	u8 tcoff;
573 	u8 coloff;
574 	u8 nrmoff;
575 	u8 posoff;
576 
577 	u8 tc;
578 	u8 col;
579 	u8 nrm;
580 	u8 pos;
581 	u8 weighttype;
582 	u8 idx;
583 	u8 morphcount;
584 	u8 nweights;
585 
586 	u8 biggest;  // in practice, alignment.
587 
588 	friend class VertexDecoderJitCache;
589 };
590 
591 
592 // A compiled vertex decoder takes the following arguments (C calling convention):
593 // u8 *src, u8 *dst, int count
594 //
595 // x86:
596 //   src is placed in esi and dst in edi
597 //   for every vertex, we step esi and edi forwards by the two vertex sizes
598 //   all movs are done relative to esi and edi
599 //
600 // that's it!
601 
// Pick the code-emitter class that VertexDecoderJitCache derives from on this architecture.
// On any other architecture the macro stays undefined, which selects the FakeGen-based
// VertexDecoderJitCache in the #else branch of the #ifdef that follows.
#if PPSSPP_ARCH(ARM)
#define VERTEXDECODER_JIT_BACKEND ArmGen::ARMXCodeBlock
#elif PPSSPP_ARCH(ARM64)
#define VERTEXDECODER_JIT_BACKEND Arm64Gen::ARM64CodeBlock
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#define VERTEXDECODER_JIT_BACKEND Gen::XCodeBlock
#endif
609 
610 
611 #ifdef VERTEXDECODER_JIT_BACKEND
612 class VertexDecoderJitCache : public VERTEXDECODER_JIT_BACKEND {
613 public:
614 	VertexDecoderJitCache();
615 
616 	// Returns a pointer to the code to run.
617 	JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
618 	void Clear();
619 
620 	void Jit_WeightsU8();
621 	void Jit_WeightsU16();
622 	void Jit_WeightsU8ToFloat();
623 	void Jit_WeightsU16ToFloat();
624 	void Jit_WeightsFloat();
625 
626 	void Jit_WeightsU8Skin();
627 	void Jit_WeightsU16Skin();
628 	void Jit_WeightsFloatSkin();
629 
630 	void Jit_TcU8ToFloat();
631 	void Jit_TcU16ToFloat();
632 	void Jit_TcFloat();
633 
634 	void Jit_TcU8Prescale();
635 	void Jit_TcU16Prescale();
636 	void Jit_TcFloatPrescale();
637 
638 	void Jit_TcAnyMorph(int bits);
639 	void Jit_TcU8MorphToFloat();
640 	void Jit_TcU16MorphToFloat();
641 	void Jit_TcFloatMorph();
642 	void Jit_TcU8PrescaleMorph();
643 	void Jit_TcU16PrescaleMorph();
644 	void Jit_TcFloatPrescaleMorph();
645 
646 	void Jit_TcU16ThroughToFloat();
647 	void Jit_TcFloatThrough();
648 
649 	void Jit_Color8888();
650 	void Jit_Color4444();
651 	void Jit_Color565();
652 	void Jit_Color5551();
653 
654 	void Jit_NormalS8();
655 	void Jit_NormalS8ToFloat();
656 	void Jit_NormalS16();
657 	void Jit_NormalFloat();
658 
659 	void Jit_NormalS8Skin();
660 	void Jit_NormalS16Skin();
661 	void Jit_NormalFloatSkin();
662 
663 	void Jit_PosS8();
664 	void Jit_PosS8ToFloat();
665 	void Jit_PosS16();
666 	void Jit_PosFloat();
667 	void Jit_PosS8Through();
668 	void Jit_PosS16Through();
669 
670 	void Jit_PosS8Skin();
671 	void Jit_PosS16Skin();
672 	void Jit_PosFloatSkin();
673 
674 	void Jit_NormalS8Morph();
675 	void Jit_NormalS16Morph();
676 	void Jit_NormalFloatMorph();
677 
678 	void Jit_PosS8Morph();
679 	void Jit_PosS16Morph();
680 	void Jit_PosFloatMorph();
681 
682 	void Jit_Color8888Morph();
683 	void Jit_Color4444Morph();
684 	void Jit_Color565Morph();
685 	void Jit_Color5551Morph();
686 
687 private:
688 	bool CompileStep(const VertexDecoder &dec, int i);
689 	void Jit_ApplyWeights();
690 	void Jit_WriteMatrixMul(int outOff, bool pos);
691 	void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
692 	void Jit_AnyS8ToFloat(int srcoff);
693 	void Jit_AnyS16ToFloat(int srcoff);
694 	void Jit_AnyU8ToFloat(int srcoff, u32 bits = 32);
695 	void Jit_AnyU16ToFloat(int srcoff, u32 bits = 64);
696 	void Jit_AnyS8Morph(int srcoff, int dstoff);
697 	void Jit_AnyS16Morph(int srcoff, int dstoff);
698 	void Jit_AnyFloatMorph(int srcoff, int dstoff);
699 
700 	const VertexDecoder *dec_;
701 #if PPSSPP_ARCH(ARM64)
702 	Arm64Gen::ARM64FloatEmitter fp;
703 #endif
704 };
705 #else
// Stand-in used when VERTEXDECODER_JIT_BACKEND is not defined (no JIT backend for
// this architecture). It offers the same Compile()/Clear() entry points, but
// Compile() never produces code.
class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
public:
	VertexDecoderJitCache();

	// Always returns nullptr; neither dec nor *jittedSize is touched.
	JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize) {
		return nullptr;
	}
	void Clear();
};
715 #endif
716