1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #pragma once
19
20 #include <cstring>
21
22 #include "ppsspp_config.h"
23
24 #include "Common/Common.h"
25 #include "Common/Data/Collections/Hashmaps.h"
26 #include "Common/Log.h"
27 #include "Core/Reporting.h"
28 #include "GPU/ge_constants.h"
29 #include "GPU/Common/ShaderCommon.h"
30 #include "GPU/GPUCommon.h"
31
32 #if PPSSPP_ARCH(ARM)
33 #include "Common/ArmEmitter.h"
34 #elif PPSSPP_ARCH(ARM64)
35 #include "Common/Arm64Emitter.h"
36 #elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
37 #include "Common/x64Emitter.h"
38 #else
39 #include "Common/FakeEmitter.h"
40 #endif
41
42 // DecVtxFormat - vertex formats for PC
43 // Kind of like a D3D VertexDeclaration.
44 // Can write code to easily bind these using OpenGL, or read these manually.
45 // No morph support, that is taken care of by the VertexDecoder.
46
47 // Keep this in 4 bits.
// Component formats used by decoded vertices. The values are written out
// explicitly so it is easy to see that the largest one (14) fits in 4 bits.
enum {
	DEC_NONE = 0,
	DEC_FLOAT_1 = 1,
	DEC_FLOAT_2 = 2,
	DEC_FLOAT_3 = 3,
	DEC_FLOAT_4 = 4,
	DEC_S8_3 = 5,
	DEC_S16_3 = 6,
	DEC_U8_1 = 7,
	DEC_U8_2 = 8,
	DEC_U8_3 = 9,
	DEC_U8_4 = 10,
	DEC_U16_1 = 11,
	DEC_U16_2 = 12,
	DEC_U16_3 = 13,
	DEC_U16_4 = 14,
};
65
// Declared here, defined elsewhere. Presumably returns the size in bytes of a
// component stored in the given DEC_* format - confirm against the definition.
66 int DecFmtSize(u8 fmt);
67
68 struct DecVtxFormat {
69 u8 w0fmt; u8 w0off; // first 4 weights
70 u8 w1fmt; u8 w1off; // second 4 weights
71 u8 uvfmt; u8 uvoff;
72 u8 c0fmt; u8 c0off; // First color
73 u8 c1fmt; u8 c1off;
74 u8 nrmfmt; u8 nrmoff;
75 u8 posfmt; u8 posoff;
76 u8 stride;
77
78 uint32_t id;
79 void ComputeID();
80 void InitializeFromID(uint32_t id);
81 };
82
// Declared here, defined elsewhere. Judging by the signature it scans `count`
// indices (element width taken from vertType) and writes the smallest and
// largest index to the two output pointers - TODO confirm against the definition.
83 void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
84
// Rounds x up to the next multiple of 4; values that are already a multiple of
// 4 come back unchanged.
inline int RoundUp4(int x) {
	const int bumped = x + 3;
	// Removing the two low bits snaps down to a multiple of 4.
	return bumped - (bumped & 3);
}
88
89 class IndexConverter {
90 private:
91 union {
92 const void *indices;
93 const u8 *indices8;
94 const u16_le *indices16;
95 const u32_le *indices32;
96 };
97 u32 indexType;
98
99 public:
IndexConverter(u32 vertType,const void * indices)100 IndexConverter(u32 vertType, const void *indices)
101 : indices(indices), indexType(vertType & GE_VTYPE_IDX_MASK) {
102 }
103
operator()104 u32 operator() (u32 index) const {
105 switch (indexType) {
106 case GE_VTYPE_IDX_8BIT:
107 return indices8[index];
108 case GE_VTYPE_IDX_16BIT:
109 return indices16[index];
110 case GE_VTYPE_IDX_32BIT:
111 return indices32[index];
112 default:
113 return index;
114 }
115 }
116 };
117
118 // Reads decoded vertex formats in a convenient way. For software transform and debugging.
119 class VertexReader {
120 public:
VertexReader(u8 * base,const DecVtxFormat & decFmt,int vtype)121 VertexReader(u8 *base, const DecVtxFormat &decFmt, int vtype) : base_(base), data_(base), decFmt_(decFmt), vtype_(vtype) {}
122
ReadPos(float pos[3])123 void ReadPos(float pos[3]) const {
124 switch (decFmt_.posfmt) {
125 case DEC_FLOAT_3:
126 {
127 const float *f = (const float *)(data_ + decFmt_.posoff);
128 memcpy(pos, f, 12);
129 if (isThrough()) {
130 // Integer value passed in a float. Clamped to 0, 65535.
131 const float z = (int)pos[2] * (1.0f / 65535.0f);
132 pos[2] = z > 1.0f ? 1.0f : (z < 0.0f ? 0.0f : z);
133 }
134 }
135 break;
136 case DEC_S16_3:
137 {
138 // X and Y are signed 16 bit, Z is unsigned 16 bit
139 const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
140 const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
141 if (isThrough()) {
142 for (int i = 0; i < 2; i++)
143 pos[i] = s[i];
144 pos[2] = u[2] * (1.0f / 65535.0f);
145 } else {
146 for (int i = 0; i < 3; i++)
147 pos[i] = s[i] * (1.0f / 32768.0f);
148 }
149 }
150 break;
151 case DEC_S8_3:
152 {
153 // X and Y are signed 8 bit, Z is unsigned 8 bit
154 const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
155 const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
156 if (isThrough()) {
157 for (int i = 0; i < 2; i++)
158 pos[i] = b[i];
159 pos[2] = u[2] * (1.0f / 255.0f);
160 } else {
161 for (int i = 0; i < 3; i++)
162 pos[i] = b[i] * (1.0f / 128.0f);
163 }
164 }
165 break;
166 default:
167 ERROR_LOG_REPORT_ONCE(fmtpos, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
168 memset(pos, 0, sizeof(float) * 3);
169 break;
170 }
171 }
172
ReadPosThroughZ16(float pos[3])173 void ReadPosThroughZ16(float pos[3]) const {
174 switch (decFmt_.posfmt) {
175 case DEC_FLOAT_3:
176 {
177 const float *f = (const float *)(data_ + decFmt_.posoff);
178 memcpy(pos, f, 12);
179 if (isThrough()) {
180 // Integer value passed in a float. Clamped to 0, 65535.
181 const float z = (int)pos[2];
182 pos[2] = z > 65535.0f ? 65535.0f : (z < 0.0f ? 0.0f : z);
183 }
184 }
185 break;
186 case DEC_S16_3:
187 {
188 // X and Y are signed 16 bit, Z is unsigned 16 bit
189 const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
190 const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
191 if (isThrough()) {
192 for (int i = 0; i < 2; i++)
193 pos[i] = s[i];
194 pos[2] = u[2];
195 } else {
196 for (int i = 0; i < 3; i++)
197 pos[i] = s[i] * (1.0f / 32768.0f);
198 }
199 }
200 break;
201 case DEC_S8_3:
202 {
203 // X and Y are signed 8 bit, Z is unsigned 8 bit
204 const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
205 const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
206 if (isThrough()) {
207 for (int i = 0; i < 2; i++)
208 pos[i] = b[i];
209 pos[2] = u[2];
210 } else {
211 for (int i = 0; i < 3; i++)
212 pos[i] = b[i] * (1.0f / 128.0f);
213 }
214 }
215 break;
216 default:
217 ERROR_LOG_REPORT_ONCE(fmtz16, G3D, "Reader: Unsupported Pos Format %d", decFmt_.posfmt);
218 memset(pos, 0, sizeof(float) * 3);
219 break;
220 }
221 }
222
ReadNrm(float nrm[3])223 void ReadNrm(float nrm[3]) const {
224 switch (decFmt_.nrmfmt) {
225 case DEC_FLOAT_3:
226 //memcpy(nrm, data_ + decFmt_.nrmoff, 12);
227 {
228 const float *f = (const float *)(data_ + decFmt_.nrmoff);
229 for (int i = 0; i < 3; i++)
230 nrm[i] = f[i];
231 }
232 break;
233 case DEC_S16_3:
234 {
235 const s16 *s = (const s16 *)(data_ + decFmt_.nrmoff);
236 for (int i = 0; i < 3; i++)
237 nrm[i] = s[i] * (1.f / 32767.f);
238 }
239 break;
240 case DEC_S8_3:
241 {
242 const s8 *b = (const s8 *)(data_ + decFmt_.nrmoff);
243 for (int i = 0; i < 3; i++)
244 nrm[i] = b[i] * (1.f / 127.f);
245 }
246 break;
247 default:
248 ERROR_LOG_REPORT_ONCE(fmtnrm, G3D, "Reader: Unsupported Nrm Format %d", decFmt_.nrmfmt);
249 memset(nrm, 0, sizeof(float) * 3);
250 break;
251 }
252 }
253
ReadUV(float uv[2])254 void ReadUV(float uv[2]) const {
255 switch (decFmt_.uvfmt) {
256 case DEC_U8_2:
257 {
258 const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
259 uv[0] = b[0] * (1.f / 128.f);
260 uv[1] = b[1] * (1.f / 128.f);
261 }
262 break;
263
264 case DEC_U16_2:
265 {
266 const u16 *s = (const u16 *)(data_ + decFmt_.uvoff);
267 uv[0] = s[0] * (1.f / 32768.f);
268 uv[1] = s[1] * (1.f / 32768.f);
269 }
270 break;
271
272 case DEC_FLOAT_2:
273 {
274 const float *f = (const float *)(data_ + decFmt_.uvoff);
275 uv[0] = f[0];
276 uv[1] = f[1];
277 }
278 break;
279
280 default:
281 ERROR_LOG_REPORT_ONCE(fmtuv, G3D, "Reader: Unsupported UV Format %d", decFmt_.uvfmt);
282 memset(uv, 0, sizeof(float) * 2);
283 break;
284 }
285 }
286
ReadColor0(float color[4])287 void ReadColor0(float color[4]) const {
288 switch (decFmt_.c0fmt) {
289 case DEC_U8_4:
290 {
291 const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
292 for (int i = 0; i < 4; i++)
293 color[i] = b[i] * (1.f / 255.f);
294 }
295 break;
296 case DEC_FLOAT_4:
297 memcpy(color, data_ + decFmt_.c0off, 16);
298 break;
299 default:
300 ERROR_LOG_REPORT_ONCE(fmtc0, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
301 memset(color, 0, sizeof(float) * 4);
302 break;
303 }
304 }
305
ReadColor0_8888(u8 color[4])306 void ReadColor0_8888(u8 color[4]) const {
307 switch (decFmt_.c0fmt) {
308 case DEC_U8_4:
309 {
310 const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
311 for (int i = 0; i < 4; i++)
312 color[i] = b[i];
313 }
314 break;
315 case DEC_FLOAT_4:
316 {
317 const float *f = (const float *)(data_ + decFmt_.c0off);
318 for (int i = 0; i < 4; i++)
319 color[i] = f[i] * 255.0f;
320 }
321 break;
322 default:
323 ERROR_LOG_REPORT_ONCE(fmtc0_8888, G3D, "Reader: Unsupported C0 Format %d", decFmt_.c0fmt);
324 memset(color, 0, sizeof(u8) * 4);
325 break;
326 }
327 }
328
329
ReadColor1(float color[3])330 void ReadColor1(float color[3]) const {
331 switch (decFmt_.c1fmt) {
332 case DEC_U8_4:
333 {
334 const u8 *b = (const u8 *)(data_ + decFmt_.c1off);
335 for (int i = 0; i < 3; i++)
336 color[i] = b[i] * (1.f / 255.f);
337 }
338 break;
339 case DEC_FLOAT_4:
340 memcpy(color, data_ + decFmt_.c1off, 12);
341 break;
342 default:
343 ERROR_LOG_REPORT_ONCE(fmtc1, G3D, "Reader: Unsupported C1 Format %d", decFmt_.c1fmt);
344 memset(color, 0, sizeof(float) * 3);
345 break;
346 }
347 }
348
ReadWeights(float weights[8])349 void ReadWeights(float weights[8]) const {
350 const float *f = (const float *)(data_ + decFmt_.w0off);
351 const u8 *b = (const u8 *)(data_ + decFmt_.w0off);
352 const u16 *s = (const u16 *)(data_ + decFmt_.w0off);
353 switch (decFmt_.w0fmt) {
354 case DEC_FLOAT_1:
355 case DEC_FLOAT_2:
356 case DEC_FLOAT_3:
357 case DEC_FLOAT_4:
358 for (int i = 0; i <= decFmt_.w0fmt - DEC_FLOAT_1; i++)
359 weights[i] = f[i];
360 break;
361 case DEC_U8_1: weights[0] = b[0] * (1.f / 128.f); break;
362 case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i] = b[i] * (1.f / 128.f); break;
363 case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i] = b[i] * (1.f / 128.f); break;
364 case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i] = b[i] * (1.f / 128.f); break;
365 case DEC_U16_1: weights[0] = s[0] * (1.f / 32768.f); break;
366 case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i] = s[i] * (1.f / 32768.f); break;
367 case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i] = s[i] * (1.f / 32768.f); break;
368 case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i] = s[i] * (1.f / 32768.f); break;
369 default:
370 ERROR_LOG_REPORT_ONCE(fmtw0, G3D, "Reader: Unsupported W0 Format %d", decFmt_.w0fmt);
371 memset(weights, 0, sizeof(float) * 8);
372 break;
373 }
374
375 f = (const float *)(data_ + decFmt_.w1off);
376 b = (const u8 *)(data_ + decFmt_.w1off);
377 s = (const u16 *)(data_ + decFmt_.w1off);
378 switch (decFmt_.w1fmt) {
379 case 0:
380 // It's fine for there to be w0 weights but not w1.
381 break;
382 case DEC_FLOAT_1:
383 case DEC_FLOAT_2:
384 case DEC_FLOAT_3:
385 case DEC_FLOAT_4:
386 for (int i = 0; i <= decFmt_.w1fmt - DEC_FLOAT_1; i++)
387 weights[i+4] = f[i];
388 break;
389 case DEC_U8_1: weights[4] = b[0] * (1.f / 128.f); break;
390 case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
391 case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
392 case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
393 case DEC_U16_1: weights[4] = s[0] * (1.f / 32768.f); break;
394 case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
395 case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
396 case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
397 default:
398 ERROR_LOG_REPORT_ONCE(fmtw1, G3D, "Reader: Unsupported W1 Format %d", decFmt_.w1fmt);
399 memset(weights + 4, 0, sizeof(float) * 4);
400 break;
401 }
402 }
403
hasColor0()404 bool hasColor0() const { return decFmt_.c0fmt != 0; }
hasColor1()405 bool hasColor1() const { return decFmt_.c1fmt != 0; }
hasNormal()406 bool hasNormal() const { return decFmt_.nrmfmt != 0; }
hasUV()407 bool hasUV() const { return decFmt_.uvfmt != 0; }
isThrough()408 bool isThrough() const { return (vtype_ & GE_VTYPE_THROUGH) != 0; }
Goto(int index)409 void Goto(int index) {
410 data_ = base_ + index * decFmt_.stride;
411 }
412
413 private:
414 u8 *base_;
415 u8 *data_;
416 DecVtxFormat decFmt_;
417 int vtype_;
418 };
419 // Debugging utilities
// Declared here, defined elsewhere. Presumably prints the components of the
// vertex the reader currently points at - TODO confirm against the definition.
420 void PrintDecodedVertex(VertexReader &vtx);
421
422
class VertexDecoder;
class VertexDecoderJitCache;

// Pointer to a const VertexDecoder member function taking no arguments.
using StepFunction = void (VertexDecoder::*)() const;
// Pointer to a VertexDecoderJitCache member function taking no arguments.
using JitStepFunction = void (VertexDecoderJitCache::*)();
428
// Pairs a VertexDecoder step with a VertexDecoderJitCache member function.
// Presumably used to look up the JIT routine that corresponds to a step -
// the lookup itself is not in this header.
429 struct JitLookup {
430 StepFunction func; // The VertexDecoder step.
431 JitStepFunction jitFunc; // The VertexDecoderJitCache function paired with func.
432 };
433
434 // Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
// Declared here, defined elsewhere. Takes a bone count and returns the bone
// count to use instead; the exact mapping is in the definition.
435 int TranslateNumBones(int bones);
436
437 typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
438
// Options passed to VertexDecoder::SetVertexType(). How each flag changes the
// chosen decoding steps is decided in the implementation, not in this header.
439 struct VertexDecoderOptions {
440 bool expandAllWeightsToFloat; // Presumably: emit weights as floats regardless of source width - confirm.
441 bool expand8BitNormalsToFloat; // Presumably: emit 8-bit normals as floats - confirm.
442 };
443
444 class VertexDecoder {
445 public:
446 VertexDecoder();
447
448 // A jit cache is not mandatory.
449 void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = nullptr);
450
VertexType()451 u32 VertexType() const { return fmt_; }
452
GetDecVtxFmt()453 const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
454
455 void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
456
hasColor()457 bool hasColor() const { return col != 0; }
hasTexcoord()458 bool hasTexcoord() const { return tc != 0; }
VertexSize()459 int VertexSize() const { return size; } // PSP format size
460
461 std::string GetString(DebugShaderStringType stringType);
462
463 void Step_WeightsU8() const;
464 void Step_WeightsU16() const;
465 void Step_WeightsU8ToFloat() const;
466 void Step_WeightsU16ToFloat() const;
467 void Step_WeightsFloat() const;
468
469 void ComputeSkinMatrix(const float weights[8]) const;
470
471 void Step_WeightsU8Skin() const;
472 void Step_WeightsU16Skin() const;
473 void Step_WeightsFloatSkin() const;
474
475 void Step_TcU8ToFloat() const;
476 void Step_TcU16ToFloat() const;
477 void Step_TcFloat() const;
478
479 void Step_TcU8Prescale() const;
480 void Step_TcU16Prescale() const;
481 void Step_TcU16DoublePrescale() const;
482 void Step_TcFloatPrescale() const;
483
484 void Step_TcU16DoubleToFloat() const;
485 void Step_TcU16ThroughToFloat() const;
486 void Step_TcU16ThroughDoubleToFloat() const;
487 void Step_TcFloatThrough() const;
488
489 void Step_TcU8MorphToFloat() const;
490 void Step_TcU16MorphToFloat() const;
491 void Step_TcU16DoubleMorphToFloat() const;
492 void Step_TcFloatMorph() const;
493 void Step_TcU8PrescaleMorph() const;
494 void Step_TcU16PrescaleMorph() const;
495 void Step_TcU16DoublePrescaleMorph() const;
496 void Step_TcFloatPrescaleMorph() const;
497
498 void Step_ColorInvalid() const;
499 void Step_Color4444() const;
500 void Step_Color565() const;
501 void Step_Color5551() const;
502 void Step_Color8888() const;
503
504 void Step_Color4444Morph() const;
505 void Step_Color565Morph() const;
506 void Step_Color5551Morph() const;
507 void Step_Color8888Morph() const;
508
509 void Step_NormalS8() const;
510 void Step_NormalS8ToFloat() const;
511 void Step_NormalS16() const;
512 void Step_NormalFloat() const;
513
514 void Step_NormalS8Skin() const;
515 void Step_NormalS16Skin() const;
516 void Step_NormalFloatSkin() const;
517
518 void Step_NormalS8Morph() const;
519 void Step_NormalS16Morph() const;
520 void Step_NormalFloatMorph() const;
521
522 void Step_NormalS8MorphSkin() const;
523 void Step_NormalS16MorphSkin() const;
524 void Step_NormalFloatMorphSkin() const;
525
526 void Step_PosS8() const;
527 void Step_PosS16() const;
528 void Step_PosFloat() const;
529
530 void Step_PosS8Skin() const;
531 void Step_PosS16Skin() const;
532 void Step_PosFloatSkin() const;
533
534 void Step_PosS8Morph() const;
535 void Step_PosS16Morph() const;
536 void Step_PosFloatMorph() const;
537
538 void Step_PosS8MorphSkin() const;
539 void Step_PosS16MorphSkin() const;
540 void Step_PosFloatMorphSkin() const;
541
542 void Step_PosS8Through() const;
543 void Step_PosS16Through() const;
544 void Step_PosFloatThrough() const;
545
546 // output must be big for safety.
547 // Returns number of chars written.
548 // Ugly for speed.
549 int ToString(char *output) const;
550
551 // Mutable decoder state
552 mutable u8 *decoded_;
553 mutable const u8 *ptr_;
554
555 JittedVertexDecoder jitted_;
556 int32_t jittedSize_;
557
558 // "Immutable" state, set at startup
559
560 // The decoding steps. Never more than 5.
561 StepFunction steps_[5];
562 int numSteps_;
563
564 u32 fmt_;
565 DecVtxFormat decFmt;
566
567 bool throughmode;
568 u8 size;
569 u8 onesize_;
570
571 u8 weightoff;
572 u8 tcoff;
573 u8 coloff;
574 u8 nrmoff;
575 u8 posoff;
576
577 u8 tc;
578 u8 col;
579 u8 nrm;
580 u8 pos;
581 u8 weighttype;
582 u8 idx;
583 u8 morphcount;
584 u8 nweights;
585
586 u8 biggest; // in practice, alignment.
587
588 friend class VertexDecoderJitCache;
589 };
590
591
592 // A compiled vertex decoder takes the following arguments (C calling convention):
593 // u8 *src, u8 *dst, int count
594 //
595 // x86:
596 // src is placed in esi and dst in edi
597 // for every vertex, we step esi and edi forwards by the two vertex sizes
598 // all movs are done relative to esi and edi
599 //
600 // that's it!
601
// Selects the code-block class that VertexDecoderJitCache derives from on
// ARM, ARM64 and x86/x64. On any other architecture the macro stays undefined.
602 #if PPSSPP_ARCH(ARM)
603 #define VERTEXDECODER_JIT_BACKEND ArmGen::ARMXCodeBlock
604 #elif PPSSPP_ARCH(ARM64)
605 #define VERTEXDECODER_JIT_BACKEND Arm64Gen::ARM64CodeBlock
606 #elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
607 #define VERTEXDECODER_JIT_BACKEND Gen::XCodeBlock
608 #endif
609
610
611 #ifdef VERTEXDECODER_JIT_BACKEND
// JIT compiler for vertex decoders, built on the architecture-specific code
// block selected by VERTEXDECODER_JIT_BACKEND. All member functions are only
// declared here; their definitions live elsewhere.
612 class VertexDecoderJitCache : public VERTEXDECODER_JIT_BACKEND {
613 public:
614 VertexDecoderJitCache();
615
616 // Returns a pointer to the code to run.
// The size of the generated code is presumably reported through jittedSize - confirm against the definition.
617 JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
618 void Clear();
619
// Per-step routines. Apart from Jit_TcAnyMorph (which takes an argument), each
// matches the JitStepFunction signature. Most names mirror a VertexDecoder::Step_* function.
620 void Jit_WeightsU8();
621 void Jit_WeightsU16();
622 void Jit_WeightsU8ToFloat();
623 void Jit_WeightsU16ToFloat();
624 void Jit_WeightsFloat();
625
626 void Jit_WeightsU8Skin();
627 void Jit_WeightsU16Skin();
628 void Jit_WeightsFloatSkin();
629
630 void Jit_TcU8ToFloat();
631 void Jit_TcU16ToFloat();
632 void Jit_TcFloat();
633
634 void Jit_TcU8Prescale();
635 void Jit_TcU16Prescale();
636 void Jit_TcFloatPrescale();
637
638 void Jit_TcAnyMorph(int bits);
639 void Jit_TcU8MorphToFloat();
640 void Jit_TcU16MorphToFloat();
641 void Jit_TcFloatMorph();
642 void Jit_TcU8PrescaleMorph();
643 void Jit_TcU16PrescaleMorph();
644 void Jit_TcFloatPrescaleMorph();
645
646 void Jit_TcU16ThroughToFloat();
647 void Jit_TcFloatThrough();
648
649 void Jit_Color8888();
650 void Jit_Color4444();
651 void Jit_Color565();
652 void Jit_Color5551();
653
654 void Jit_NormalS8();
655 void Jit_NormalS8ToFloat();
656 void Jit_NormalS16();
657 void Jit_NormalFloat();
658
659 void Jit_NormalS8Skin();
660 void Jit_NormalS16Skin();
661 void Jit_NormalFloatSkin();
662
663 void Jit_PosS8();
664 void Jit_PosS8ToFloat();
665 void Jit_PosS16();
666 void Jit_PosFloat();
667 void Jit_PosS8Through();
668 void Jit_PosS16Through();
669
670 void Jit_PosS8Skin();
671 void Jit_PosS16Skin();
672 void Jit_PosFloatSkin();
673
674 void Jit_NormalS8Morph();
675 void Jit_NormalS16Morph();
676 void Jit_NormalFloatMorph();
677
678 void Jit_PosS8Morph();
679 void Jit_PosS16Morph();
680 void Jit_PosFloatMorph();
681
682 void Jit_Color8888Morph();
683 void Jit_Color4444Morph();
684 void Jit_Color565Morph();
685 void Jit_Color5551Morph();
686
687 private:
// Presumably compiles step i of dec and returns false when that step cannot be compiled - confirm.
688 bool CompileStep(const VertexDecoder &dec, int i);
// Shared helpers for the routines above.
689 void Jit_ApplyWeights();
690 void Jit_WriteMatrixMul(int outOff, bool pos);
691 void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
692 void Jit_AnyS8ToFloat(int srcoff);
693 void Jit_AnyS16ToFloat(int srcoff);
694 void Jit_AnyU8ToFloat(int srcoff, u32 bits = 32);
695 void Jit_AnyU16ToFloat(int srcoff, u32 bits = 64);
696 void Jit_AnyS8Morph(int srcoff, int dstoff);
697 void Jit_AnyS16Morph(int srcoff, int dstoff);
698 void Jit_AnyFloatMorph(int srcoff, int dstoff);
699
// Presumably the decoder currently being compiled - confirm against Compile().
700 const VertexDecoder *dec_;
// The separate float emitter member exists only on ARM64.
701 #if PPSSPP_ARCH(ARM64)
702 Arm64Gen::ARM64FloatEmitter fp;
703 #endif
704 };
705 #else
// Fallback used when VERTEXDECODER_JIT_BACKEND is not defined, i.e. there is
// no JIT backend for the target architecture. It keeps the same public
// interface but never produces compiled code.
706 class VertexDecoderJitCache : public FakeGen::FakeXCodeBlock {
707 public:
708 VertexDecoderJitCache();
709
// Always returns nullptr. Neither dec nor *jittedSize is touched.
Compile(const VertexDecoder & dec,int32_t * jittedSize)710 JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize) {
711 return nullptr;
712 }
713 void Clear();
714 };
715 #endif
716