1 
2 #include <stdint.h>
3 #include <stdbool.h>
4 #include <stdio.h>
5 #include <string.h>
6 #include <wiiu/gx2.h>
7 #include <wiiu/system/memory.h>
8 #include <wiiu/shader_utils.h>
9 #include <wiiu/wiiu_dbg.h>
10 
11 /* this is a hack for elf builds since their data section is below 0x10000000
12  * and thus can't be accessed by the GX2 hardware */
#ifndef GX2_CAN_ACCESS_DATA_SECTION
/* Record of the program pointers a GX2Shader had before GX2InitShader()
 * replaced them with MEM2 copies.
 * GX2InitShader() stores one of these directly behind the fetch shader
 * program (at fs.program + fs.size) and GX2DestroyShader() reads it back
 * to restore the original pointers. */
typedef struct org_programs_t
{
   void *vs_program;       /* original shader->vs.program     */
   void *ps_program;       /* original shader->ps.program     */
   void *gs_program;       /* original shader->gs.program     */
   void *gs_copy_program;  /* original shader->gs.copyProgram */
} org_programs_t;
#endif
22 
GX2InitShader(GX2Shader * shader)23 void GX2InitShader(GX2Shader *shader)
24 {
25    if (shader->fs.program)
26       return;
27 
28    shader->fs.size = GX2CalcFetchShaderSizeEx(shader->vs.attribVarCount,
29                      GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
30 #ifdef GX2_CAN_ACCESS_DATA_SECTION
31    shader->fs.program = MEM2_alloc(shader->fs.size, GX2_SHADER_ALIGNMENT);
32 #else
33    shader->fs.program = MEM2_alloc(shader->fs.size + sizeof(org_programs_t), GX2_SHADER_ALIGNMENT);
34 #endif
35    GX2InitFetchShaderEx(&shader->fs, (uint8_t *)shader->fs.program,
36                         shader->vs.attribVarCount,
37                         shader->attribute_stream,
38                         GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
39    GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->fs.program, shader->fs.size);
40 
41 #ifndef GX2_CAN_ACCESS_DATA_SECTION
42    org_programs_t *org = (org_programs_t *)(shader->fs.program + shader->fs.size);
43    org->vs_program = shader->vs.program;
44    org->ps_program = shader->ps.program;
45    org->gs_program = shader->gs.program;
46    org->gs_copy_program = shader->gs.copyProgram;
47 
48    shader->vs.program = MEM2_alloc(shader->vs.size, GX2_SHADER_ALIGNMENT);
49    memcpy(shader->vs.program, org->vs_program, shader->vs.size);
50    GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->vs.program, shader->vs.size);
51 
52    shader->ps.program = MEM2_alloc(shader->ps.size, GX2_SHADER_ALIGNMENT);
53    memcpy(shader->ps.program, org->ps_program, shader->ps.size);
54    GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->ps.program, shader->ps.size);
55 
56    if (org->gs_program)
57    {
58       shader->gs.program = MEM2_alloc(shader->gs.size, GX2_SHADER_ALIGNMENT);
59       memcpy(shader->gs.program, org->gs_program, shader->gs.size);
60       GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.program, shader->gs.size);
61 
62       shader->gs.copyProgram = MEM2_alloc(shader->gs.copyProgramSize, GX2_SHADER_ALIGNMENT);
63       memcpy(shader->gs.copyProgram, org->gs_copy_program, shader->gs.copyProgramSize);
64       GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.copyProgram, shader->gs.copyProgramSize);
65    }
66 
67 #endif
68 
69 }
70 
/* Undoes GX2InitShader(): releases the fetch shader and, when
 * GX2_CAN_ACCESS_DATA_SECTION is not defined, the MEM2 program copies, then
 * restores the original program pointers saved behind the fetch shader.
 *
 * Safe to call on a shader that was never initialised or was already
 * destroyed (shader->fs.program == NULL): in that state the program pointers
 * are still the originals and must not be freed. */
void GX2DestroyShader(GX2Shader *shader)
{
   if (!shader->fs.program)
      return;

#ifndef GX2_CAN_ACCESS_DATA_SECTION
   {
      /* saved by GX2InitShader() right behind the fetch shader program */
      org_programs_t *org = (org_programs_t *)((uint8_t *)shader->fs.program + shader->fs.size);

      MEM2_free(shader->vs.program);
      MEM2_free(shader->ps.program);

      /* GX2InitShader() only copies the geometry programs when the shader
       * originally had one; otherwise these are still the original pointers. */
      if (org->gs_program)
      {
         MEM2_free(shader->gs.program);
         MEM2_free(shader->gs.copyProgram);
      }

      shader->vs.program     = org->vs_program;
      shader->ps.program     = org->ps_program;
      shader->gs.program     = org->gs_program;
      shader->gs.copyProgram = org->gs_copy_program;
   }
#endif

   MEM2_free(shader->fs.program);
   shader->fs.program = NULL;
}
90 
/* Binds the stages of `shader`: vertex, pixel and fetch shader are always
 * set; the geometry shader is set only when the shader has a geometry
 * program. */
void GX2SetShader(GX2Shader *shader)
{
   GX2SetVertexShader(&shader->vs);
   GX2SetPixelShader(&shader->ps);
   GX2SetFetchShader(&shader->fs);

   /* no geometry stage: nothing more to bind */
   if (shader->gs.program == NULL)
      return;

   GX2SetGeometryShader(&shader->gs);
}
100 
/* Debug helper: prints the metadata of a vertex shader (size, mode, uniform
 * blocks, uniform variables, initial values, loop variables, sampler
 * variables and attribute variables) through the DEBUG_* macros.
 *
 * NOTE(review): the DEBUG_* macros come from wiiu_dbg.h, which is not visible
 * here; they presumably print the text of their argument, so the argument
 * expressions are deliberately left exactly as written - confirm before
 * renaming anything. */
void dump_vs_data(GX2VertexShader* vs)
{
   unsigned i;

   DEBUG_INT(vs->size);
   DEBUG_VAR(vs->mode);

   /* uniform blocks */
   DEBUG_INT(vs->uniformBlockCount);
   for (i = 0; i < vs->uniformBlockCount; i++)
   {
      DEBUG_STR(vs->uniformBlocks[i].name);
      DEBUG_INT(vs->uniformBlocks[i].offset);
      DEBUG_INT(vs->uniformBlocks[i].size);
   }

   /* uniform variables */
   DEBUG_INT(vs->uniformVarCount);
   for (i = 0; i < vs->uniformVarCount; i++)
   {
      DEBUG_STR(vs->uniformVars[i].name);
      DEBUG_INT(vs->uniformVars[i].offset);
      DEBUG_INT(vs->uniformVars[i].type);
      DEBUG_INT(vs->uniformVars[i].count);
      DEBUG_INT(vs->uniformVars[i].block);
   }

   /* initial values (four floats each) */
   DEBUG_INT(vs->initialValueCount);
   for (i = 0; i < vs->initialValueCount; i++)
   {
      DEBUG_INT(vs->initialValues[i].offset);
      DEBUG_FLOAT(vs->initialValues[i].value[0]);
      DEBUG_FLOAT(vs->initialValues[i].value[1]);
      DEBUG_FLOAT(vs->initialValues[i].value[2]);
      DEBUG_FLOAT(vs->initialValues[i].value[3]);
   }

   /* loop variables */
   DEBUG_INT(vs->loopVarCount);
   for (i = 0; i < vs->loopVarCount; i++)
   {
      DEBUG_INT(vs->loopVars[i].offset);
      DEBUG_VAR(vs->loopVars[i].value);
   }

   /* sampler variables */
   DEBUG_INT(vs->samplerVarCount);
   for (i = 0; i < vs->samplerVarCount; i++)
   {
      DEBUG_STR(vs->samplerVars[i].name);
      DEBUG_INT(vs->samplerVars[i].type);
      DEBUG_INT(vs->samplerVars[i].location);
   }

   /* attribute variables; the count is printed first, like every other
    * section of this dump */
   DEBUG_INT(vs->attribVarCount);
   for (i = 0; i < vs->attribVarCount; i++)
   {
      DEBUG_STR(vs->attribVars[i].name);
      DEBUG_VAR(vs->attribVars[i].type);
      DEBUG_INT(vs->attribVars[i].location);
      DEBUG_INT(vs->attribVars[i].count);
   }
}
155 
/* Debug helper: prints the metadata of a pixel shader (size, mode, uniform
 * blocks, uniform variables, initial values, loop variables and sampler
 * variables) through the DEBUG_* macros.
 *
 * NOTE(review): the DEBUG_* macros are defined in wiiu_dbg.h (not visible
 * here) and presumably print the text of their argument, so the argument
 * expressions are kept exactly as they were - confirm before renaming. */
void dump_ps_data(GX2PixelShader* ps)
{
   unsigned i = 0;

   DEBUG_INT(ps->size);
   DEBUG_VAR(ps->mode);

   /* uniform blocks */
   DEBUG_INT(ps->uniformBlockCount);
   for (i = 0; i != ps->uniformBlockCount; ++i)
   {
      DEBUG_STR(ps->uniformBlocks[i].name);
      DEBUG_INT(ps->uniformBlocks[i].offset);
      DEBUG_INT(ps->uniformBlocks[i].size);
   }

   /* uniform variables */
   DEBUG_INT(ps->uniformVarCount);
   for (i = 0; i != ps->uniformVarCount; ++i)
   {
      DEBUG_STR(ps->uniformVars[i].name);
      DEBUG_INT(ps->uniformVars[i].offset);
      DEBUG_INT(ps->uniformVars[i].type);
      DEBUG_INT(ps->uniformVars[i].count);
      DEBUG_INT(ps->uniformVars[i].block);
   }

   /* initial values (four floats each) */
   DEBUG_INT(ps->initialValueCount);
   for (i = 0; i != ps->initialValueCount; ++i)
   {
      DEBUG_INT(ps->initialValues[i].offset);
      DEBUG_FLOAT(ps->initialValues[i].value[0]);
      DEBUG_FLOAT(ps->initialValues[i].value[1]);
      DEBUG_FLOAT(ps->initialValues[i].value[2]);
      DEBUG_FLOAT(ps->initialValues[i].value[3]);
   }

   /* loop variables */
   DEBUG_INT(ps->loopVarCount);
   for (i = 0; i != ps->loopVarCount; ++i)
   {
      DEBUG_INT(ps->loopVars[i].offset);
      DEBUG_VAR(ps->loopVars[i].value);
   }

   /* sampler variables */
   DEBUG_INT(ps->samplerVarCount);
   for (i = 0; i != ps->samplerVarCount; ++i)
   {
      DEBUG_STR(ps->samplerVars[i].name);
      DEBUG_INT(ps->samplerVars[i].type);
      DEBUG_INT(ps->samplerVars[i].location);
   }
}
202 
/* Debug helper: prints both shader buffers side by side as 32-bit hex words
 * (four words per row, `shader` on the left, `org` on the right) and then
 * lists every word that differs, together with its byte-swapped value.
 *
 *   shader, shader_size : buffer under test and its size in bytes
 *   org, org_size       : reference buffer and its size in bytes
 *   name                : label printed at the top of the report
 *
 * A size mismatch is reported; only the words present in BOTH buffers are
 * dumped and compared, so neither buffer is read past its end. */
void check_shader_verbose(u32 *shader, u32 shader_size, u32 *org, u32 org_size, const char *name)
{
   unsigned i;
   /* number of 32-bit words that exist in both buffers */
   unsigned words = ((shader_size < org_size) ? shader_size : org_size) / 4;

   printf("%s :\n", name);
   DEBUG_VAR(shader_size);
   DEBUG_VAR(org_size);

   if (shader_size != org_size)
      printf("size mismatch : 0x%08X should be 0x%08X\n", shader_size, org_size);

   /* complete rows of four words */
   for (i = 0; i + 4 <= words; i += 4)
   {
      printf("0x%08X 0x%08X 0x%08X 0x%08X          0x%08X 0x%08X 0x%08X 0x%08X\n",
             shader[i], shader[i + 1], shader[i + 2], shader[i + 3],
             org[i], org[i + 1], org[i + 2], org[i + 3]);
   }

   /* trailing row with fewer than four words: print only what exists */
   if (i < words)
   {
      unsigned j;

      for (j = i; j < words; j++)
         printf("0x%08X ", shader[j]);

      /* pad the missing columns (11 characters each) plus the gap so the
       * right-hand column lines up with the complete rows */
      printf("%*s", (int)((i + 4 - words) * 11 + 9), "");

      for (j = i; j < words; j++)
         printf("%s0x%08X", (j == i) ? "" : " ", org[j]);

      printf("\n");
   }

   for (i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
         printf("%u(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X) \n", i, i, shader[i], __builtin_bswap32(shader[i]), org[i],
                __builtin_bswap32(org[i]));
   }
}
/* Debug helper: compares a shader buffer against a reference buffer word by
 * word and prints a one-line verdict ("no errors") or one line per finding.
 *
 *   shader_, shader_size : buffer under test and its size in bytes
 *   org_, org_size       : reference buffer and its size in bytes
 *   name                 : label printed in front of the report
 *
 * A size mismatch is reported as a difference; the word comparison then
 * covers only the words present in BOTH buffers, so neither buffer is read
 * past its end. */
void check_shader(const void *shader_, u32 shader_size, const void *org_, u32 org_size, const char *name)
{
   unsigned i;
   bool different    = false;
   const u32 *shader = (const u32 *)shader_;
   const u32 *org    = (const u32 *)org_;
   /* number of 32-bit words that exist in both buffers */
   unsigned words    = ((shader_size < org_size) ? shader_size : org_size) / 4;

   printf("%-20s : ", name);

   if (shader_size != org_size)
   {
      different = true;
      printf("\nsize mismatch : 0x%08X should be 0x%08X", shader_size, org_size);
   }

   for (i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
      {
         different = true;
         printf("\n%u(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X)", i, i, shader[i], __builtin_bswap32(shader[i]), org[i],
                __builtin_bswap32(org[i]));
      }
   }

   if (!different)
      printf("no errors");

   printf("\n");
}
258 
/* Packs four 8-bit character codes into one 32-bit tag, c0 in the most
 * significant byte. Every argument is parenthesised and converted to
 * uint32_t before shifting, so expression arguments and values >= 0x80
 * are handled correctly. */
#define MAKE_MAGIC(c0,c1,c2,c3) (((uint32_t)(c0) << 24) | ((uint32_t)(c1) << 16) | ((uint32_t)(c2) << 8) | ((uint32_t)(c3) << 0))

/* versions accepted by gfd_open() */
#define GFD_FILE_MAJOR_VERSION         7
#define GFD_FILE_GPU_VERSION           2
#define GFD_BLOCK_MAJOR_VERSION        1

/* magic numbers of the file header, block header and relocation header */
#define GFD_FILE_MAGIC                 MAKE_MAGIC('G','f','x','2')
#define GFD_BLOCK_MAGIC                MAKE_MAGIC('B','L','K','{')
#define GFD_BLOCK_RELOCATIONS_MAGIC    MAKE_MAGIC('}','B','L','K')

/* A relocation word carries a type tag in its upper 12 bits and a value
 * (used as an offset into the block data by gfd_relocate_block()) in the
 * lower 20 bits. */
#define GFD_RELOCATIONS_TYPE_MASK      0xFFF00000
#define GFD_RELOCATIONS_VALUE_MASK     (~GFD_RELOCATIONS_TYPE_MASK)
#define GFD_RELOCATIONS_DATA           0xD0600000
#define GFD_RELOCATIONS_TEXT           0xCA700000
272 
/* Values of GFDBlockHeader.type that gfd_open() knows about. Blocks of any
 * other type are skipped by gfd_open(). */
typedef enum GFDBlockType
{
   GFD_BLOCK_TYPE_END_OF_FILE           = 1, /* terminates the block list  */
   GFD_BLOCK_TYPE_PADDING               = 2,
   GFD_BLOCK_TYPE_VERTEX_SHADER_HEADER  = 3, /* GX2VertexShader structure  */
   GFD_BLOCK_TYPE_VERTEX_SHADER_PROGRAM = 5, /* vertex shader program data */
   GFD_BLOCK_TYPE_PIXEL_SHADER_HEADER   = 6, /* GX2PixelShader structure   */
   GFD_BLOCK_TYPE_PIXEL_SHADER_PROGRAM  = 7, /* pixel shader program data  */
} GFDBlockType;
282 
/* Header at the very start of a GFD file, as validated by gfd_open(). */
typedef struct GFDFileHeader
{
   uint32_t magic;         /* must be GFD_FILE_MAGIC                 */
   uint32_t headerSize;    /* must be sizeof(GFDFileHeader)          */
   uint32_t majorVersion;  /* must be GFD_FILE_MAJOR_VERSION         */
   uint32_t minorVersion;
   uint32_t gpuVersion;    /* must be GFD_FILE_GPU_VERSION           */
   uint32_t align;         /* must be non-zero                       */
   uint32_t unk1;          /* unknown                                */
   uint32_t unk2;          /* unknown                                */
} GFDFileHeader;
294 
295 typedef struct
296 {
297    uint32_t magic;
298    uint32_t headerSize;
299    uint32_t majorVersion;
300    uint32_t minorVersion;
301    GFDBlockType type;
302    uint32_t dataSize;
303    uint32_t id;
304    uint32_t index;
305 } GFDBlockHeader;
306 
/* Relocation header; gfd_relocate_block() expects it in the last
 * sizeof(GFDRelocationHeader) bytes of a shader header block's data. */
typedef struct GFDRelocationHeader
{
   uint32_t magic;         /* must be GFD_BLOCK_RELOCATIONS_MAGIC            */
   uint32_t headerSize;
   uint32_t unk1;          /* unknown                                        */
   uint32_t dataSize;
   uint32_t dataOffset;
   uint32_t textSize;
   uint32_t textOffset;
   uint32_t patchBase;
   uint32_t patchCount;    /* number of entries in the patch table           */
   uint32_t patchOffset;   /* tagged offset of the patch table in the block  */
} GFDRelocationHeader;
320 
321 typedef struct
322 {
323    GFDBlockHeader header;
324    u8 data[];
325 } GFDBlock;
326 
/* Releases a GFDFile returned by gfd_open(): frees the MEM2 buffer holding
 * the file contents, then the GFDFile itself. A NULL `gfd` is ignored. */
void gfd_free(GFDFile* gfd)
{
   if (!gfd)
      return;

   MEM2_free(gfd->data);
   free(gfd);
}
335 
/* Applies the relocations of a shader header block in place.
 *
 * The last sizeof(GFDRelocationHeader) bytes of the block data hold the
 * relocation header. Its patch table lists tagged offsets of 32-bit words
 * inside the block; each of those words is itself a tagged offset and is
 * rewritten into an absolute address (block->data + offset).
 *
 * All offsets and counts come from the file, so they are validated against
 * block->header.dataSize before anything is read or written.
 *
 * Returns true on success, false (after printing a message) when the block
 * is malformed. Words patched before a failure was detected stay patched. */
static bool gfd_relocate_block(GFDBlock* block)
{
   unsigned i;
   const u32 data_size = block->header.dataSize;

   /* the relocation header has to fit into the block data */
   if (data_size < sizeof(GFDRelocationHeader))
   {
      printf("block too small for relocations.\n");
      return false;
   }

   GFDRelocationHeader* rel = (GFDRelocationHeader*)(block->data + data_size) - 1;

   if (rel->magic != GFD_BLOCK_RELOCATIONS_MAGIC)
   {
      printf("wrong relocations magic number.\n");
      return false;
   }

   if((rel->patchOffset & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA)
   {
      printf("wrong data relocations mask.\n");
      return false;
   }

   /* the whole patch table has to lie inside the block data */
   const u32 table_offset = rel->patchOffset & GFD_RELOCATIONS_VALUE_MASK;

   if (table_offset > data_size ||
       rel->patchCount > (data_size - table_offset) / sizeof(u32))
   {
      printf("relocation table out of bounds.\n");
      return false;
   }

   u32* patches = (u32*)(block->data + table_offset);

   for (i = 0; i < rel->patchCount; i++)
   {
      /* empty entries are skipped */
      if(!patches[i])
         continue;

      if((patches[i] & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA)
      {
         printf("wrong patch relocations mask.\n");
         return false;
      }

      /* the word to patch has to lie inside the block data
       * (data_size >= sizeof(GFDRelocationHeader), so no underflow) */
      const u32 patch_offset = patches[i] & GFD_RELOCATIONS_VALUE_MASK;

      if (patch_offset > data_size - sizeof(u32))
      {
         printf("patch offset out of bounds.\n");
         return false;
      }

      u32* ptr = (u32*)(block->data + patch_offset);

      if((((*ptr) & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA) &&
         (((*ptr) & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_TEXT))
      {
         printf("wrong relocations mask.\n");
         return false;
      }

      /* tagged offset -> absolute address inside this block */
      *ptr = (u32)(uintptr_t)block->data + ((*ptr) & GFD_RELOCATIONS_VALUE_MASK);
   }

   return true;
}
378 
gfd_open(const char * filename)379 GFDFile *gfd_open(const char *filename)
380 {
381    GFDFile* gfd = calloc(1, sizeof(*gfd));
382    FILE     *fp = fopen(filename, "rb");
383 
384    if (!fp)
385       goto error;
386 
387    fseek(fp, 0, SEEK_END);
388    int size = ftell(fp);
389    fseek(fp, 0, SEEK_SET);
390    gfd->data = MEM2_alloc(size, GX2_SHADER_ALIGNMENT);
391    fread(gfd->data, 1, size, fp);
392    fclose(fp);
393 
394    GFDFileHeader *header = (GFDFileHeader *)gfd->data;
395 
396    if (header->magic != GFD_FILE_MAGIC)
397    {
398       printf("wrong file magic number.\n");
399       goto error;
400    }
401 
402    if (header->headerSize != sizeof(GFDFileHeader))
403    {
404       printf("wrong file header size.\n");
405       goto error;
406    }
407 
408    if (header->majorVersion != GFD_FILE_MAJOR_VERSION)
409    {
410       printf("file version not supported.\n");
411       goto error;
412    }
413 
414    if (header->gpuVersion != GFD_FILE_GPU_VERSION)
415    {
416       printf("gpu version not supported.\n");
417       goto error;
418    }
419 
420    if (!header->align)
421    {
422       printf("data is not aligned.\n");
423       goto error;
424    }
425 
426    GFDBlock *block = (GFDBlock *)(gfd->data + header->headerSize);
427 
428    while (block->header.type != GFD_BLOCK_TYPE_END_OF_FILE)
429    {
430       if (block->header.magic != GFD_BLOCK_MAGIC)
431       {
432          printf("wrong block magic number.\n");
433          goto error;
434       }
435 
436       if (block->header.headerSize != sizeof(GFDBlockHeader))
437       {
438          printf("wrong block header size.\n");
439          goto error;
440       }
441 
442       if (block->header.majorVersion != GFD_BLOCK_MAJOR_VERSION)
443       {
444          printf("block version not supported.\n");
445          goto error;
446       }
447 
448       switch (block->header.type)
449       {
450       case GFD_BLOCK_TYPE_VERTEX_SHADER_HEADER:
451          if (gfd->vs)
452             continue;
453 
454          gfd->vs = (GX2VertexShader*)block->data;
455          if(!gfd_relocate_block(block))
456             goto error;
457 
458          break;
459 
460       case GFD_BLOCK_TYPE_VERTEX_SHADER_PROGRAM:
461          if(gfd->vs->program)
462             continue;
463 
464          GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, block->data, block->header.dataSize);
465          gfd->vs->program = block->data;
466          break;
467 
468       case GFD_BLOCK_TYPE_PIXEL_SHADER_HEADER:
469          if (gfd->ps)
470             continue;
471 
472          gfd->ps = (GX2PixelShader*)block->data;
473          if(!gfd_relocate_block(block))
474             goto error;
475 
476          break;
477 
478       case GFD_BLOCK_TYPE_PIXEL_SHADER_PROGRAM:
479          if(gfd->ps->program)
480             continue;
481 
482          GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, block->data, block->header.dataSize);
483          gfd->ps->program = block->data;
484          break;
485 
486       default:
487          break;
488       }
489 
490       block = (GFDBlock *)((u8 *)block + block->header.headerSize + block->header.dataSize);
491    }
492 
493    if(!gfd->vs)
494    {
495       printf("vertex shader is missing.\n");
496       goto error;
497    }
498 
499    if(!gfd->vs->program)
500    {
501       printf("vertex shader program is missing.\n");
502       goto error;
503    }
504 
505    if(!gfd->ps)
506    {
507       printf("pixel shader is missing.\n");
508       goto error;
509    }
510 
511    if(!gfd->ps->program)
512    {
513       printf("pixel shader program is missing.\n");
514       goto error;
515    }
516 
517    return gfd;
518 
519 error:
520    printf("failed to open file : %s\n", filename);
521    gfd_free(gfd);
522 
523    return NULL;
524 }
525