1
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wiiu/gx2.h>
#include <wiiu/system/memory.h>
#include <wiiu/shader_utils.h>
#include <wiiu/wiiu_dbg.h>
10
11 /* this is a hack for elf builds since their data section is below 0x10000000
12 * and thus can't be accessed by the GX2 hardware */
#ifndef GX2_CAN_ACCESS_DATA_SECTION
/* Record of the shader program pointers a GX2Shader held before
 * GX2InitShader() replaced them with MEM2 copies. It is stored right after
 * the fetch shader program so GX2DestroyShader() can restore them. */
typedef struct
{
   void *vs_program;      /* original shader->vs.program     */
   void *ps_program;      /* original shader->ps.program     */
   void *gs_program;      /* original shader->gs.program     */
   void *gs_copy_program; /* original shader->gs.copyProgram */
} org_programs_t;
#endif
22
/**
 * Prepares a GX2Shader for use.
 *
 * Builds the fetch shader in a MEM2 buffer. When GX2_CAN_ACCESS_DATA_SECTION
 * is not defined, the vertex, pixel and (if present) geometry programs are
 * additionally copied into MEM2 buffers, and the original pointers are saved
 * in an org_programs_t record placed right after the fetch shader program so
 * that GX2DestroyShader() can restore them.
 *
 * The call is a no-op when shader->fs.program is already set. If any
 * allocation fails, everything allocated so far is released, the program
 * pointers are left untouched and shader->fs.program stays NULL, so the
 * caller can detect the failure and retry later.
 *
 * @param shader shader to initialise; must not be NULL.
 */
void GX2InitShader(GX2Shader *shader)
{
   void *fs_program;
#ifndef GX2_CAN_ACCESS_DATA_SECTION
   org_programs_t *org;
   void *vs_copy      = NULL;
   void *ps_copy      = NULL;
   void *gs_copy      = NULL;
   void *gs_copy_copy = NULL;
   bool has_gs;
#endif

   if (shader->fs.program)
      return;

   shader->fs.size = GX2CalcFetchShaderSizeEx(shader->vs.attribVarCount,
         GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);

#ifdef GX2_CAN_ACCESS_DATA_SECTION
   fs_program = MEM2_alloc(shader->fs.size, GX2_SHADER_ALIGNMENT);

   if (!fs_program)
   {
      printf("GX2InitShader: out of memory.\n");
      return;
   }
#else
   /* Acquire every buffer before touching the shader so that a failed
    * allocation cannot leave it half converted. */
   has_gs     = shader->gs.program != NULL;
   fs_program = MEM2_alloc(shader->fs.size + sizeof(org_programs_t), GX2_SHADER_ALIGNMENT);
   vs_copy    = MEM2_alloc(shader->vs.size, GX2_SHADER_ALIGNMENT);
   ps_copy    = MEM2_alloc(shader->ps.size, GX2_SHADER_ALIGNMENT);

   if (has_gs)
   {
      gs_copy      = MEM2_alloc(shader->gs.size, GX2_SHADER_ALIGNMENT);
      gs_copy_copy = MEM2_alloc(shader->gs.copyProgramSize, GX2_SHADER_ALIGNMENT);
   }

   if (!fs_program || !vs_copy || !ps_copy || (has_gs && (!gs_copy || !gs_copy_copy)))
   {
      if (gs_copy_copy)
         MEM2_free(gs_copy_copy);
      if (gs_copy)
         MEM2_free(gs_copy);
      if (ps_copy)
         MEM2_free(ps_copy);
      if (vs_copy)
         MEM2_free(vs_copy);
      if (fs_program)
         MEM2_free(fs_program);

      printf("GX2InitShader: out of memory.\n");
      return;
   }
#endif

   shader->fs.program = fs_program;
   GX2InitFetchShaderEx(&shader->fs, (uint8_t *)shader->fs.program,
         shader->vs.attribVarCount,
         shader->attribute_stream,
         GX2_FETCH_SHADER_TESSELLATION_NONE, GX2_TESSELLATION_MODE_DISCRETE);
   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->fs.program, shader->fs.size);

#ifndef GX2_CAN_ACCESS_DATA_SECTION
   /* The saved-pointer record lives directly behind the fetch shader code.
    * Byte arithmetic is done on uint8_t * because arithmetic on void * is a
    * compiler extension. */
   org = (org_programs_t *)((uint8_t *)shader->fs.program + shader->fs.size);
   org->vs_program      = shader->vs.program;
   org->ps_program      = shader->ps.program;
   org->gs_program      = shader->gs.program;
   org->gs_copy_program = shader->gs.copyProgram;

   shader->vs.program = vs_copy;
   memcpy(shader->vs.program, org->vs_program, shader->vs.size);
   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->vs.program, shader->vs.size);

   shader->ps.program = ps_copy;
   memcpy(shader->ps.program, org->ps_program, shader->ps.size);
   GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->ps.program, shader->ps.size);

   if (has_gs)
   {
      shader->gs.program = gs_copy;
      memcpy(shader->gs.program, org->gs_program, shader->gs.size);
      GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.program, shader->gs.size);

      shader->gs.copyProgram = gs_copy_copy;
      memcpy(shader->gs.copyProgram, org->gs_copy_program, shader->gs.copyProgramSize);
      GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->gs.copyProgram, shader->gs.copyProgramSize);
   }
#endif
}
70
/**
 * Releases what GX2InitShader() allocated for a shader.
 *
 * When GX2_CAN_ACCESS_DATA_SECTION is not defined, the MEM2 copies of the
 * programs are freed and the original program pointers are restored from the
 * org_programs_t record stored behind the fetch shader program. The fetch
 * shader buffer is then freed and shader->fs.program is reset to NULL.
 *
 * Calling this on a shader whose fs.program is NULL (never initialised, or
 * already destroyed) does nothing; without that guard the original
 * programs would be handed to MEM2_free().
 *
 * @param shader shader to tear down; must not be NULL.
 */
void GX2DestroyShader(GX2Shader *shader)
{
   if (!shader->fs.program)
      return;

#ifndef GX2_CAN_ACCESS_DATA_SECTION
   /* uint8_t * arithmetic: arithmetic on void * is a compiler extension. */
   org_programs_t *org = (org_programs_t *)((uint8_t *)shader->fs.program + shader->fs.size);

   MEM2_free(shader->vs.program);
   MEM2_free(shader->ps.program);

   /* Geometry programs were only copied when an original one existed;
    * otherwise these fields still hold the caller's own pointers. */
   if (org->gs_program)
   {
      MEM2_free(shader->gs.program);
      MEM2_free(shader->gs.copyProgram);
   }

   shader->vs.program     = org->vs_program;
   shader->ps.program     = org->ps_program;
   shader->gs.program     = org->gs_program;
   shader->gs.copyProgram = org->gs_copy_program;
#endif

   MEM2_free(shader->fs.program);
   shader->fs.program = NULL;
}
90
/**
 * Binds the stages of a GX2Shader: vertex, pixel and fetch shader are always
 * set; the geometry shader is set only when a geometry program is present.
 *
 * @param shader shader to bind; must not be NULL.
 */
void GX2SetShader(GX2Shader *shader)
{
   GX2SetVertexShader(&shader->vs);
   GX2SetPixelShader(&shader->ps);
   GX2SetFetchShader(&shader->fs);

   /* Nothing more to do for shaders without a geometry stage. */
   if (!shader->gs.program)
      return;

   GX2SetGeometryShader(&shader->gs);
}
100
/**
 * Debug helper: prints the metadata tables of a vertex shader through the
 * DEBUG_* macros (size, mode, uniform blocks, uniform variables, initial
 * values, loop variables, sampler variables and attribute variables).
 *
 * Each table is preceded by its element count. The attribute table was the
 * only one printed without it, so its count is now emitted as well.
 *
 * @param vs vertex shader to dump; must not be NULL.
 */
void dump_vs_data(GX2VertexShader* vs)
{
   unsigned i;

   DEBUG_INT(vs->size);
   DEBUG_VAR(vs->mode);

   DEBUG_INT(vs->uniformBlockCount);
   for (i = 0; i < vs->uniformBlockCount; i++)
   {
      DEBUG_STR(vs->uniformBlocks[i].name);
      DEBUG_INT(vs->uniformBlocks[i].offset);
      DEBUG_INT(vs->uniformBlocks[i].size);
   }

   DEBUG_INT(vs->uniformVarCount);
   for (i = 0; i < vs->uniformVarCount; i++)
   {
      DEBUG_STR(vs->uniformVars[i].name);
      DEBUG_INT(vs->uniformVars[i].offset);
      DEBUG_INT(vs->uniformVars[i].type);
      DEBUG_INT(vs->uniformVars[i].count);
      DEBUG_INT(vs->uniformVars[i].block);
   }

   DEBUG_INT(vs->initialValueCount);
   for (i = 0; i < vs->initialValueCount; i++)
   {
      DEBUG_INT(vs->initialValues[i].offset);
      DEBUG_FLOAT(vs->initialValues[i].value[0]);
      DEBUG_FLOAT(vs->initialValues[i].value[1]);
      DEBUG_FLOAT(vs->initialValues[i].value[2]);
      DEBUG_FLOAT(vs->initialValues[i].value[3]);
   }

   DEBUG_INT(vs->loopVarCount);
   for (i = 0; i < vs->loopVarCount; i++)
   {
      DEBUG_INT(vs->loopVars[i].offset);
      DEBUG_VAR(vs->loopVars[i].value);
   }

   DEBUG_INT(vs->samplerVarCount);
   for (i = 0; i < vs->samplerVarCount; i++)
   {
      DEBUG_STR(vs->samplerVars[i].name);
      DEBUG_INT(vs->samplerVars[i].type);
      DEBUG_INT(vs->samplerVars[i].location);
   }

   DEBUG_INT(vs->attribVarCount);
   for (i = 0; i < vs->attribVarCount; i++)
   {
      DEBUG_STR(vs->attribVars[i].name);
      DEBUG_VAR(vs->attribVars[i].type);
      DEBUG_INT(vs->attribVars[i].location);
      DEBUG_INT(vs->attribVars[i].count);
   }
}
155
/**
 * Debug helper: prints the metadata tables of a pixel shader through the
 * DEBUG_* macros (size, mode, uniform blocks, uniform variables, initial
 * values, loop variables and sampler variables), each table preceded by its
 * element count.
 *
 * The expressions handed to the DEBUG_* macros are kept verbatim in case the
 * macros print their argument text.
 *
 * @param ps pixel shader to dump; must not be NULL.
 */
void dump_ps_data(GX2PixelShader* ps)
{
   /* General information. */
   DEBUG_INT(ps->size);
   DEBUG_VAR(ps->mode);

   /* Uniform blocks. */
   DEBUG_INT(ps->uniformBlockCount);
   for (unsigned i = 0; i < ps->uniformBlockCount; ++i)
   {
      DEBUG_STR(ps->uniformBlocks[i].name);
      DEBUG_INT(ps->uniformBlocks[i].offset);
      DEBUG_INT(ps->uniformBlocks[i].size);
   }

   /* Uniform variables. */
   DEBUG_INT(ps->uniformVarCount);
   for (unsigned i = 0; i < ps->uniformVarCount; ++i)
   {
      DEBUG_STR(ps->uniformVars[i].name);
      DEBUG_INT(ps->uniformVars[i].offset);
      DEBUG_INT(ps->uniformVars[i].type);
      DEBUG_INT(ps->uniformVars[i].count);
      DEBUG_INT(ps->uniformVars[i].block);
   }

   /* Initial values (four floats each). */
   DEBUG_INT(ps->initialValueCount);
   for (unsigned i = 0; i < ps->initialValueCount; ++i)
   {
      DEBUG_INT(ps->initialValues[i].offset);
      DEBUG_FLOAT(ps->initialValues[i].value[0]);
      DEBUG_FLOAT(ps->initialValues[i].value[1]);
      DEBUG_FLOAT(ps->initialValues[i].value[2]);
      DEBUG_FLOAT(ps->initialValues[i].value[3]);
   }

   /* Loop variables. */
   DEBUG_INT(ps->loopVarCount);
   for (unsigned i = 0; i < ps->loopVarCount; ++i)
   {
      DEBUG_INT(ps->loopVars[i].offset);
      DEBUG_VAR(ps->loopVars[i].value);
   }

   /* Sampler variables. */
   DEBUG_INT(ps->samplerVarCount);
   for (unsigned i = 0; i < ps->samplerVarCount; ++i)
   {
      DEBUG_STR(ps->samplerVars[i].name);
      DEBUG_INT(ps->samplerVars[i].type);
      DEBUG_INT(ps->samplerVars[i].location);
   }
}
202
/**
 * Debug helper: dumps a shader program next to a reference program and then
 * lists every 32-bit word that differs.
 *
 * Only the words present in BOTH buffers are dumped and compared, so neither
 * buffer is read past its stated size (a size mismatch is reported
 * separately). The dump prints up to four words of `shader` followed by the
 * matching words of `org` per line; a trailing partial row prints only the
 * words that exist.
 *
 * @param shader      program under test.
 * @param shader_size size of `shader` in bytes.
 * @param org         reference program.
 * @param org_size    size of `org` in bytes.
 * @param name        label printed at the top of the report.
 */
void check_shader_verbose(u32 *shader, u32 shader_size, u32 *org, u32 org_size, const char *name)
{
   unsigned i, j;
   unsigned shader_words = shader_size / 4;
   unsigned org_words    = org_size / 4;
   unsigned words        = shader_words < org_words ? shader_words : org_words;

   printf("%s :\n", name);
   DEBUG_VAR(shader_size);
   DEBUG_VAR(org_size);

   if (shader_size != org_size)
      printf("size mismatch : 0x%08X should be 0x%08X\n", shader_size, org_size);

   for (i = 0; i < words; i += 4)
   {
      /* A full row holds 4 words; the last row may be shorter. */
      unsigned row = (words - i < 4) ? (words - i) : 4;

      for (j = 0; j < row; j++)
         printf("0x%08X ", shader[i + j]);

      for (j = 0; j < row; j++)
         printf("0x%08X%c", org[i + j], (j + 1 < row) ? ' ' : '\n');
   }

   for (i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
         printf("%u(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X) \n", i, i, shader[i], __builtin_bswap32(shader[i]), org[i],
               __builtin_bswap32(org[i]));
   }
}
/**
 * Debug helper: compares a shader program against a reference program word
 * by word and prints a one-line verdict ("no errors") or the list of
 * differences.
 *
 * Only the words present in BOTH buffers are compared, so the reference is
 * never read past `org_size` when it is the shorter of the two; a size
 * mismatch is reported on its own line and counts as a difference.
 *
 * @param shader_     program under test.
 * @param shader_size size of `shader_` in bytes.
 * @param org_        reference program.
 * @param org_size    size of `org_` in bytes.
 * @param name        label printed in front of the verdict.
 */
void check_shader(const void *shader_, u32 shader_size, const void *org_, u32 org_size, const char *name)
{
   unsigned i;
   bool different       = false;
   const u32 *shader    = (const u32 *)shader_;
   const u32 *org       = (const u32 *)org_;
   unsigned shader_words = shader_size / 4;
   unsigned org_words    = org_size / 4;
   unsigned words        = shader_words < org_words ? shader_words : org_words;

   printf("%-20s : ", name);

   if (shader_size != org_size)
   {
      different = true;
      printf("\nsize mismatch : 0x%08X should be 0x%08X", shader_size, org_size);
   }

   for (i = 0; i < words; i++)
   {
      if (shader[i] != org[i])
      {
         different = true;
         printf("\n%u(%X): 0x%08X(0x%08X) should be 0x%08X(0x%08X)", i, i, shader[i], __builtin_bswap32(shader[i]), org[i],
               __builtin_bswap32(org[i]));
      }
   }

   if (!different)
      printf("no errors");

   printf("\n");
}
258
/* Packs four bytes into a 32-bit big-endian tag (c0 is the most significant
 * byte). Arguments are parenthesised and widened to uint32_t before shifting
 * so that expression arguments and bytes >= 0x80 are handled correctly. */
#define MAKE_MAGIC(c0,c1,c2,c3) (((uint32_t)(c0) << 24) | ((uint32_t)(c1) << 16) | ((uint32_t)(c2) << 8) | ((uint32_t)(c3) << 0))

/* Versions accepted by gfd_open(). */
#define GFD_FILE_MAJOR_VERSION 7
#define GFD_FILE_GPU_VERSION 2
#define GFD_BLOCK_MAJOR_VERSION 1

/* Tags identifying the file header, a block header and the relocation
 * header found at the end of a shader header block. */
#define GFD_FILE_MAGIC MAKE_MAGIC('G','f','x','2')
#define GFD_BLOCK_MAGIC MAKE_MAGIC('B','L','K','{')
#define GFD_BLOCK_RELOCATIONS_MAGIC MAKE_MAGIC('}','B','L','K')

/* A relocatable value carries a type tag in its upper 12 bits and a
 * block-relative offset in its lower 20 bits. */
#define GFD_RELOCATIONS_TYPE_MASK 0xFFF00000
#define GFD_RELOCATIONS_VALUE_MASK (~GFD_RELOCATIONS_TYPE_MASK)
#define GFD_RELOCATIONS_DATA 0xD0600000
#define GFD_RELOCATIONS_TEXT 0xCA700000
272
/* Block type identifiers stored in GFDBlockHeader.type. gfd_open() acts on
 * the shader header/program types and stops at END_OF_FILE; any other type
 * is skipped. */
typedef enum
{
   GFD_BLOCK_TYPE_END_OF_FILE           = 1, /* terminates the block list */
   GFD_BLOCK_TYPE_PADDING               = 2,
   GFD_BLOCK_TYPE_VERTEX_SHADER_HEADER  = 3, /* GX2VertexShader structure */
   GFD_BLOCK_TYPE_VERTEX_SHADER_PROGRAM = 5, /* vertex shader code        */
   GFD_BLOCK_TYPE_PIXEL_SHADER_HEADER   = 6, /* GX2PixelShader structure  */
   GFD_BLOCK_TYPE_PIXEL_SHADER_PROGRAM  = 7, /* pixel shader code         */
} GFDBlockType;
282
/* Header found at the very start of a GFD file, as validated by
 * gfd_open(). */
typedef struct
{
   uint32_t magic;        /* must equal GFD_FILE_MAGIC                 */
   uint32_t headerSize;   /* must equal sizeof(GFDFileHeader)          */
   uint32_t majorVersion; /* must equal GFD_FILE_MAJOR_VERSION         */
   uint32_t minorVersion; /* not checked by this file                  */
   uint32_t gpuVersion;   /* must equal GFD_FILE_GPU_VERSION           */
   uint32_t align;        /* must be non-zero for the file to be used  */
   uint32_t unk1;         /* unknown                                   */
   uint32_t unk2;         /* unknown                                   */
} GFDFileHeader;
294
/* Header that precedes the payload of every block in a GFD file.
 *
 * This structure is overlaid directly on file data, so every field has a
 * fixed width. `type` holds a GFDBlockType value but is declared uint32_t:
 * the size of an enum object is implementation-defined and must not decide
 * the on-disk layout. */
typedef struct
{
   uint32_t magic;        /* must equal GFD_BLOCK_MAGIC                */
   uint32_t headerSize;   /* must equal sizeof(GFDBlockHeader)         */
   uint32_t majorVersion; /* must equal GFD_BLOCK_MAJOR_VERSION        */
   uint32_t minorVersion; /* not checked by this file                  */
   uint32_t type;         /* one of the GFD_BLOCK_TYPE_* values        */
   uint32_t dataSize;     /* payload size in bytes, header excluded    */
   uint32_t id;           /* not read by this file                     */
   uint32_t index;        /* not read by this file                     */
} GFDBlockHeader;
306
/* Relocation header occupying the last sizeof(GFDRelocationHeader) bytes of
 * a shader header block's payload; consumed by gfd_relocate_block(). */
typedef struct
{
   uint32_t magic;       /* must equal GFD_BLOCK_RELOCATIONS_MAGIC          */
   uint32_t headerSize;  /* not read by this file                           */
   uint32_t unk1;        /* unknown                                         */
   uint32_t dataSize;    /* not read by this file                           */
   uint32_t dataOffset;  /* not read by this file                           */
   uint32_t textSize;    /* not read by this file                           */
   uint32_t textOffset;  /* not read by this file                           */
   uint32_t patchBase;   /* not read by this file                           */
   uint32_t patchCount;  /* number of entries in the patch table            */
   uint32_t patchOffset; /* GFD_RELOCATIONS_DATA-tagged offset of the table */
} GFDRelocationHeader;
320
321 typedef struct
322 {
323 GFDBlockHeader header;
324 u8 data[];
325 } GFDBlock;
326
/**
 * Releases a GFDFile: its data buffer goes back to MEM2 and the descriptor
 * itself is freed. Passing NULL is allowed and does nothing.
 *
 * @param gfd file to release, or NULL.
 */
void gfd_free(GFDFile* gfd)
{
   if (!gfd)
      return;

   MEM2_free(gfd->data);
   free(gfd);
}
335
/**
 * Applies the relocations of a shader header block in place.
 *
 * The last sizeof(GFDRelocationHeader) bytes of the payload hold a
 * relocation header. It points at a table of `patchCount` tagged offsets;
 * every non-zero entry names a 32-bit slot in the payload whose tagged,
 * block-relative value is replaced by the absolute address
 * `block->data + offset`.
 *
 * The payload comes straight from a file, so the relocation header, the
 * patch table and every patched slot are verified to lie inside
 * `header.dataSize` before they are accessed.
 *
 * @param block block whose payload is relocated; must not be NULL.
 * @return true on success; false (after printing a diagnostic) when the
 *         relocation data is malformed. Slots patched before the error was
 *         detected stay patched.
 */
static bool gfd_relocate_block(GFDBlock* block)
{
   unsigned i;
   uint32_t data_size = block->header.dataSize;
   uint32_t table_offset;
   GFDRelocationHeader *rel;
   u32 *patches;

   if (data_size < sizeof(GFDRelocationHeader))
   {
      printf("block too small for relocations.\n");
      return false;
   }

   /* The relocation header is the final structure of the payload. */
   rel = (GFDRelocationHeader*)(block->data + data_size) - 1;

   if (rel->magic != GFD_BLOCK_RELOCATIONS_MAGIC)
   {
      printf("wrong relocations magic number.\n");
      return false;
   }

   if ((rel->patchOffset & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA)
   {
      printf("wrong data relocations mask.\n");
      return false;
   }

   table_offset = rel->patchOffset & GFD_RELOCATIONS_VALUE_MASK;

   /* Division instead of multiplication keeps the size check overflow-free. */
   if (table_offset > data_size ||
         rel->patchCount > (data_size - table_offset) / sizeof(u32))
   {
      printf("relocation table out of bounds.\n");
      return false;
   }

   patches = (u32*)(block->data + table_offset);

   for (i = 0; i < rel->patchCount; i++)
   {
      uint32_t patch = patches[i];
      uint32_t slot_offset;
      u32 *ptr;

      /* Empty table entries are skipped. */
      if (!patch)
         continue;

      if ((patch & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA)
      {
         printf("wrong patch relocations mask.\n");
         return false;
      }

      slot_offset = patch & GFD_RELOCATIONS_VALUE_MASK;

      if (slot_offset > data_size || data_size - slot_offset < sizeof(u32))
      {
         printf("relocation target out of bounds.\n");
         return false;
      }

      ptr = (u32*)(block->data + slot_offset);

      if ((((*ptr) & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_DATA) &&
            (((*ptr) & GFD_RELOCATIONS_TYPE_MASK) != GFD_RELOCATIONS_TEXT))
      {
         printf("wrong relocations mask.\n");
         return false;
      }

      /* Replace the tagged offset with the absolute address of its target. */
      *ptr = (u32)(uintptr_t)block->data + ((*ptr) & GFD_RELOCATIONS_VALUE_MASK);
   }

   return true;
}
378
gfd_open(const char * filename)379 GFDFile *gfd_open(const char *filename)
380 {
381 GFDFile* gfd = calloc(1, sizeof(*gfd));
382 FILE *fp = fopen(filename, "rb");
383
384 if (!fp)
385 goto error;
386
387 fseek(fp, 0, SEEK_END);
388 int size = ftell(fp);
389 fseek(fp, 0, SEEK_SET);
390 gfd->data = MEM2_alloc(size, GX2_SHADER_ALIGNMENT);
391 fread(gfd->data, 1, size, fp);
392 fclose(fp);
393
394 GFDFileHeader *header = (GFDFileHeader *)gfd->data;
395
396 if (header->magic != GFD_FILE_MAGIC)
397 {
398 printf("wrong file magic number.\n");
399 goto error;
400 }
401
402 if (header->headerSize != sizeof(GFDFileHeader))
403 {
404 printf("wrong file header size.\n");
405 goto error;
406 }
407
408 if (header->majorVersion != GFD_FILE_MAJOR_VERSION)
409 {
410 printf("file version not supported.\n");
411 goto error;
412 }
413
414 if (header->gpuVersion != GFD_FILE_GPU_VERSION)
415 {
416 printf("gpu version not supported.\n");
417 goto error;
418 }
419
420 if (!header->align)
421 {
422 printf("data is not aligned.\n");
423 goto error;
424 }
425
426 GFDBlock *block = (GFDBlock *)(gfd->data + header->headerSize);
427
428 while (block->header.type != GFD_BLOCK_TYPE_END_OF_FILE)
429 {
430 if (block->header.magic != GFD_BLOCK_MAGIC)
431 {
432 printf("wrong block magic number.\n");
433 goto error;
434 }
435
436 if (block->header.headerSize != sizeof(GFDBlockHeader))
437 {
438 printf("wrong block header size.\n");
439 goto error;
440 }
441
442 if (block->header.majorVersion != GFD_BLOCK_MAJOR_VERSION)
443 {
444 printf("block version not supported.\n");
445 goto error;
446 }
447
448 switch (block->header.type)
449 {
450 case GFD_BLOCK_TYPE_VERTEX_SHADER_HEADER:
451 if (gfd->vs)
452 continue;
453
454 gfd->vs = (GX2VertexShader*)block->data;
455 if(!gfd_relocate_block(block))
456 goto error;
457
458 break;
459
460 case GFD_BLOCK_TYPE_VERTEX_SHADER_PROGRAM:
461 if(gfd->vs->program)
462 continue;
463
464 GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, block->data, block->header.dataSize);
465 gfd->vs->program = block->data;
466 break;
467
468 case GFD_BLOCK_TYPE_PIXEL_SHADER_HEADER:
469 if (gfd->ps)
470 continue;
471
472 gfd->ps = (GX2PixelShader*)block->data;
473 if(!gfd_relocate_block(block))
474 goto error;
475
476 break;
477
478 case GFD_BLOCK_TYPE_PIXEL_SHADER_PROGRAM:
479 if(gfd->ps->program)
480 continue;
481
482 GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, block->data, block->header.dataSize);
483 gfd->ps->program = block->data;
484 break;
485
486 default:
487 break;
488 }
489
490 block = (GFDBlock *)((u8 *)block + block->header.headerSize + block->header.dataSize);
491 }
492
493 if(!gfd->vs)
494 {
495 printf("vertex shader is missing.\n");
496 goto error;
497 }
498
499 if(!gfd->vs->program)
500 {
501 printf("vertex shader program is missing.\n");
502 goto error;
503 }
504
505 if(!gfd->ps)
506 {
507 printf("pixel shader is missing.\n");
508 goto error;
509 }
510
511 if(!gfd->ps->program)
512 {
513 printf("pixel shader program is missing.\n");
514 goto error;
515 }
516
517 return gfd;
518
519 error:
520 printf("failed to open file : %s\n", filename);
521 gfd_free(gfd);
522
523 return NULL;
524 }
525