#pragma once
#include <stdbool.h>
#include <stdint.h>

#include <wiiu/types.h>
#include <wiiu/gx2r/buffer.h>
#include "enum.h"
#include "sampler.h"

/* Give every declaration below C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

11 typedef struct GX2FetchShader
12 {
13    GX2FetchShaderType type;
14 
15    struct
16    {
17       uint32_t sq_pgm_resources_fs;
18    } regs;
19 
20    uint32_t size;
21    uint8_t *program;
22    uint32_t attribCount;
23    uint32_t numDivisors;
24    uint32_t divisors[2];
25 } GX2FetchShader;
26 
/*
 * Named uniform block entry: a name plus an offset/size pair.
 * Shader descriptors reference arrays of these through `uniformBlocks`.
 */
typedef struct GX2UniformBlock
{
   const char *name;
   uint32_t offset;
   uint32_t size;
} GX2UniformBlock;

34 typedef struct GX2UniformVar
35 {
36    const char *name;
37    GX2ShaderVarType type;
38    uint32_t count;
39    uint32_t offset;
40    int32_t block;
41 } GX2UniformVar;
42 
/*
 * Initial value for a uniform: four floats plus the offset they apply to.
 * Shader descriptors reference arrays of these through `initialValues`.
 */
typedef struct GX2UniformInitialValue
{
   float value[4];
   uint32_t offset;
} GX2UniformInitialValue;

/*
 * Loop variable entry: an offset/value pair.
 * Shader descriptors reference arrays of these through `loopVars`.
 */
typedef struct GX2LoopVar
{
   uint32_t offset;
   uint32_t value;
} GX2LoopVar;

55 typedef struct GX2SamplerVar
56 {
57    const char *name;
58    GX2SamplerVarType type;
59    uint32_t location;
60 } GX2SamplerVar;
61 
62 typedef struct GX2AttribVar
63 {
64    const char *name;
65    GX2ShaderVarType type;
66    uint32_t count;
67    uint32_t location;
68 } GX2AttribVar;
69 
70 typedef struct GX2VertexShader
71 {
72    union
73    {
74       struct
75       {
76          struct
77          {
78             unsigned : 2;
79             bool prime_cache_on_const : 1;
80             bool prime_cache_enable : 1;
81             bool uncached_first_inst : 1;
82             unsigned  fetch_cache_lines : 3;
83             bool prime_cache_on_draw : 1;
84             bool prime_cache_pgm_en : 1;
85             bool dx10_clamp : 1;
86             unsigned : 5;
87             unsigned stack_size : 8;
88             unsigned num_gprs : 8;
89          } sq_pgm_resources_vs;
90 
91          struct
92          {
93             unsigned : 31;
94             unsigned enable: 1;
95          } vgt_primitiveid_en;
96 
97          struct
98          {
99             unsigned : 18;
100             unsigned vs_out_fog_vec_addr : 5;
101             bool vs_exports_fog : 1;
102             unsigned : 2;
103             unsigned vs_export_count : 5;
104             bool vs_per_component : 1;
105          } spi_vs_out_config;
106 
107          uint32_t num_spi_vs_out_id;
108          struct
109          {
110             uint8_t semantic_3;
111             uint8_t semantic_2;
112             uint8_t semantic_1;
113             uint8_t semantic_0;
114          } spi_vs_out_id[10];
115          struct
116          {
117             bool clip_dist_ena_7 : 1;
118             bool clip_dist_ena_6 : 1;
119             bool clip_dist_ena_5 : 1;
120             bool clip_dist_ena_4 : 1;
121             bool clip_dist_ena_3 : 1;
122             bool clip_dist_ena_2 : 1;
123             bool clip_dist_ena_1 : 1;
124             bool clip_dist_ena_0 : 1;
125             bool cull_dist_ena_7 : 1;
126             bool cull_dist_ena_6 : 1;
127             bool cull_dist_ena_5 : 1;
128             bool cull_dist_ena_0 : 1;
129             bool cull_dist_ena_4 : 1;
130             bool cull_dist_ena_3 : 1;
131             bool cull_dist_ena_2 : 1;
132             bool cull_dist_ena_1 : 1;
133             bool vs_out_misc_side_bus_ena : 1;
134             bool vs_out_ccdist1_vec_ena : 1;
135             bool vs_out_ccdist0_vec_ena : 1;
136             bool vs_out_misc_vec_ena : 1;
137             bool use_vtx_kill_flag : 1;
138             bool use_vtx_viewport_indx : 1;
139             bool use_vtx_render_target_indx : 1;
140             bool use_vtx_edge_flag : 1;
141             unsigned : 6;
142             bool use_vtx_point_size : 1;
143             bool use_vtx_gs_cut_flag : 1;
144          } pa_cl_vs_out_cntl;
145          uint32_t sq_vtx_semantic_clear;
146          uint32_t num_sq_vtx_semantic;
147          uint32_t sq_vtx_semantic[32]; /* 8 bit */
148          struct
149          {
150             bool buffer_3_en : 1;
151             bool buffer_2_en : 1;
152             bool buffer_1_en : 1;
153             bool buffer_0_en : 1;
154          } vgt_strmout_buffer_en;
155          struct
156          {
157             unsigned : 24;
158             unsigned vtx_reuse_depth : 8;
159          } vgt_vertex_reuse_block_cntl;
160          struct
161          {
162             unsigned : 24;
163             unsigned reuse_depth : 8;
164          } vgt_hos_reuse_depth;
165       };
166       u32 vals[52];
167    } regs;
168 
169    uint32_t size;
170    uint8_t *program;
171    GX2ShaderMode mode;
172 
173    uint32_t uniformBlockCount;
174    GX2UniformBlock *uniformBlocks;
175 
176    uint32_t uniformVarCount;
177    GX2UniformVar *uniformVars;
178 
179    uint32_t initialValueCount;
180    GX2UniformInitialValue *initialValues;
181 
182    uint32_t loopVarCount;
183    GX2LoopVar *loopVars;
184 
185    uint32_t samplerVarCount;
186    GX2SamplerVar *samplerVars;
187 
188    uint32_t attribVarCount;
189    GX2AttribVar *attribVars;
190 
191    uint32_t ringItemSize;
192 
193    BOOL hasStreamOut;
194    uint32_t streamOutStride[4];
195 
196    GX2RBuffer gx2rBuffer;
197 } GX2VertexShader;
198 
/*
 * Values for the 2-bit `baryc_sample_cntl` field of
 * GX2PixelShader.regs.spi_ps_in_control_0.
 */
typedef enum
{
   spi_baryc_cntl_centroids_only        = 0,
   spi_baryc_cntl_centers_only          = 1,
   spi_baryc_cntl_centroids_and_centers = 2,
} spi_baryc_cntl;

/*
 * Values for the 2-bit `z_order` field of
 * GX2PixelShader.regs.db_shader_control.
 */
typedef enum
{
   db_z_order_late_z              = 0,
   db_z_order_early_z_then_late_z = 1,
   db_z_order_re_z                = 2,
   db_z_order_early_z_then_re_z   = 3,
} db_z_order;

214 typedef struct GX2PixelShader
215 {
216    union
217    {
218       struct
219       {
220          struct
221          {
222             unsigned : 2;
223             bool prime_cache_on_const : 1;
224             bool prime_cache_enable : 1;
225             bool uncached_first_inst : 1;
226             unsigned  fetch_cache_lines : 3;
227             bool prime_cache_on_draw : 1;
228             bool prime_cache_pgm_en : 1;
229             bool dx10_clamp : 1;
230             unsigned : 5;
231             unsigned stack_size : 8;
232             unsigned num_gprs : 8;
233          } sq_pgm_resources_ps;
234 
235          struct
236          {
237             unsigned : 27;
238             unsigned export_mode : 5;
239          } sq_pgm_exports_ps;
240 
241          struct
242          {
243             bool baryc_at_sample_ena : 1;
244             bool position_sample : 1;
245             bool linear_gradient_ena : 1;
246             bool persp_gradient_ena : 1;
247             spi_baryc_cntl baryc_sample_cntl : 2;
248             unsigned param_gen_addr : 7;
249             unsigned param_gen : 4;
250             unsigned position_addr : 5;
251             bool position_centroid : 1;
252             bool position_ena : 1;
253             unsigned : 2;
254             unsigned num_interp : 6;
255          } spi_ps_in_control_0;
256 
257          struct
258          {
259             unsigned : 1;
260             bool position_ulc : 1;
261             unsigned fixed_pt_position_addr : 5;
262             bool fixed_pt_position_ena : 1;
263             unsigned fog_addr : 7;
264             unsigned front_face_addr : 5;
265             bool front_face_all_bits : 1;
266             unsigned front_face_chan : 2;
267             bool front_face_ena : 1;
268             unsigned gen_index_pix_addr : 7;
269             bool gen_index_pix : 1;
270          } spi_ps_in_control_1;
271 
272          uint32_t num_spi_ps_input_cntl;
273 
274          struct
275          {
276             unsigned : 13;
277             bool sel_sample : 1;
278             bool pt_sprite_tex : 1;
279             unsigned cyl_wrap : 4;
280             bool sel_linear : 1;
281             bool sel_centroid : 1;
282             bool flat_shade : 1;
283             unsigned default_val : 2;
284             unsigned semantic : 8;
285          } spi_ps_input_cntls[32];
286 
287          struct
288          {
289             unsigned output7_enable : 4;
290             unsigned output6_enable : 4;
291             unsigned output5_enable : 4;
292             unsigned output4_enable : 4;
293             unsigned output3_enable : 4;
294             unsigned output2_enable : 4;
295             unsigned output1_enable : 4;
296             unsigned output0_enable : 4;
297          } cb_shader_mask;
298          struct
299          {
300             unsigned : 24;
301             bool rt7_enable : 1;
302             bool rt6_enable : 1;
303             bool rt5_enable : 1;
304             bool rt4_enable : 1;
305             bool rt3_enable : 1;
306             bool rt2_enable : 1;
307             bool rt1_enable : 1;
308             bool rt0_enable : 1;
309          } cb_shader_control;
310          struct
311          {
312             unsigned : 19;
313             bool alpha_to_mask_disable : 1;
314             bool exec_on_noop : 1;
315             bool exec_on_hier_fail : 1;
316             bool dual_export_enable : 1;
317             bool mask_export_enable : 1;
318             bool coverage_to_mask_enable : 1;
319             bool kill_enable : 1;
320             db_z_order z_order : 2;
321             unsigned : 2;
322             bool z_export_enable : 1;
323             bool stencil_ref_export_enable : 1;
324          } db_shader_control;
325 
326          bool spi_input_z;
327       };
328       u32 vals[41];
329    } regs;
330 
331    uint32_t size;
332    uint8_t *program;
333    GX2ShaderMode mode;
334 
335    uint32_t uniformBlockCount;
336    GX2UniformBlock *uniformBlocks;
337 
338    uint32_t uniformVarCount;
339    GX2UniformVar *uniformVars;
340 
341    uint32_t initialValueCount;
342    GX2UniformInitialValue *initialValues;
343 
344    uint32_t loopVarCount;
345    GX2LoopVar *loopVars;
346 
347    uint32_t samplerVarCount;
348    GX2SamplerVar *samplerVars;
349 
350    GX2RBuffer gx2rBuffer;
351 } GX2PixelShader;
352 
/*
 * Type of GX2GeometryShader.regs.vgt_gs_out_prim_type, which is stored as a
 * whole member rather than a bit-field.
 * The 0xFFFFFFFF sentinel presumably forces a 32-bit representation - confirm.
 */
typedef enum
{
   VGT_GS_OUT_PRIMITIVE_TYPE_POINTLIST = 0,
   VGT_GS_OUT_PRIMITIVE_TYPE_LINESTRIP = 1,
   VGT_GS_OUT_PRIMITIVE_TYPE_TRISTRIP  = 2,
   VGT_GS_OUT_PRIMITIVE_TYPE_MAX_ENUM  = 0xFFFFFFFF
} vgt_gs_out_primitive_type;

/*
 * Values for the 2-bit `mode` field of GX2GeometryShader.regs.vgt_gs_mode.
 */
typedef enum
{
   VGT_GS_ENABLE_MODE_OFF        = 0,
   VGT_GS_ENABLE_MODE_SCENARIO_A = 1,
   VGT_GS_ENABLE_MODE_SCENARIO_B = 2,
   VGT_GS_ENABLE_MODE_SCENARIO_G = 3,
} vgt_gs_enable_mode;

/*
 * Values for the 2-bit `cut_mode` field of GX2GeometryShader.regs.vgt_gs_mode.
 * The numeric suffix halves as the encoded value increases.
 */
typedef enum
{
   VGT_GS_CUT_MODE_1024 = 0,
   VGT_GS_CUT_MODE_512  = 1,
   VGT_GS_CUT_MODE_256  = 2,
   VGT_GS_CUT_MODE_128  = 3,
} vgt_gs_cut_mode;

377 typedef struct GX2GeometryShader
378 {
379    union
380    {
381       struct
382       {
383          struct
384          {
385             unsigned : 2;
386             bool prime_cache_on_const : 1;
387             bool prime_cache_enable : 1;
388             bool uncached_first_inst : 1;
389             unsigned  fetch_cache_lines : 3;
390             bool prime_cache_on_draw : 1;
391             bool prime_cache_pgm_en : 1;
392             bool dx10_clamp : 1;
393             unsigned : 5;
394             unsigned stack_size : 8;
395             unsigned num_gprs : 8;
396          } sq_pgm_resources_gs;
397          vgt_gs_out_primitive_type vgt_gs_out_prim_type;
398          struct
399          {
400             unsigned : 14;
401             bool partial_thd_at_eoi : 1;
402             bool element_info_en : 1;
403             bool fast_compute_mode : 1;
404             bool compute_mode : 1;
405             unsigned : 2;
406             bool gs_c_pack_en : 1;
407             unsigned : 2;
408             bool mode_hi : 1;
409             unsigned : 3;
410             vgt_gs_cut_mode cut_mode : 2;
411             bool es_passthru : 1;
412             vgt_gs_enable_mode mode : 2;
413          } vgt_gs_mode;
414          struct
415          {
416             bool clip_dist_ena_7 : 1;
417             bool clip_dist_ena_6 : 1;
418             bool clip_dist_ena_5 : 1;
419             bool clip_dist_ena_4 : 1;
420             bool clip_dist_ena_3 : 1;
421             bool clip_dist_ena_2 : 1;
422             bool clip_dist_ena_1 : 1;
423             bool clip_dist_ena_0 : 1;
424             bool cull_dist_ena_7 : 1;
425             bool cull_dist_ena_6 : 1;
426             bool cull_dist_ena_5 : 1;
427             bool cull_dist_ena_0 : 1;
428             bool cull_dist_ena_4 : 1;
429             bool cull_dist_ena_3 : 1;
430             bool cull_dist_ena_2 : 1;
431             bool cull_dist_ena_1 : 1;
432             bool vs_out_misc_side_bus_ena : 1;
433             bool vs_out_ccdist1_vec_ena : 1;
434             bool vs_out_ccdist0_vec_ena : 1;
435             bool vs_out_misc_vec_ena : 1;
436             bool use_vtx_kill_flag : 1;
437             bool use_vtx_viewport_indx : 1;
438             bool use_vtx_render_target_indx : 1;
439             bool use_vtx_edge_flag : 1;
440             unsigned : 6;
441             bool use_vtx_point_size : 1;
442             bool use_vtx_gs_cut_flag : 1;
443          } pa_cl_vs_out_cntl;
444          struct
445          {
446             unsigned : 2;
447             bool prime_cache_on_const : 1;
448             bool prime_cache_enable : 1;
449             bool uncached_first_inst : 1;
450             unsigned  fetch_cache_lines : 3;
451             bool prime_cache_on_draw : 1;
452             bool prime_cache_pgm_en : 1;
453             bool dx10_clamp : 1;
454             unsigned : 5;
455             unsigned stack_size : 8;
456             unsigned num_gprs : 8;
457          } sq_pgm_resources_vs;
458 
459          uint32_t sq_gs_vert_itemsize; /* 15-bit */
460 
461          struct
462          {
463             unsigned : 18;
464             unsigned vs_out_fog_vec_addr : 5;
465             bool vs_exports_fog : 1;
466             unsigned : 2;
467             unsigned vs_export_count : 5;
468             bool vs_per_component : 1;
469          } spi_vs_out_config;
470 
471          uint32_t num_spi_vs_out_id;
472 
473          struct
474          {
475             uint8_t semantic_3;
476             uint8_t semantic_2;
477             uint8_t semantic_1;
478             uint8_t semantic_0;
479          } spi_vs_out_id[10];
480 
481          struct
482          {
483             bool buffer_3_en : 1;
484             bool buffer_2_en : 1;
485             bool buffer_1_en : 1;
486             bool buffer_0_en : 1;
487          } vgt_strmout_buffer_en;
488       };
489       u32 vals[19];
490    } regs;
491    uint32_t size;
492    uint8_t *program;
493    uint32_t copyProgramSize;
494    uint8_t *copyProgram;
495    GX2ShaderMode mode;
496 
497    uint32_t uniformBlockCount;
498    GX2UniformBlock *uniformBlocks;
499 
500    uint32_t uniformVarCount;
501    GX2UniformVar *uniformVars;
502 
503    uint32_t initialValueCount;
504    GX2UniformInitialValue *initialValues;
505 
506    uint32_t loopVarCount;
507    GX2LoopVar *loopVars;
508 
509    uint32_t samplerVarCount;
510    GX2SamplerVar *samplerVars;
511 
512    uint32_t ringItemSize;
513    BOOL hasStreamOut;
514    uint32_t streamOutStride[4];
515 
516    GX2RBuffer gx2rBuffer;
517 } GX2GeometryShader;
518 
519 typedef struct GX2AttribStream
520 {
521    uint32_t location;
522    uint32_t buffer;
523    uint32_t offset;
524    GX2AttribFormat format;
525    GX2AttribIndexType type;
526    uint32_t aluDivisor;
527    uint32_t mask;
528    GX2EndianSwapMode endianSwap;
529 } GX2AttribStream;
530 
/*
 * Geometry shader ring buffer size queries.
 * Both take a ring item size and return a uint32_t; the unit of the result
 * is not visible from this header.
 */
uint32_t GX2CalcGeometryShaderInputRingBufferSize(uint32_t ringItemSize);
uint32_t GX2CalcGeometryShaderOutputRingBufferSize(uint32_t ringItemSize);

534 uint32_t GX2CalcFetchShaderSizeEx(uint32_t attribs, GX2FetchShaderType fetchShaderType,
535                                   GX2TessellationMode tesellationMode);
536 
537 void GX2InitFetchShaderEx(GX2FetchShader *fetchShader, uint8_t *buffer, uint32_t attribCount,
538                           GX2AttribStream *attribs, GX2FetchShaderType type, GX2TessellationMode tessMode);
539 
540 void GX2SetFetchShader(GX2FetchShader *shader);
541 void GX2SetVertexShader(GX2VertexShader *shader);
542 void GX2SetPixelShader(GX2PixelShader *shader);
543 void GX2SetGeometryShader(GX2GeometryShader *shader);
544 
545 void GX2SetVertexSampler(GX2Sampler *sampler, uint32_t id);
546 void GX2SetPixelSampler(GX2Sampler *sampler, uint32_t id);
547 void GX2SetGeometrySampler(GX2Sampler *sampler, uint32_t id);
548 void GX2SetVertexUniformReg(uint32_t offset, uint32_t count, uint32_t *data);
549 void GX2SetPixelUniformReg(uint32_t offset, uint32_t count, uint32_t *data);
550 void GX2SetVertexUniformBlock(uint32_t location, uint32_t size, const void *data);
551 void GX2SetPixelUniformBlock(uint32_t location, uint32_t size, const void *data);
552 void GX2SetGeometryUniformBlock(uint32_t location, uint32_t size, const void *data);
553 
554 void GX2SetShaderModeEx(GX2ShaderMode mode,
555                         uint32_t numVsGpr, uint32_t numVsStackEntries,
556                         uint32_t numGsGpr, uint32_t numGsStackEntries,
557                         uint32_t numPsGpr, uint32_t numPsStackEntries);
558 
/*
 * Convenience wrapper around GX2SetShaderModeEx() that supplies fixed
 * per-stage budgets for the requested mode.
 *
 * The six numeric arguments follow GX2SetShaderModeEx()'s parameter order:
 * VS GPRs, VS stack entries, GS GPRs, GS stack entries, PS GPRs,
 * PS stack entries.
 */
static inline void GX2SetShaderMode(GX2ShaderMode mode)
{
   if (mode == GX2_SHADER_MODE_GEOMETRY_SHADER)
   {
      /* Geometry shader mode: all three stages get a non-zero budget. */
      GX2SetShaderModeEx(mode, 44, 32, 64, 48, 76, 176);
   }
   else
   {
      /* Any other mode: the geometry stage budget is zero. */
      GX2SetShaderModeEx(mode, 48, 64, 0, 0, 200, 192);
   }
}

567 void GX2SetStreamOutEnable(BOOL enable);
568 void GX2SetGeometryShaderInputRingBuffer(void *buffer, uint32_t size);
569 void GX2SetGeometryShaderOutputRingBuffer(void *buffer, uint32_t size);
570 
571 uint32_t GX2GetPixelShaderGPRs(GX2PixelShader *shader);
572 uint32_t GX2GetPixelShaderStackEntries(GX2PixelShader *shader);
573 uint32_t GX2GetVertexShaderGPRs(GX2VertexShader *shader);
574 uint32_t GX2GetVertexShaderStackEntries(GX2VertexShader *shader);
575 uint32_t GX2GetGeometryShaderGPRs(GX2GeometryShader *shader);
576 uint32_t GX2GetGeometryShaderStackEntries(GX2GeometryShader *shader);
577 
/* Close the C-linkage region opened at the top of this header. */
#ifdef __cplusplus
}
#endif

/** @} */