xref: /reactos/dll/directx/wine/wined3d/shader.c (revision 81cffd76)
1 /*
2  * Copyright 2002-2003 Jason Edmeades
3  * Copyright 2002-2003 Raphael Junqueira
4  * Copyright 2004 Christian Costa
5  * Copyright 2005 Oliver Stieber
6  * Copyright 2006 Ivan Gyurdiev
7  * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers
8  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "config.h"
26 #include "wine/port.h"
27 
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "wined3d_private.h"
32 
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 
/* sRGB-related constant vectors, four floats each.
 *
 * wined3d_srgb_const0 components, in order: pow, mul_high, sub_high, mul_low. */
const float wined3d_srgb_const0[] =
{
    0.41666f,   /* pow */
    1.055f,     /* mul_high */
    0.055f,     /* sub_high */
    12.92f,     /* mul_low */
};
/* wined3d_srgb_const1 carries the cmp value in its first component; the
 * remaining three components are zero. */
const float wined3d_srgb_const1[] =
{
    0.0031308f, /* cmp */
    0.0f,
    0.0f,
    0.0f,
};
39 
/* Printable instruction names, indexed by instruction handler value (see
 * debug_d3dshaderinstructionhandler()). The table is positional: each entry
 * is annotated with the WINED3DSIH_* value it is meant to correspond to, so
 * entries must never be reordered independently of that enumeration. */
static const char * const shader_opcode_names[] =
{
    "abs", /* WINED3DSIH_ABS */
    "add", /* WINED3DSIH_ADD */
    "and", /* WINED3DSIH_AND */
    "atomic_and", /* WINED3DSIH_ATOMIC_AND */
    "atomic_cmp_store", /* WINED3DSIH_ATOMIC_CMP_STORE */
    "atomic_iadd", /* WINED3DSIH_ATOMIC_IADD */
    "atomic_imax", /* WINED3DSIH_ATOMIC_IMAX */
    "atomic_imin", /* WINED3DSIH_ATOMIC_IMIN */
    "atomic_or", /* WINED3DSIH_ATOMIC_OR */
    "atomic_umax", /* WINED3DSIH_ATOMIC_UMAX */
    "atomic_umin", /* WINED3DSIH_ATOMIC_UMIN */
    "atomic_xor", /* WINED3DSIH_ATOMIC_XOR */
    "bem", /* WINED3DSIH_BEM */
    "bfi", /* WINED3DSIH_BFI */
    "bfrev", /* WINED3DSIH_BFREV */
    "break", /* WINED3DSIH_BREAK */
    "breakc", /* WINED3DSIH_BREAKC */
    "breakp", /* WINED3DSIH_BREAKP */
    "bufinfo", /* WINED3DSIH_BUFINFO */
    "call", /* WINED3DSIH_CALL */
    "callnz", /* WINED3DSIH_CALLNZ */
    "case", /* WINED3DSIH_CASE */
    "cmp", /* WINED3DSIH_CMP */
    "cnd", /* WINED3DSIH_CND */
    "continue", /* WINED3DSIH_CONTINUE */
    "continuec", /* WINED3DSIH_CONTINUEP */
    "countbits", /* WINED3DSIH_COUNTBITS */
    "crs", /* WINED3DSIH_CRS */
    "cut", /* WINED3DSIH_CUT */
    "cut_stream", /* WINED3DSIH_CUT_STREAM */
    "dcl", /* WINED3DSIH_DCL */
    "dcl_constantBuffer", /* WINED3DSIH_DCL_CONSTANT_BUFFER */
    "dcl_function_body", /* WINED3DSIH_DCL_FUNCTION_BODY */
    "dcl_function_table", /* WINED3DSIH_DCL_FUNCTION_TABLE */
    "dcl_globalFlags", /* WINED3DSIH_DCL_GLOBAL_FLAGS */
    "dcl_gs_instances", /* WINED3DSIH_DCL_GS_INSTANCES */
    "dcl_hs_fork_phase_instance_count", /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */
    "dcl_hs_join_phase_instance_count", /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */
    "dcl_hs_max_tessfactor", /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */
    "dcl_immediateConstantBuffer", /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */
    "dcl_index_range", /* WINED3DSIH_DCL_INDEX_RANGE */
    "dcl_indexableTemp", /* WINED3DSIH_DCL_INDEXABLE_TEMP */
    "dcl_input", /* WINED3DSIH_DCL_INPUT */
    "dcl_input_control_point_count", /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */
    "dcl_inputPrimitive", /* WINED3DSIH_DCL_INPUT_PRIMITIVE */
    "dcl_input_ps", /* WINED3DSIH_DCL_INPUT_PS */
    "dcl_input_ps_sgv", /* WINED3DSIH_DCL_INPUT_PS_SGV */
    "dcl_input_ps_siv", /* WINED3DSIH_DCL_INPUT_PS_SIV */
    "dcl_input_sgv", /* WINED3DSIH_DCL_INPUT_SGV */
    "dcl_input_siv", /* WINED3DSIH_DCL_INPUT_SIV */
    "dcl_interface", /* WINED3DSIH_DCL_INTERFACE */
    "dcl_output", /* WINED3DSIH_DCL_OUTPUT */
    "dcl_output_control_point_count", /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */
    "dcl_output_siv", /* WINED3DSIH_DCL_OUTPUT_SIV */
    "dcl_outputTopology", /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */
    "dcl_resource_raw", /* WINED3DSIH_DCL_RESOURCE_RAW */
    "dcl_resource_structured", /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */
    "dcl_sampler", /* WINED3DSIH_DCL_SAMPLER */
    "dcl_stream", /* WINED3DSIH_DCL_STREAM */
    "dcl_temps", /* WINED3DSIH_DCL_TEMPS */
    "dcl_tessellator_domain", /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */
    "dcl_tessellator_output_primitive", /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */
    "dcl_tessellator_partitioning", /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */
    "dcl_tgsm_raw", /* WINED3DSIH_DCL_TGSM_RAW */
    "dcl_tgsm_structured", /* WINED3DSIH_DCL_TGSM_STRUCTURED */
    "dcl_thread_group", /* WINED3DSIH_DCL_THREAD_GROUP */
    "dcl_uav_raw", /* WINED3DSIH_DCL_UAV_RAW */
    "dcl_uav_structured", /* WINED3DSIH_DCL_UAV_STRUCTURED */
    "dcl_uav_typed", /* WINED3DSIH_DCL_UAV_TYPED */
    "dcl_maxOutputVertexCount", /* WINED3DSIH_DCL_VERTICES_OUT */
    "def", /* WINED3DSIH_DEF */
    "default", /* WINED3DSIH_DEFAULT */
    "defb", /* WINED3DSIH_DEFB */
    "defi", /* WINED3DSIH_DEFI */
    "div", /* WINED3DSIH_DIV */
    "dp2", /* WINED3DSIH_DP2 */
    "dp2add", /* WINED3DSIH_DP2ADD */
    "dp3", /* WINED3DSIH_DP3 */
    "dp4", /* WINED3DSIH_DP4 */
    "dst", /* WINED3DSIH_DST */
    "dsx", /* WINED3DSIH_DSX */
    "deriv_rtx_coarse", /* WINED3DSIH_DSX_COARSE */
    "deriv_rtx_fine", /* WINED3DSIH_DSX_FINE */
    "dsy", /* WINED3DSIH_DSY */
    "deriv_rty_coarse", /* WINED3DSIH_DSY_COARSE */
    "deriv_rty_fine", /* WINED3DSIH_DSY_FINE */
    "eval_sample_index", /* WINED3DSIH_EVAL_SAMPLE_INDEX */
    "else", /* WINED3DSIH_ELSE */
    "emit", /* WINED3DSIH_EMIT */
    "emit_stream", /* WINED3DSIH_EMIT_STREAM */
    "endif", /* WINED3DSIH_ENDIF */
    "endloop", /* WINED3DSIH_ENDLOOP */
    "endrep", /* WINED3DSIH_ENDREP */
    "endswitch", /* WINED3DSIH_ENDSWITCH */
    "eq", /* WINED3DSIH_EQ */
    "exp", /* WINED3DSIH_EXP */
    "expp", /* WINED3DSIH_EXPP */
    "f16tof32", /* WINED3DSIH_F16TOF32 */
    "f32tof16", /* WINED3DSIH_F32TOF16 */
    "fcall", /* WINED3DSIH_FCALL */
    "firstbit_hi", /* WINED3DSIH_FIRSTBIT_HI */
    "firstbit_lo", /* WINED3DSIH_FIRSTBIT_LO */
    "firstbit_shi", /* WINED3DSIH_FIRSTBIT_SHI */
    "frc", /* WINED3DSIH_FRC */
    "ftoi", /* WINED3DSIH_FTOI */
    "ftou", /* WINED3DSIH_FTOU */
    "gather4", /* WINED3DSIH_GATHER4 */
    "gather4_c", /* WINED3DSIH_GATHER4_C */
    "gather4_po", /* WINED3DSIH_GATHER4_PO */
    "gather4_po_c", /* WINED3DSIH_GATHER4_PO_C */
    "ge", /* WINED3DSIH_GE */
    "hs_control_point_phase", /* WINED3DSIH_HS_CONTROL_POINT_PHASE */
    "hs_decls", /* WINED3DSIH_HS_DECLS */
    "hs_fork_phase", /* WINED3DSIH_HS_FORK_PHASE */
    "hs_join_phase", /* WINED3DSIH_HS_JOIN_PHASE */
    "iadd", /* WINED3DSIH_IADD */
    "ibfe", /* WINED3DSIH_IBFE */
    "ieq", /* WINED3DSIH_IEQ */
    "if", /* WINED3DSIH_IF */
    "ifc", /* WINED3DSIH_IFC */
    "ige", /* WINED3DSIH_IGE */
    "ilt", /* WINED3DSIH_ILT */
    "imad", /* WINED3DSIH_IMAD */
    "imax", /* WINED3DSIH_IMAX */
    "imin", /* WINED3DSIH_IMIN */
    "imm_atomic_alloc", /* WINED3DSIH_IMM_ATOMIC_ALLOC */
    "imm_atomic_and", /* WINED3DSIH_IMM_ATOMIC_AND */
    "imm_atomic_cmp_exch", /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */
    "imm_atomic_consume", /* WINED3DSIH_IMM_ATOMIC_CONSUME */
    "imm_atomic_exch", /* WINED3DSIH_IMM_ATOMIC_EXCH */
    "imm_atomic_iadd", /* WINED3DSIH_IMM_ATOMIC_IADD */
    "imm_atomic_imax", /* WINED3DSIH_IMM_ATOMIC_IMAX */
    "imm_atomic_imin", /* WINED3DSIH_IMM_ATOMIC_IMIN */
    "imm_atomic_or", /* WINED3DSIH_IMM_ATOMIC_OR */
    "imm_atomic_umax", /* WINED3DSIH_IMM_ATOMIC_UMAX */
    "imm_atomic_umin", /* WINED3DSIH_IMM_ATOMIC_UMIN */
    "imm_atomic_xor", /* WINED3DSIH_IMM_ATOMIC_XOR */
    "imul", /* WINED3DSIH_IMUL */
    "ine", /* WINED3DSIH_INE */
    "ineg", /* WINED3DSIH_INEG */
    "ishl", /* WINED3DSIH_ISHL */
    "ishr", /* WINED3DSIH_ISHR */
    "itof", /* WINED3DSIH_ITOF */
    "label", /* WINED3DSIH_LABEL */
    "ld", /* WINED3DSIH_LD */
    "ld2dms", /* WINED3DSIH_LD2DMS */
    "ld_raw", /* WINED3DSIH_LD_RAW */
    "ld_structured", /* WINED3DSIH_LD_STRUCTURED */
    "ld_uav_typed", /* WINED3DSIH_LD_UAV_TYPED */
    "lit", /* WINED3DSIH_LIT */
    "lod", /* WINED3DSIH_LOD */
    "log", /* WINED3DSIH_LOG */
    "logp", /* WINED3DSIH_LOGP */
    "loop", /* WINED3DSIH_LOOP */
    "lrp", /* WINED3DSIH_LRP */
    "lt", /* WINED3DSIH_LT */
    "m3x2", /* WINED3DSIH_M3x2 */
    "m3x3", /* WINED3DSIH_M3x3 */
    "m3x4", /* WINED3DSIH_M3x4 */
    "m4x3", /* WINED3DSIH_M4x3 */
    "m4x4", /* WINED3DSIH_M4x4 */
    "mad", /* WINED3DSIH_MAD */
    "max", /* WINED3DSIH_MAX */
    "min", /* WINED3DSIH_MIN */
    "mov", /* WINED3DSIH_MOV */
    "mova", /* WINED3DSIH_MOVA */
    "movc", /* WINED3DSIH_MOVC */
    "mul", /* WINED3DSIH_MUL */
    "ne", /* WINED3DSIH_NE */
    "nop", /* WINED3DSIH_NOP */
    "not", /* WINED3DSIH_NOT */
    "nrm", /* WINED3DSIH_NRM */
    "or", /* WINED3DSIH_OR */
    "phase", /* WINED3DSIH_PHASE */
    "pow", /* WINED3DSIH_POW */
    "rcp", /* WINED3DSIH_RCP */
    "rep", /* WINED3DSIH_REP */
    "resinfo", /* WINED3DSIH_RESINFO */
    "ret", /* WINED3DSIH_RET */
    "retp", /* WINED3DSIH_RETP */
    "round_ne", /* WINED3DSIH_ROUND_NE */
    "round_ni", /* WINED3DSIH_ROUND_NI */
    "round_pi", /* WINED3DSIH_ROUND_PI */
    "round_z", /* WINED3DSIH_ROUND_Z */
    "rsq", /* WINED3DSIH_RSQ */
    "sample", /* WINED3DSIH_SAMPLE */
    "sample_b", /* WINED3DSIH_SAMPLE_B */
    "sample_c", /* WINED3DSIH_SAMPLE_C */
    "sample_c_lz", /* WINED3DSIH_SAMPLE_C_LZ */
    "sample_d", /* WINED3DSIH_SAMPLE_GRAD */
    "sample_info", /* WINED3DSIH_SAMPLE_INFO */
    "sample_l", /* WINED3DSIH_SAMPLE_LOD */
    "sample_pos", /* WINED3DSIH_SAMPLE_POS */
    "setp", /* WINED3DSIH_SETP */
    "sge", /* WINED3DSIH_SGE */
    "sgn", /* WINED3DSIH_SGN */
    "sincos", /* WINED3DSIH_SINCOS */
    "slt", /* WINED3DSIH_SLT */
    "sqrt", /* WINED3DSIH_SQRT */
    "store_raw", /* WINED3DSIH_STORE_RAW */
    "store_structured", /* WINED3DSIH_STORE_STRUCTURED */
    "store_uav_typed", /* WINED3DSIH_STORE_UAV_TYPED */
    "sub", /* WINED3DSIH_SUB */
    "swapc", /* WINED3DSIH_SWAPC */
    "switch", /* WINED3DSIH_SWITCH */
    "sync", /* WINED3DSIH_SYNC */
    "texld", /* WINED3DSIH_TEX */
    "texbem", /* WINED3DSIH_TEXBEM */
    "texbeml", /* WINED3DSIH_TEXBEML */
    "texcrd", /* WINED3DSIH_TEXCOORD */
    "texdepth", /* WINED3DSIH_TEXDEPTH */
    "texdp3", /* WINED3DSIH_TEXDP3 */
    "texdp3tex", /* WINED3DSIH_TEXDP3TEX */
    "texkill", /* WINED3DSIH_TEXKILL */
    "texldd", /* WINED3DSIH_TEXLDD */
    "texldl", /* WINED3DSIH_TEXLDL */
    "texm3x2depth", /* WINED3DSIH_TEXM3x2DEPTH */
    "texm3x2pad", /* WINED3DSIH_TEXM3x2PAD */
    "texm3x2tex", /* WINED3DSIH_TEXM3x2TEX */
    "texm3x3", /* WINED3DSIH_TEXM3x3 */
    "texm3x3diff", /* WINED3DSIH_TEXM3x3DIFF */
    "texm3x3pad", /* WINED3DSIH_TEXM3x3PAD */
    "texm3x3spec", /* WINED3DSIH_TEXM3x3SPEC */
    "texm3x3tex", /* WINED3DSIH_TEXM3x3TEX */
    "texm3x3vspec", /* WINED3DSIH_TEXM3x3VSPEC */
    "texreg2ar", /* WINED3DSIH_TEXREG2AR */
    "texreg2gb", /* WINED3DSIH_TEXREG2GB */
    "texreg2rgb", /* WINED3DSIH_TEXREG2RGB */
    "ubfe", /* WINED3DSIH_UBFE */
    "udiv", /* WINED3DSIH_UDIV */
    "uge", /* WINED3DSIH_UGE */
    "ult", /* WINED3DSIH_ULT */
    "umax", /* WINED3DSIH_UMAX */
    "umin", /* WINED3DSIH_UMIN */
    "umul", /* WINED3DSIH_UMUL */
    "ushr", /* WINED3DSIH_USHR */
    "utof", /* WINED3DSIH_UTOF */
    "xor", /* WINED3DSIH_XOR */
};
281 
/* Semantic name strings, indexed by declaration usage value (see
 * shader_semantic_name_from_usage()). Positional: each entry is annotated
 * with the WINED3D_DECL_USAGE_* value it is meant to correspond to. */
static const char * const semantic_names[] =
{
    "SV_POSITION",  /* WINED3D_DECL_USAGE_POSITION */
    "BLENDWEIGHT",  /* WINED3D_DECL_USAGE_BLEND_WEIGHT */
    "BLENDINDICES", /* WINED3D_DECL_USAGE_BLEND_INDICES */
    "NORMAL",       /* WINED3D_DECL_USAGE_NORMAL */
    "PSIZE",        /* WINED3D_DECL_USAGE_PSIZE */
    "TEXCOORD",     /* WINED3D_DECL_USAGE_TEXCOORD */
    "TANGENT",      /* WINED3D_DECL_USAGE_TANGENT */
    "BINORMAL",     /* WINED3D_DECL_USAGE_BINORMAL */
    "TESSFACTOR",   /* WINED3D_DECL_USAGE_TESS_FACTOR */
    "POSITIONT",    /* WINED3D_DECL_USAGE_POSITIONT */
    "COLOR",        /* WINED3D_DECL_USAGE_COLOR */
    "FOG",          /* WINED3D_DECL_USAGE_FOG */
    "DEPTH",        /* WINED3D_DECL_USAGE_DEPTH */
    "SAMPLE",       /* WINED3D_DECL_USAGE_SAMPLE */
};
299 
300 static const struct
301 {
302     enum wined3d_shader_input_sysval_semantic sysval_semantic;
303     const char *sysval_name;
304 }
305 shader_input_sysval_semantic_names[] =
306 {
307     {WINED3D_SIV_POSITION,                   "position"},
308     {WINED3D_SIV_CLIP_DISTANCE,              "clip_distance"},
309     {WINED3D_SIV_CULL_DISTANCE,              "cull_distance"},
310     {WINED3D_SIV_RENDER_TARGET_ARRAY_INDEX,  "render_target_array_index"},
311     {WINED3D_SIV_VIEWPORT_ARRAY_INDEX,       "viewport_array_index"},
312     {WINED3D_SIV_VERTEX_ID,                  "vertex_id"},
313     {WINED3D_SIV_INSTANCE_ID,                "instance_id"},
314     {WINED3D_SIV_PRIMITIVE_ID,               "primitive_id"},
315     {WINED3D_SIV_IS_FRONT_FACE,              "is_front_face"},
316     {WINED3D_SIV_SAMPLE_INDEX,               "sample_index"},
317     {WINED3D_SIV_QUAD_U0_TESS_FACTOR,        "finalQuadUeq0EdgeTessFactor"},
318     {WINED3D_SIV_QUAD_V0_TESS_FACTOR,        "finalQuadVeq0EdgeTessFactor"},
319     {WINED3D_SIV_QUAD_U1_TESS_FACTOR,        "finalQuadUeq1EdgeTessFactor"},
320     {WINED3D_SIV_QUAD_V1_TESS_FACTOR,        "finalQuadVeq1EdgeTessFactor"},
321     {WINED3D_SIV_QUAD_U_INNER_TESS_FACTOR,   "finalQuadUInsideTessFactor"},
322     {WINED3D_SIV_QUAD_V_INNER_TESS_FACTOR,   "finalQuadVInsideTessFactor"},
323     {WINED3D_SIV_TRIANGLE_U_TESS_FACTOR,     "finalTriUeq0EdgeTessFactor"},
324     {WINED3D_SIV_TRIANGLE_V_TESS_FACTOR,     "finalTriVeq0EdgeTessFactor"},
325     {WINED3D_SIV_TRIANGLE_W_TESS_FACTOR,     "finalTriWeq0EdgeTessFactor"},
326     {WINED3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"},
327     {WINED3D_SIV_LINE_DETAIL_TESS_FACTOR,    "finalLineDetailTessFactor"},
328     {WINED3D_SIV_LINE_DENSITY_TESS_FACTOR,   "finalLineDensityTessFactor"},
329 };
330 
331 static void shader_dump_src_param(struct wined3d_string_buffer *buffer,
332         const struct wined3d_shader_src_param *param, const struct wined3d_shader_version *shader_version);
333 
debug_d3dshaderinstructionhandler(enum WINED3D_SHADER_INSTRUCTION_HANDLER handler_idx)334 const char *debug_d3dshaderinstructionhandler(enum WINED3D_SHADER_INSTRUCTION_HANDLER handler_idx)
335 {
336     if (handler_idx >= ARRAY_SIZE(shader_opcode_names))
337         return wine_dbg_sprintf("UNRECOGNIZED(%#x)", handler_idx);
338 
339     return shader_opcode_names[handler_idx];
340 }
341 
shader_semantic_name_from_usage(enum wined3d_decl_usage usage)342 static const char *shader_semantic_name_from_usage(enum wined3d_decl_usage usage)
343 {
344     if (usage >= ARRAY_SIZE(semantic_names))
345     {
346         FIXME("Unrecognized usage %#x.\n", usage);
347         return "UNRECOGNIZED";
348     }
349 
350     return semantic_names[usage];
351 }
352 
shader_usage_from_semantic_name(const char * name)353 static enum wined3d_decl_usage shader_usage_from_semantic_name(const char *name)
354 {
355     unsigned int i;
356 
357     for (i = 0; i < ARRAY_SIZE(semantic_names); ++i)
358     {
359         if (!strcmp(name, semantic_names[i]))
360             return i;
361     }
362 
363     return ~0U;
364 }
365 
shader_sysval_semantic_from_usage(enum wined3d_decl_usage usage)366 static enum wined3d_sysval_semantic shader_sysval_semantic_from_usage(enum wined3d_decl_usage usage)
367 {
368     switch (usage)
369     {
370         case WINED3D_DECL_USAGE_POSITION:
371             return WINED3D_SV_POSITION;
372         default:
373             return 0;
374     }
375 }
376 
shader_match_semantic(const char * semantic_name,enum wined3d_decl_usage usage)377 BOOL shader_match_semantic(const char *semantic_name, enum wined3d_decl_usage usage)
378 {
379     return !strcmp(semantic_name, shader_semantic_name_from_usage(usage));
380 }
381 
shader_signature_from_semantic(struct wined3d_shader_signature_element * e,const struct wined3d_shader_semantic * s)382 static void shader_signature_from_semantic(struct wined3d_shader_signature_element *e,
383         const struct wined3d_shader_semantic *s)
384 {
385     e->semantic_name = shader_semantic_name_from_usage(s->usage);
386     e->semantic_idx = s->usage_idx;
387     e->stream_idx = 0;
388     e->sysval_semantic = shader_sysval_semantic_from_usage(s->usage);
389     e->component_type = WINED3D_TYPE_FLOAT;
390     e->register_idx = s->reg.reg.idx[0].offset;
391     e->mask = s->reg.write_mask;
392 }
393 
/* Fill signature element "e" from an explicit usage / usage index pair, a
 * register index and a write mask.
 *
 * The stream index is always 0 and the component type is always
 * WINED3D_TYPE_FLOAT. */
static void shader_signature_from_usage(struct wined3d_shader_signature_element *e,
        enum wined3d_decl_usage usage, UINT usage_idx, UINT reg_idx, DWORD write_mask)
{
    /* Fixed values, independent of the arguments. */
    e->stream_idx = 0;
    e->component_type = WINED3D_TYPE_FLOAT;

    /* Values derived from the usage. */
    e->semantic_name = shader_semantic_name_from_usage(usage);
    e->semantic_idx = usage_idx;
    e->sysval_semantic = shader_sysval_semantic_from_usage(usage);

    /* Register placement, passed through unchanged. */
    e->register_idx = reg_idx;
    e->mask = write_mask;
}
405 
shader_select_frontend(enum wined3d_shader_byte_code_format format)406 static const struct wined3d_shader_frontend *shader_select_frontend(enum wined3d_shader_byte_code_format format)
407 {
408     switch (format)
409     {
410         case WINED3D_SHADER_BYTE_CODE_FORMAT_SM1:
411             return &sm1_shader_frontend;
412 
413         case WINED3D_SHADER_BYTE_CODE_FORMAT_SM4:
414             return &sm4_shader_frontend;
415 
416         default:
417             WARN("Invalid byte code format %#x specified.\n", format);
418             return NULL;
419     }
420 }
421 
string_buffer_clear(struct wined3d_string_buffer * buffer)422 void string_buffer_clear(struct wined3d_string_buffer *buffer)
423 {
424     buffer->buffer[0] = '\0';
425     buffer->content_size = 0;
426 }
427 
string_buffer_init(struct wined3d_string_buffer * buffer)428 BOOL string_buffer_init(struct wined3d_string_buffer *buffer)
429 {
430     buffer->buffer_size = 32;
431     if (!(buffer->buffer = heap_alloc(buffer->buffer_size)))
432     {
433         ERR("Failed to allocate shader buffer memory.\n");
434         return FALSE;
435     }
436 
437     string_buffer_clear(buffer);
438     return TRUE;
439 }
440 
string_buffer_free(struct wined3d_string_buffer * buffer)441 void string_buffer_free(struct wined3d_string_buffer *buffer)
442 {
443     heap_free(buffer->buffer);
444 }
445 
string_buffer_resize(struct wined3d_string_buffer * buffer,int rc)446 BOOL string_buffer_resize(struct wined3d_string_buffer *buffer, int rc)
447 {
448     char *new_buffer;
449     unsigned int new_buffer_size = buffer->buffer_size * 2;
450 
451     while (rc > 0 && (unsigned int)rc >= new_buffer_size - buffer->content_size)
452         new_buffer_size *= 2;
453     if (!(new_buffer = heap_realloc(buffer->buffer, new_buffer_size)))
454     {
455         ERR("Failed to grow buffer.\n");
456         buffer->buffer[buffer->content_size] = '\0';
457         return FALSE;
458     }
459     buffer->buffer = new_buffer;
460     buffer->buffer_size = new_buffer_size;
461     return TRUE;
462 }
463 
shader_vaddline(struct wined3d_string_buffer * buffer,const char * format,va_list args)464 int shader_vaddline(struct wined3d_string_buffer *buffer, const char *format, va_list args)
465 {
466     unsigned int rem;
467     int rc;
468 
469     rem = buffer->buffer_size - buffer->content_size;
470     rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, args);
471     if (rc < 0 /* C89 */ || (unsigned int)rc >= rem /* C99 */)
472         return rc;
473 
474     buffer->content_size += rc;
475     return 0;
476 }
477 
/* Append formatted text to "buffer", growing it as often as needed.
 *
 * The argument list is restarted for every attempt because a va_list cannot
 * be reused once it has been consumed by shader_vaddline().
 *
 * Returns 0 on success, or -1 if the buffer could not be grown. */
int shader_addline(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int rc;

    do
    {
        va_start(args, format);
        rc = shader_vaddline(buffer, format, args);
        va_end(args);
        if (!rc)
            return 0;
    } while (string_buffer_resize(buffer, rc));

    return -1;
}
494 
string_buffer_get(struct wined3d_string_buffer_list * list)495 struct wined3d_string_buffer *string_buffer_get(struct wined3d_string_buffer_list *list)
496 {
497     struct wined3d_string_buffer *buffer;
498 
499     if (list_empty(&list->list))
500     {
501         buffer = heap_alloc(sizeof(*buffer));
502         if (!buffer || !string_buffer_init(buffer))
503         {
504             ERR("Couldn't allocate buffer for temporary string.\n");
505             heap_free(buffer);
506             return NULL;
507         }
508     }
509     else
510     {
511         buffer = LIST_ENTRY(list_head(&list->list), struct wined3d_string_buffer, entry);
512         list_remove(&buffer->entry);
513     }
514     string_buffer_clear(buffer);
515     return buffer;
516 }
517 
/* Replace the content of "buffer" with formatted text, without growing it.
 *
 * A NULL buffer is accepted and treated as success (returns 0). Otherwise
 * the buffer is cleared first and the result of shader_vaddline() is
 * returned: 0 on success, non-zero when the text did not fit. */
static int string_buffer_vsprintf(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    if (buffer)
    {
        string_buffer_clear(buffer);
        return shader_vaddline(buffer, format, args);
    }

    return 0;
}
525 
/* Replace the content of "buffer" with formatted text, growing the buffer
 * as often as needed.
 *
 * A NULL buffer is silently ignored. If the buffer cannot be grown the
 * function gives up without reporting the failure to the caller. */
void string_buffer_sprintf(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int rc;

    do
    {
        /* Restart the argument list for every formatting attempt. */
        va_start(args, format);
        rc = string_buffer_vsprintf(buffer, format, args);
        va_end(args);
    } while (rc && string_buffer_resize(buffer, rc));
}
542 
string_buffer_release(struct wined3d_string_buffer_list * list,struct wined3d_string_buffer * buffer)543 void string_buffer_release(struct wined3d_string_buffer_list *list, struct wined3d_string_buffer *buffer)
544 {
545     if (!buffer)
546         return;
547     list_add_head(&list->list, &buffer->entry);
548 }
549 
string_buffer_list_init(struct wined3d_string_buffer_list * list)550 void string_buffer_list_init(struct wined3d_string_buffer_list *list)
551 {
552     list_init(&list->list);
553 }
554 
string_buffer_list_cleanup(struct wined3d_string_buffer_list * list)555 void string_buffer_list_cleanup(struct wined3d_string_buffer_list *list)
556 {
557     struct wined3d_string_buffer *buffer, *buffer_next;
558 
559     LIST_FOR_EACH_ENTRY_SAFE(buffer, buffer_next, &list->list, struct wined3d_string_buffer, entry)
560     {
561         string_buffer_free(buffer);
562         heap_free(buffer);
563     }
564     list_init(&list->list);
565 }
566 
567 /* Convert floating point offset relative to a register file to an absolute
568  * offset for float constants. */
shader_get_float_offset(enum wined3d_shader_register_type register_type,UINT register_idx)569 static unsigned int shader_get_float_offset(enum wined3d_shader_register_type register_type, UINT register_idx)
570 {
571     switch (register_type)
572     {
573         case WINED3DSPR_CONST: return register_idx;
574         case WINED3DSPR_CONST2: return 2048 + register_idx;
575         case WINED3DSPR_CONST3: return 4096 + register_idx;
576         case WINED3DSPR_CONST4: return 6144 + register_idx;
577         default:
578             FIXME("Unsupported register type: %u.\n", register_type);
579             return register_idx;
580     }
581 }
582 
shader_delete_constant_list(struct list * clist)583 static void shader_delete_constant_list(struct list *clist)
584 {
585     struct wined3d_shader_lconst *constant, *constant_next;
586 
587     LIST_FOR_EACH_ENTRY_SAFE(constant, constant_next, clist, struct wined3d_shader_lconst, entry)
588         heap_free(constant);
589     list_init(clist);
590 }
591 
shader_set_limits(struct wined3d_shader * shader)592 static void shader_set_limits(struct wined3d_shader *shader)
593 {
594     static const struct limits_entry
595     {
596         unsigned int min_version;
597         unsigned int max_version;
598         struct wined3d_shader_limits limits;
599     }
600     vs_limits[] =
601     {
602         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
603         {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), { 0,  0, 256,  0, 12,  0}},
604         {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), { 0, 16, 256, 16, 12,  0}},
605         /* DX10 cards on Windows advertise a D3D9 constant limit of 256
606          * even though they are capable of supporting much more (GL
607          * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
608          * wined3d-advertised maximum. Clamp the constant limit for <= 3.0
609          * shaders to 256. */
610         {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), { 4, 16, 256, 16, 12,  0}},
611         {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16,  0,   0,  0, 16,  0}},
612         {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32,  0}},
613         {0}
614     },
615     hs_limits[] =
616     {
617         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
618         {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
619     },
620     ds_limits[] =
621     {
622         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
623         {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
624     },
625     gs_limits[] =
626     {
627         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
628         {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16,  0,   0,  0, 32, 16}},
629         {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
630         {0}
631     },
632     ps_limits[] =
633     {
634         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
635         {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), { 4,  0,   8,  0,  0,  0}},
636         {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), { 6,  0,   8,  0,  0,  0}},
637         {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 0), {16,  0,  32,  0,  0,  0}},
638         {WINED3D_SHADER_VERSION(2, 1), WINED3D_SHADER_VERSION(2, 1), {16, 16,  32, 16,  0,  0}},
639         {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), {16, 16, 224, 16,  0, 10}},
640         {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0,  0, 32}},
641         {0}
642     },
643     cs_limits[] =
644     {
645         /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
646         {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0,  0,  0}},
647     };
648     const struct limits_entry *limits_array;
649     DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major,
650             shader->reg_maps.shader_version.minor);
651     int i = 0;
652 
653     switch (shader->reg_maps.shader_version.type)
654     {
655         default:
656             FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type);
657             /* Fall-through. */
658         case WINED3D_SHADER_TYPE_VERTEX:
659             limits_array = vs_limits;
660             break;
661         case WINED3D_SHADER_TYPE_HULL:
662             limits_array = hs_limits;
663             break;
664         case WINED3D_SHADER_TYPE_DOMAIN:
665             limits_array = ds_limits;
666             break;
667         case WINED3D_SHADER_TYPE_GEOMETRY:
668             limits_array = gs_limits;
669             break;
670         case WINED3D_SHADER_TYPE_PIXEL:
671             limits_array = ps_limits;
672             break;
673         case WINED3D_SHADER_TYPE_COMPUTE:
674             limits_array = cs_limits;
675             break;
676     }
677 
678     while (limits_array[i].min_version && limits_array[i].min_version <= shader_version)
679     {
680         if (shader_version <= limits_array[i].max_version)
681         {
682             shader->limits = &limits_array[i].limits;
683             break;
684         }
685         ++i;
686     }
687     if (!shader->limits)
688     {
689         FIXME("Unexpected shader version \"%u.%u\".\n",
690                 shader->reg_maps.shader_version.major,
691                 shader->reg_maps.shader_version.minor);
692         shader->limits = &limits_array[max(0, i - 1)].limits;
693     }
694 }
695 
shader_record_register_usage(struct wined3d_shader * shader,struct wined3d_shader_reg_maps * reg_maps,const struct wined3d_shader_register * reg,enum wined3d_shader_type shader_type,unsigned int constf_size)696 static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct wined3d_shader_reg_maps *reg_maps,
697         const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type, unsigned int constf_size)
698 {
699     switch (reg->type)
700     {
701         case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */
702             if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
703                 reg_maps->texcoord |= 1u << reg->idx[0].offset;
704             else
705                 reg_maps->address |= 1u << reg->idx[0].offset;
706             break;
707 
708         case WINED3DSPR_TEMP:
709             reg_maps->temporary |= 1u << reg->idx[0].offset;
710             break;
711 
712         case WINED3DSPR_INPUT:
713             if (reg->idx[0].rel_addr)
714                 reg_maps->input_rel_addressing = 1;
715             if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
716             {
717                 /* If relative addressing is used, we must assume that all
718                  * registers are used. Even if it is a construct like v3[aL],
719                  * we can't assume that v0, v1 and v2 aren't read because aL
720                  * can be negative. */
721                 if (reg->idx[0].rel_addr)
722                     shader->u.ps.input_reg_used = ~0u;
723                 else
724                     shader->u.ps.input_reg_used |= 1u << reg->idx[0].offset;
725             }
726             else
727             {
728                 reg_maps->input_registers |= 1u << reg->idx[0].offset;
729             }
730             break;
731 
732         case WINED3DSPR_RASTOUT:
733             if (reg->idx[0].offset == 1)
734                 reg_maps->fog = 1;
735             if (reg->idx[0].offset == 2)
736                 reg_maps->point_size = 1;
737             break;
738 
739         case WINED3DSPR_MISCTYPE:
740             if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
741             {
742                 if (!reg->idx[0].offset)
743                     reg_maps->vpos = 1;
744                 else if (reg->idx[0].offset == 1)
745                     reg_maps->usesfacing = 1;
746             }
747             break;
748 
749         case WINED3DSPR_CONST:
750             if (reg->idx[0].rel_addr)
751             {
752                 if (reg->idx[0].offset < reg_maps->min_rel_offset)
753                     reg_maps->min_rel_offset = reg->idx[0].offset;
754                 if (reg->idx[0].offset > reg_maps->max_rel_offset)
755                     reg_maps->max_rel_offset = reg->idx[0].offset;
756                 reg_maps->usesrelconstF = TRUE;
757             }
758             else
759             {
760                 if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size))
761                 {
762                     WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset);
763                     return FALSE;
764                 }
765                 else
766                 {
767                     wined3d_insert_bits(reg_maps->constf, reg->idx[0].offset, 1, 0x1);
768                 }
769             }
770             break;
771 
772         case WINED3DSPR_CONSTINT:
773             if (reg->idx[0].offset >= shader->limits->constant_int)
774             {
775                 WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset);
776                 return FALSE;
777             }
778             else
779             {
780                 reg_maps->integer_constants |= (1u << reg->idx[0].offset);
781             }
782             break;
783 
784         case WINED3DSPR_CONSTBOOL:
785             if (reg->idx[0].offset >= shader->limits->constant_bool)
786             {
787                 WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset);
788                 return FALSE;
789             }
790             else
791             {
792                 reg_maps->boolean_constants |= (1u << reg->idx[0].offset);
793             }
794             break;
795 
796         case WINED3DSPR_COLOROUT:
797             reg_maps->rt_mask |= (1u << reg->idx[0].offset);
798             break;
799 
800         case WINED3DSPR_OUTCONTROLPOINT:
801             reg_maps->vocp = 1;
802             break;
803 
804         default:
805             TRACE("Not recording register of type %#x and [%#x][%#x].\n",
806                     reg->type, reg->idx[0].offset, reg->idx[1].offset);
807             break;
808     }
809     return TRUE;
810 }
811 
shader_record_sample(struct wined3d_shader_reg_maps * reg_maps,unsigned int resource_idx,unsigned int sampler_idx,unsigned int bind_idx)812 static void shader_record_sample(struct wined3d_shader_reg_maps *reg_maps,
813         unsigned int resource_idx, unsigned int sampler_idx, unsigned int bind_idx)
814 {
815     struct wined3d_shader_sampler_map_entry *entries, *entry;
816     struct wined3d_shader_sampler_map *map;
817     unsigned int i;
818 
819     map = &reg_maps->sampler_map;
820     entries = map->entries;
821     for (i = 0; i < map->count; ++i)
822     {
823         if (entries[i].resource_idx == resource_idx && entries[i].sampler_idx == sampler_idx)
824             return;
825     }
826 
827     if (!map->size)
828     {
829         if (!(entries = heap_calloc(4, sizeof(*entries))))
830         {
831             ERR("Failed to allocate sampler map entries.\n");
832             return;
833         }
834         map->size = 4;
835         map->entries = entries;
836     }
837     else if (map->count == map->size)
838     {
839         size_t new_size = map->size * 2;
840 
841         if (sizeof(*entries) * new_size <= sizeof(*entries) * map->size
842                 || !(entries = heap_realloc(entries, sizeof(*entries) * new_size)))
843         {
844             ERR("Failed to resize sampler map entries.\n");
845             return;
846         }
847         map->size = new_size;
848         map->entries = entries;
849     }
850 
851     entry = &entries[map->count++];
852     entry->resource_idx = resource_idx;
853     entry->sampler_idx = sampler_idx;
854     entry->bind_idx = bind_idx;
855 }
856 
get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr,unsigned int param)857 static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param)
858 {
859     switch (instr)
860     {
861         case WINED3DSIH_M4x4:
862         case WINED3DSIH_M3x4:
863             return param == 1 ? 3 : 0;
864 
865         case WINED3DSIH_M4x3:
866         case WINED3DSIH_M3x3:
867             return param == 1 ? 2 : 0;
868 
869         case WINED3DSIH_M3x2:
870             return param == 1 ? 1 : 0;
871 
872         default:
873             return 0;
874     }
875 }
876 
shader_reg_maps_add_tgsm(struct wined3d_shader_reg_maps * reg_maps,unsigned int register_idx,unsigned int size,unsigned int stride)877 static HRESULT shader_reg_maps_add_tgsm(struct wined3d_shader_reg_maps *reg_maps,
878         unsigned int register_idx, unsigned int size, unsigned int stride)
879 {
880     struct wined3d_shader_tgsm *tgsm;
881 
882     if (register_idx >= MAX_TGSM_REGISTERS)
883     {
884         ERR("Invalid TGSM register index %u.\n", register_idx);
885         return S_OK;
886     }
887     if (reg_maps->shader_version.type != WINED3D_SHADER_TYPE_COMPUTE)
888     {
889         FIXME("TGSM declarations are allowed only in compute shaders.\n");
890         return S_OK;
891     }
892 
893     if (!wined3d_array_reserve((void **)&reg_maps->tgsm, &reg_maps->tgsm_capacity,
894             register_idx + 1, sizeof(*reg_maps->tgsm)))
895         return E_OUTOFMEMORY;
896 
897     reg_maps->tgsm_count = max(register_idx + 1, reg_maps->tgsm_count);
898     tgsm = &reg_maps->tgsm[register_idx];
899     tgsm->size = size;
900     tgsm->stride = stride;
901     return S_OK;
902 }
903 
shader_record_shader_phase(struct wined3d_shader * shader,struct wined3d_shader_phase ** current_phase,const struct wined3d_shader_instruction * ins,const DWORD * current_instruction_ptr,const DWORD * previous_instruction_ptr)904 static HRESULT shader_record_shader_phase(struct wined3d_shader *shader,
905         struct wined3d_shader_phase **current_phase, const struct wined3d_shader_instruction *ins,
906         const DWORD *current_instruction_ptr, const DWORD *previous_instruction_ptr)
907 {
908     struct wined3d_shader_phase *phase;
909 
910     if ((phase = *current_phase))
911     {
912         phase->end = previous_instruction_ptr;
913         *current_phase = NULL;
914     }
915 
916     if (shader->reg_maps.shader_version.type != WINED3D_SHADER_TYPE_HULL)
917     {
918         ERR("Unexpected shader type %#x.\n", shader->reg_maps.shader_version.type);
919         return E_FAIL;
920     }
921 
922     switch (ins->handler_idx)
923     {
924         case WINED3DSIH_HS_CONTROL_POINT_PHASE:
925             if (shader->u.hs.phases.control_point)
926             {
927                 FIXME("Multiple control point phases.\n");
928                 heap_free(shader->u.hs.phases.control_point);
929             }
930             if (!(shader->u.hs.phases.control_point = heap_alloc_zero(sizeof(*shader->u.hs.phases.control_point))))
931                 return E_OUTOFMEMORY;
932             phase = shader->u.hs.phases.control_point;
933             break;
934         case WINED3DSIH_HS_FORK_PHASE:
935             if (!wined3d_array_reserve((void **)&shader->u.hs.phases.fork,
936                     &shader->u.hs.phases.fork_size, shader->u.hs.phases.fork_count + 1,
937                     sizeof(*shader->u.hs.phases.fork)))
938                 return E_OUTOFMEMORY;
939             phase = &shader->u.hs.phases.fork[shader->u.hs.phases.fork_count++];
940             break;
941         case WINED3DSIH_HS_JOIN_PHASE:
942             if (!wined3d_array_reserve((void **)&shader->u.hs.phases.join,
943                     &shader->u.hs.phases.join_size, shader->u.hs.phases.join_count + 1,
944                     sizeof(*shader->u.hs.phases.join)))
945                 return E_OUTOFMEMORY;
946             phase = &shader->u.hs.phases.join[shader->u.hs.phases.join_count++];
947             break;
948         default:
949             ERR("Unexpected opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
950             return E_FAIL;
951     }
952 
953     phase->start = current_instruction_ptr;
954     *current_phase = phase;
955 
956     return WINED3D_OK;
957 }
958 
shader_calculate_clip_or_cull_distance_mask(const struct wined3d_shader_signature_element * e,unsigned int * mask)959 static HRESULT shader_calculate_clip_or_cull_distance_mask(
960         const struct wined3d_shader_signature_element *e, unsigned int *mask)
961 {
962     /* Clip and cull distances are packed in 4 component registers. 0 and 1 are
963      * the only allowed semantic indices.
964      */
965     if (e->semantic_idx >= MAX_CLIP_DISTANCES / 4)
966     {
967         *mask = 0;
968         WARN("Invalid clip/cull distance index %u.\n", e->semantic_idx);
969         return WINED3DERR_INVALIDCALL;
970     }
971 
972     *mask = (e->mask & WINED3DSP_WRITEMASK_ALL) << (4 * e->semantic_idx);
973     return WINED3D_OK;
974 }
975 
/* Store interpolation mode "mode" for register "register_idx" in the packed
 * array "packed_interpolation_mode". Each register occupies
 * WINED3D_PACKED_INTERPOLATION_BIT_COUNT consecutive bits.
 *
 * Modes above WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE are reported with a
 * FIXME but are still written. */
static void wined3d_insert_interpolation_mode(DWORD *packed_interpolation_mode,
        unsigned int register_idx, enum wined3d_shader_interpolation_mode mode)
{
    const unsigned int bit_offset = register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT;

    if (mode > WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE)
    {
        FIXME("Unexpected interpolation mode %#x.\n", mode);
    }

    wined3d_insert_bits(packed_interpolation_mode, bit_offset,
            WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
}
985 
986 /* Note that this does not count the loop register as an address register. */
shader_get_registers_used(struct wined3d_shader * shader,const struct wined3d_shader_frontend * fe,struct wined3d_shader_reg_maps * reg_maps,struct wined3d_shader_signature * input_signature,struct wined3d_shader_signature * output_signature,DWORD constf_size)987 static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const struct wined3d_shader_frontend *fe,
988         struct wined3d_shader_reg_maps *reg_maps, struct wined3d_shader_signature *input_signature,
989         struct wined3d_shader_signature *output_signature, DWORD constf_size)
990 {
991     struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
992     struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
993     unsigned int cur_loop_depth = 0, max_loop_depth = 0;
994     struct wined3d_shader_version shader_version;
995     struct wined3d_shader_phase *phase = NULL;
996     const DWORD *ptr, *prev_ins, *current_ins;
997     void *fe_data = shader->frontend_data;
998     unsigned int i;
999     HRESULT hr;
1000 
1001     memset(reg_maps, 0, sizeof(*reg_maps));
1002     memset(input_signature_elements, 0, sizeof(input_signature_elements));
1003     memset(output_signature_elements, 0, sizeof(output_signature_elements));
1004     reg_maps->min_rel_offset = ~0U;
1005     list_init(&reg_maps->indexable_temps);
1006 
1007     fe->shader_read_header(fe_data, &ptr, &shader_version);
1008     prev_ins = current_ins = ptr;
1009     reg_maps->shader_version = shader_version;
1010 
1011     shader_set_limits(shader);
1012 
1013     if (!(reg_maps->constf = heap_calloc(((min(shader->limits->constant_float, constf_size) + 31) / 32),
1014             sizeof(*reg_maps->constf))))
1015     {
1016         ERR("Failed to allocate constant map memory.\n");
1017         return E_OUTOFMEMORY;
1018     }
1019 
1020     while (!fe->shader_is_end(fe_data, &ptr))
1021     {
1022         struct wined3d_shader_instruction ins;
1023 
1024         current_ins = ptr;
1025         /* Fetch opcode. */
1026         fe->shader_read_instruction(fe_data, &ptr, &ins);
1027 
1028         /* Unhandled opcode, and its parameters. */
1029         if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
1030         {
1031             WARN("Encountered unrecognised or invalid instruction.\n");
1032             return WINED3DERR_INVALIDCALL;
1033         }
1034 
1035         /* Handle declarations. */
1036         if (ins.handler_idx == WINED3DSIH_DCL
1037                 || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
1038         {
1039             struct wined3d_shader_semantic *semantic = &ins.declaration.semantic;
1040             unsigned int reg_idx = semantic->reg.reg.idx[0].offset;
1041 
1042             switch (semantic->reg.reg.type)
1043             {
1044                 /* Mark input registers used. */
1045                 case WINED3DSPR_INPUT:
1046                     if (reg_idx >= MAX_REG_INPUT)
1047                     {
1048                         ERR("Invalid input register index %u.\n", reg_idx);
1049                         break;
1050                     }
1051                     if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL && shader_version.major == 3
1052                             && semantic->usage == WINED3D_DECL_USAGE_POSITION && !semantic->usage_idx)
1053                         return WINED3DERR_INVALIDCALL;
1054                     reg_maps->input_registers |= 1u << reg_idx;
1055                     shader_signature_from_semantic(&input_signature_elements[reg_idx], semantic);
1056                     break;
1057 
1058                 /* Vertex shader: mark 3.0 output registers used, save token. */
1059                 case WINED3DSPR_OUTPUT:
1060                     if (reg_idx >= MAX_REG_OUTPUT)
1061                     {
1062                         ERR("Invalid output register index %u.\n", reg_idx);
1063                         break;
1064                     }
1065                     reg_maps->output_registers |= 1u << reg_idx;
1066                     shader_signature_from_semantic(&output_signature_elements[reg_idx], semantic);
1067                     if (semantic->usage == WINED3D_DECL_USAGE_FOG)
1068                         reg_maps->fog = 1;
1069                     if (semantic->usage == WINED3D_DECL_USAGE_PSIZE)
1070                         reg_maps->point_size = 1;
1071                     break;
1072 
1073                 case WINED3DSPR_SAMPLER:
1074                     shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1075                 case WINED3DSPR_RESOURCE:
1076                     if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1077                     {
1078                         ERR("Invalid resource index %u.\n", reg_idx);
1079                         break;
1080                     }
1081                     reg_maps->resource_info[reg_idx].type = semantic->resource_type;
1082                     reg_maps->resource_info[reg_idx].data_type = semantic->resource_data_type;
1083                     break;
1084 
1085                 case WINED3DSPR_UAV:
1086                     if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1087                     {
1088                         ERR("Invalid UAV resource index %u.\n", reg_idx);
1089                         break;
1090                     }
1091                     reg_maps->uav_resource_info[reg_idx].type = semantic->resource_type;
1092                     reg_maps->uav_resource_info[reg_idx].data_type = semantic->resource_data_type;
1093                     if (ins.flags)
1094                         FIXME("Ignoring typed UAV flags %#x.\n", ins.flags);
1095                     break;
1096 
1097                 default:
1098                     TRACE("Not recording DCL register type %#x.\n", semantic->reg.reg.type);
1099                     break;
1100             }
1101         }
1102         else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
1103         {
1104             struct wined3d_shader_register *reg = &ins.declaration.src.reg;
1105             if (reg->idx[0].offset >= WINED3D_MAX_CBS)
1106                 ERR("Invalid CB index %u.\n", reg->idx[0].offset);
1107             else
1108                 reg_maps->cb_sizes[reg->idx[0].offset] = reg->idx[1].offset;
1109         }
1110         else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
1111         {
1112             if (ins.flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
1113             {
1114                 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1115                     shader->u.ps.force_early_depth_stencil = TRUE;
1116                 else
1117                     FIXME("Invalid instruction %#x for shader type %#x.\n",
1118                             ins.handler_idx, shader_version.type);
1119             }
1120             else
1121             {
1122                 WARN("Ignoring global flags %#x.\n", ins.flags);
1123             }
1124         }
1125         else if (ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES)
1126         {
1127             if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1128                 shader->u.gs.instance_count = ins.declaration.count;
1129             else
1130                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1131                         ins.handler_idx, shader_version.type);
1132         }
1133         else if (ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
1134                 || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
1135         {
1136             if (phase)
1137                 phase->instance_count = ins.declaration.count;
1138             else
1139                 FIXME("Instruction %s outside of shader phase.\n",
1140                         debug_d3dshaderinstructionhandler(ins.handler_idx));
1141         }
1142         else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
1143         {
1144             if (reg_maps->icb)
1145                 FIXME("Multiple immediate constant buffers.\n");
1146             reg_maps->icb = ins.declaration.icb;
1147         }
1148         else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
1149         {
1150             if (phase)
1151             {
1152                 FIXME("Indexable temporary registers not supported.\n");
1153             }
1154             else
1155             {
1156                 struct wined3d_shader_indexable_temp *reg;
1157 
1158                 if (!(reg = heap_alloc(sizeof(*reg))))
1159                     return E_OUTOFMEMORY;
1160 
1161                 *reg = ins.declaration.indexable_temp;
1162                 list_add_tail(&reg_maps->indexable_temps, &reg->entry);
1163             }
1164         }
1165         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE)
1166         {
1167             if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1168                 shader->u.gs.input_type = ins.declaration.primitive_type.type;
1169             else
1170                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1171                         ins.handler_idx, shader_version.type);
1172         }
1173         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
1174         {
1175             unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1176             if (reg_idx >= MAX_REG_INPUT)
1177             {
1178                 ERR("Invalid register index %u.\n", reg_idx);
1179                 break;
1180             }
1181             if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1182                 wined3d_insert_interpolation_mode(shader->u.ps.interpolation_mode, reg_idx, ins.flags);
1183             else
1184                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1185                         ins.handler_idx, shader_version.type);
1186         }
1187         else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
1188         {
1189             if (ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUT
1190                     || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTGE
1191                     || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTLE)
1192             {
1193                 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1194                     shader->u.ps.depth_output = ins.declaration.dst.reg.type;
1195                 else
1196                     FIXME("Invalid instruction %#x for shader type %#x.\n",
1197                             ins.handler_idx, shader_version.type);
1198             }
1199         }
1200         else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT)
1201         {
1202             if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1203                 shader->u.hs.output_vertex_count = ins.declaration.count;
1204             else
1205                 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1206         }
1207         else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
1208         {
1209             if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1210                 shader->u.gs.output_type = ins.declaration.primitive_type.type;
1211             else
1212                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1213                         ins.handler_idx, shader_version.type);
1214         }
1215         else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
1216         {
1217             unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1218             if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1219             {
1220                 ERR("Invalid resource index %u.\n", reg_idx);
1221                 break;
1222             }
1223             reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1224             reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1225             reg_maps->resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1226         }
1227         else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
1228         {
1229             unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1230             if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1231             {
1232                 ERR("Invalid resource index %u.\n", reg_idx);
1233                 break;
1234             }
1235             reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1236             reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1237             reg_maps->resource_info[reg_idx].flags = 0;
1238             reg_maps->resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1239         }
1240         else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
1241         {
1242             if (ins.flags & WINED3DSI_SAMPLER_COMPARISON_MODE)
1243                 reg_maps->sampler_comparison_mode |= (1u << ins.declaration.dst.reg.idx[0].offset);
1244         }
1245         else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS)
1246         {
1247             if (phase)
1248                 phase->temporary_count = ins.declaration.count;
1249             else
1250                 reg_maps->temporary_count = ins.declaration.count;
1251         }
1252         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
1253         {
1254             if (shader_version.type == WINED3D_SHADER_TYPE_DOMAIN)
1255                 shader->u.ds.tessellator_domain = ins.declaration.tessellator_domain;
1256             else if (shader_version.type != WINED3D_SHADER_TYPE_HULL)
1257                 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1258         }
1259         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
1260         {
1261             if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1262                 shader->u.hs.tessellator_output_primitive = ins.declaration.tessellator_output_primitive;
1263             else
1264                 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1265         }
1266         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
1267         {
1268             if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1269                 shader->u.hs.tessellator_partitioning = ins.declaration.tessellator_partitioning;
1270             else
1271                 FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1272         }
1273         else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
1274         {
1275             if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps, ins.declaration.tgsm_raw.reg.reg.idx[0].offset,
1276                     ins.declaration.tgsm_raw.byte_count / 4, 0)))
1277                 return hr;
1278         }
1279         else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
1280         {
1281             unsigned int stride = ins.declaration.tgsm_structured.byte_stride / 4;
1282             unsigned int size = stride * ins.declaration.tgsm_structured.structure_count;
1283             if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps,
1284                     ins.declaration.tgsm_structured.reg.reg.idx[0].offset, size, stride)))
1285                 return hr;
1286         }
1287         else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
1288         {
1289             if (shader_version.type == WINED3D_SHADER_TYPE_COMPUTE)
1290             {
1291                 shader->u.cs.thread_group_size = ins.declaration.thread_group_size;
1292             }
1293             else
1294             {
1295                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1296                         ins.handler_idx, shader_version.type);
1297             }
1298         }
1299         else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
1300         {
1301             unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1302             if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1303             {
1304                 ERR("Invalid UAV resource index %u.\n", reg_idx);
1305                 break;
1306             }
1307             if (ins.flags)
1308                 FIXME("Ignoring raw UAV flags %#x.\n", ins.flags);
1309             reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1310             reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1311             reg_maps->uav_resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1312         }
1313         else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
1314         {
1315             unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1316             if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1317             {
1318                 ERR("Invalid UAV resource index %u.\n", reg_idx);
1319                 break;
1320             }
1321             if (ins.flags)
1322                 FIXME("Ignoring structured UAV flags %#x.\n", ins.flags);
1323             reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1324             reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1325             reg_maps->uav_resource_info[reg_idx].flags = 0;
1326             reg_maps->uav_resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1327         }
1328         else if (ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
1329         {
1330             if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1331                 shader->u.gs.vertices_out = ins.declaration.count;
1332             else
1333                 FIXME("Invalid instruction %#x for shader type %#x.\n",
1334                         ins.handler_idx, shader_version.type);
1335         }
1336         else if (ins.handler_idx == WINED3DSIH_DEF)
1337         {
1338             struct wined3d_shader_lconst *lconst;
1339             float *value;
1340 
1341             if (!(lconst = heap_alloc(sizeof(*lconst))))
1342                 return E_OUTOFMEMORY;
1343 
1344             lconst->idx = ins.dst[0].reg.idx[0].offset;
1345             memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1346             value = (float *)lconst->value;
1347 
1348             /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
1349             if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1350             {
1351                 if (value[0] < -1.0f) value[0] = -1.0f;
1352                 else if (value[0] > 1.0f) value[0] = 1.0f;
1353                 if (value[1] < -1.0f) value[1] = -1.0f;
1354                 else if (value[1] > 1.0f) value[1] = 1.0f;
1355                 if (value[2] < -1.0f) value[2] = -1.0f;
1356                 else if (value[2] > 1.0f) value[2] = 1.0f;
1357                 if (value[3] < -1.0f) value[3] = -1.0f;
1358                 else if (value[3] > 1.0f) value[3] = 1.0f;
1359             }
1360 
1361             list_add_head(&shader->constantsF, &lconst->entry);
1362 
1363             if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
1364                     || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
1365             {
1366                 shader->lconst_inf_or_nan = TRUE;
1367             }
1368         }
1369         else if (ins.handler_idx == WINED3DSIH_DEFI)
1370         {
1371             struct wined3d_shader_lconst *lconst;
1372 
1373             if (!(lconst = heap_alloc(sizeof(*lconst))))
1374                 return E_OUTOFMEMORY;
1375 
1376             lconst->idx = ins.dst[0].reg.idx[0].offset;
1377             memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1378 
1379             list_add_head(&shader->constantsI, &lconst->entry);
1380             reg_maps->local_int_consts |= (1u << lconst->idx);
1381         }
1382         else if (ins.handler_idx == WINED3DSIH_DEFB)
1383         {
1384             struct wined3d_shader_lconst *lconst;
1385 
1386             if (!(lconst = heap_alloc(sizeof(*lconst))))
1387                 return E_OUTOFMEMORY;
1388 
1389             lconst->idx = ins.dst[0].reg.idx[0].offset;
1390             memcpy(lconst->value, ins.src[0].reg.u.immconst_data, sizeof(DWORD));
1391 
1392             list_add_head(&shader->constantsB, &lconst->entry);
1393             reg_maps->local_bool_consts |= (1u << lconst->idx);
1394         }
1395         /* Handle shader phases. */
1396         else if (ins.handler_idx == WINED3DSIH_HS_CONTROL_POINT_PHASE
1397                 || ins.handler_idx == WINED3DSIH_HS_FORK_PHASE
1398                 || ins.handler_idx == WINED3DSIH_HS_JOIN_PHASE)
1399         {
1400             if (FAILED(hr = shader_record_shader_phase(shader, &phase, &ins, current_ins, prev_ins)))
1401                 return hr;
1402         }
1403         /* For subroutine prototypes. */
1404         else if (ins.handler_idx == WINED3DSIH_LABEL)
1405         {
1406             reg_maps->labels |= 1u << ins.src[0].reg.idx[0].offset;
1407         }
1408         /* Set texture, address, temporary registers. */
1409         else
1410         {
1411             BOOL color0_mov = FALSE;
1412             unsigned int i;
1413 
1414             /* This will loop over all the registers and try to
1415              * make a bitmask of the ones we're interested in.
1416              *
1417              * Relative addressing tokens are ignored, but that's
1418              * okay, since we'll catch any address registers when
1419              * they are initialized (required by spec). */
1420             for (i = 0; i < ins.dst_count; ++i)
1421             {
1422                 if (!shader_record_register_usage(shader, reg_maps, &ins.dst[i].reg,
1423                         shader_version.type, constf_size))
1424                     return WINED3DERR_INVALIDCALL;
1425 
1426                 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1427                 {
1428                     UINT idx = ins.dst[i].reg.idx[0].offset;
1429 
1430                     switch (ins.dst[i].reg.type)
1431                     {
1432                         case WINED3DSPR_RASTOUT:
1433                             if (shader_version.major >= 3)
1434                                 break;
1435                             switch (idx)
1436                             {
1437                                 case 0: /* oPos */
1438                                     reg_maps->output_registers |= 1u << 10;
1439                                     shader_signature_from_usage(&output_signature_elements[10],
1440                                             WINED3D_DECL_USAGE_POSITION, 0, 10, WINED3DSP_WRITEMASK_ALL);
1441                                     break;
1442 
1443                                 case 1: /* oFog */
1444                                     reg_maps->output_registers |= 1u << 11;
1445                                     shader_signature_from_usage(&output_signature_elements[11],
1446                                             WINED3D_DECL_USAGE_FOG, 0, 11, WINED3DSP_WRITEMASK_0);
1447                                     break;
1448 
1449                                 case 2: /* oPts */
1450                                     reg_maps->output_registers |= 1u << 11;
1451                                     shader_signature_from_usage(&output_signature_elements[11],
1452                                             WINED3D_DECL_USAGE_PSIZE, 0, 11, WINED3DSP_WRITEMASK_1);
1453                                     break;
1454                             }
1455                             break;
1456 
1457                         case WINED3DSPR_ATTROUT:
1458                             if (shader_version.major >= 3)
1459                                 break;
1460                             if (idx < 2)
1461                             {
1462                                 idx += 8;
1463                                 if (reg_maps->output_registers & (1u << idx))
1464                                 {
1465                                     output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1466                                 }
1467                                 else
1468                                 {
1469                                     reg_maps->output_registers |= 1u << idx;
1470                                     shader_signature_from_usage(&output_signature_elements[idx],
1471                                             WINED3D_DECL_USAGE_COLOR, idx - 8, idx, ins.dst[i].write_mask);
1472                                 }
1473                             }
1474                             break;
1475 
1476                         case WINED3DSPR_TEXCRDOUT: /* WINED3DSPR_OUTPUT */
1477                             if (shader_version.major >= 3)
1478                             {
1479                                 if (idx >= ARRAY_SIZE(reg_maps->u.output_registers_mask))
1480                                 {
1481                                     WARN("Invalid output register index %u.\n", idx);
1482                                     break;
1483                                 }
1484                                 reg_maps->u.output_registers_mask[idx] |= ins.dst[i].write_mask;
1485                                 break;
1486                             }
1487                             if (idx >= ARRAY_SIZE(reg_maps->u.texcoord_mask))
1488                             {
1489                                 WARN("Invalid texcoord index %u.\n", idx);
1490                                 break;
1491                             }
1492                             reg_maps->u.texcoord_mask[idx] |= ins.dst[i].write_mask;
1493                             if (reg_maps->output_registers & (1u << idx))
1494                             {
1495                                 output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1496                             }
1497                             else
1498                             {
1499                                 reg_maps->output_registers |= 1u << idx;
1500                                 shader_signature_from_usage(&output_signature_elements[idx],
1501                                         WINED3D_DECL_USAGE_TEXCOORD, idx, idx, ins.dst[i].write_mask);
1502                             }
1503                             break;
1504 
1505                         default:
1506                             break;
1507                     }
1508                 }
1509 
1510                 if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1511                 {
1512                     if (ins.dst[i].reg.type == WINED3DSPR_COLOROUT && !ins.dst[i].reg.idx[0].offset)
1513                     {
1514                         /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
1515                          * COLOROUT 0. If we know this in advance, the ARB shader backend can skip
1516                          * the mov and perform the sRGB write correction from the source register.
1517                          *
1518                          * However, if the mov is only partial, we can't do this, and if the write
1519                          * comes from an instruction other than MOV it is hard to do as well. If
1520                          * COLOROUT 0 is overwritten partially later, the marker is dropped again. */
1521                         shader->u.ps.color0_mov = FALSE;
1522                         if (ins.handler_idx == WINED3DSIH_MOV
1523                                 && ins.dst[i].write_mask == WINED3DSP_WRITEMASK_ALL)
1524                         {
1525                             /* Used later when the source register is read. */
1526                             color0_mov = TRUE;
1527                         }
1528                     }
1529                     /* Also drop the MOV marker if the source register is overwritten prior to the shader
1530                      * end
1531                      */
1532                     else if (ins.dst[i].reg.type == WINED3DSPR_TEMP
1533                             && ins.dst[i].reg.idx[0].offset == shader->u.ps.color0_reg)
1534                     {
1535                         shader->u.ps.color0_mov = FALSE;
1536                     }
1537                 }
1538 
1539                 /* Declare 1.x samplers implicitly, based on the destination reg. number. */
1540                 if (shader_version.major == 1
1541                         && (ins.handler_idx == WINED3DSIH_TEX
1542                             || ins.handler_idx == WINED3DSIH_TEXBEM
1543                             || ins.handler_idx == WINED3DSIH_TEXBEML
1544                             || ins.handler_idx == WINED3DSIH_TEXDP3TEX
1545                             || ins.handler_idx == WINED3DSIH_TEXM3x2TEX
1546                             || ins.handler_idx == WINED3DSIH_TEXM3x3SPEC
1547                             || ins.handler_idx == WINED3DSIH_TEXM3x3TEX
1548                             || ins.handler_idx == WINED3DSIH_TEXM3x3VSPEC
1549                             || ins.handler_idx == WINED3DSIH_TEXREG2AR
1550                             || ins.handler_idx == WINED3DSIH_TEXREG2GB
1551                             || ins.handler_idx == WINED3DSIH_TEXREG2RGB))
1552                 {
1553                     unsigned int reg_idx = ins.dst[i].reg.idx[0].offset;
1554 
1555                     if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1556                     {
1557                         WARN("Invalid 1.x sampler index %u.\n", reg_idx);
1558                         continue;
1559                     }
1560 
1561                     TRACE("Setting fake 2D resource for 1.x pixelshader.\n");
1562                     reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
1563                     reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_FLOAT;
1564                     shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1565 
1566                     /* texbem is only valid with < 1.4 pixel shaders */
1567                     if (ins.handler_idx == WINED3DSIH_TEXBEM
1568                             || ins.handler_idx == WINED3DSIH_TEXBEML)
1569                     {
1570                         reg_maps->bumpmat |= 1u << reg_idx;
1571                         if (ins.handler_idx == WINED3DSIH_TEXBEML)
1572                         {
1573                             reg_maps->luminanceparams |= 1u << reg_idx;
1574                         }
1575                     }
1576                 }
1577                 else if (ins.handler_idx == WINED3DSIH_BEM)
1578                 {
1579                     reg_maps->bumpmat |= 1u << ins.dst[i].reg.idx[0].offset;
1580                 }
1581             }
1582 
1583             if (ins.handler_idx == WINED3DSIH_IMM_ATOMIC_ALLOC || ins.handler_idx == WINED3DSIH_IMM_ATOMIC_CONSUME)
1584             {
1585                 unsigned int reg_idx = ins.src[0].reg.idx[0].offset;
1586                 if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1587                 {
1588                     ERR("Invalid UAV index %u.\n", reg_idx);
1589                     break;
1590                 }
1591                 reg_maps->uav_counter_mask |= (1u << reg_idx);
1592             }
1593             else if ((WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1594                     || (WINED3DSIH_IMM_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_IMM_ATOMIC_XOR)
1595                     || (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_UAV)
1596                     || ins.handler_idx == WINED3DSIH_LD_UAV_TYPED
1597                     || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_UAV)
1598                     || (ins.handler_idx == WINED3DSIH_LD_STRUCTURED && ins.src[2].reg.type == WINED3DSPR_UAV))
1599             {
1600                 unsigned int reg_idx;
1601                 if (ins.handler_idx == WINED3DSIH_LD_UAV_TYPED || ins.handler_idx == WINED3DSIH_LD_RAW)
1602                     reg_idx = ins.src[1].reg.idx[0].offset;
1603                 else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED)
1604                     reg_idx = ins.src[2].reg.idx[0].offset;
1605                 else if (WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1606                     reg_idx = ins.dst[0].reg.idx[0].offset;
1607                 else if (ins.handler_idx == WINED3DSIH_BUFINFO)
1608                     reg_idx = ins.src[0].reg.idx[0].offset;
1609                 else
1610                     reg_idx = ins.dst[1].reg.idx[0].offset;
1611                 if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1612                 {
1613                     ERR("Invalid UAV index %u.\n", reg_idx);
1614                     break;
1615                 }
1616                 reg_maps->uav_read_mask |= (1u << reg_idx);
1617             }
1618             else if (ins.handler_idx == WINED3DSIH_NRM)
1619             {
1620                 reg_maps->usesnrm = 1;
1621             }
1622             else if (ins.handler_idx == WINED3DSIH_DSY
1623                     || ins.handler_idx == WINED3DSIH_DSY_COARSE
1624                     || ins.handler_idx == WINED3DSIH_DSY_FINE)
1625             {
1626                 reg_maps->usesdsy = 1;
1627             }
1628             else if (ins.handler_idx == WINED3DSIH_DSX
1629                     || ins.handler_idx == WINED3DSIH_DSX_COARSE
1630                     || ins.handler_idx == WINED3DSIH_DSX_FINE)
1631             {
1632                 reg_maps->usesdsx = 1;
1633             }
1634             else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1;
1635             else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1;
1636             else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1;
1637             else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1;
1638             else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1;
1639             else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1;
1640             else if (ins.handler_idx == WINED3DSIH_LOOP
1641                     || ins.handler_idx == WINED3DSIH_REP)
1642             {
1643                 ++cur_loop_depth;
1644                 if (cur_loop_depth > max_loop_depth)
1645                     max_loop_depth = cur_loop_depth;
1646             }
1647             else if (ins.handler_idx == WINED3DSIH_ENDLOOP
1648                     || ins.handler_idx == WINED3DSIH_ENDREP)
1649             {
1650                 --cur_loop_depth;
1651             }
1652             else if (ins.handler_idx == WINED3DSIH_GATHER4
1653                     || ins.handler_idx == WINED3DSIH_GATHER4_C
1654                     || ins.handler_idx == WINED3DSIH_SAMPLE
1655                     || ins.handler_idx == WINED3DSIH_SAMPLE_B
1656                     || ins.handler_idx == WINED3DSIH_SAMPLE_C
1657                     || ins.handler_idx == WINED3DSIH_SAMPLE_C_LZ
1658                     || ins.handler_idx == WINED3DSIH_SAMPLE_GRAD
1659                     || ins.handler_idx == WINED3DSIH_SAMPLE_LOD)
1660             {
1661                 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1662                         ins.src[2].reg.idx[0].offset, reg_maps->sampler_map.count);
1663             }
1664             else if (ins.handler_idx == WINED3DSIH_GATHER4_PO
1665                     || ins.handler_idx == WINED3DSIH_GATHER4_PO_C)
1666             {
1667                 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1668                         ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
1669             }
1670             else if (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
1671             {
1672                 shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
1673                         WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1674             }
1675             else if (ins.handler_idx == WINED3DSIH_LD
1676                     || ins.handler_idx == WINED3DSIH_LD2DMS
1677                     || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_RESOURCE)
1678                     || (ins.handler_idx == WINED3DSIH_RESINFO && ins.src[1].reg.type == WINED3DSPR_RESOURCE))
1679             {
1680                 shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1681                         WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1682             }
1683             else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED
1684                     && ins.src[2].reg.type == WINED3DSPR_RESOURCE)
1685             {
1686                 shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1687                         WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1688             }
1689 
1690             if (ins.predicate)
1691                 if (!shader_record_register_usage(shader, reg_maps, &ins.predicate->reg,
1692                         shader_version.type, constf_size))
1693                     return WINED3DERR_INVALIDCALL;
1694 
1695             for (i = 0; i < ins.src_count; ++i)
1696             {
1697                 unsigned int count = get_instr_extra_regcount(ins.handler_idx, i);
1698                 struct wined3d_shader_register reg = ins.src[i].reg;
1699 
1700                 if (!shader_record_register_usage(shader, reg_maps, &ins.src[i].reg,
1701                         shader_version.type, constf_size))
1702                     return WINED3DERR_INVALIDCALL;
1703                 while (count)
1704                 {
1705                     ++reg.idx[0].offset;
1706                     if (!shader_record_register_usage(shader, reg_maps, &reg,
1707                             shader_version.type, constf_size))
1708                         return WINED3DERR_INVALIDCALL;
1709                     --count;
1710                 }
1711 
1712                 if (color0_mov)
1713                 {
1714                     if (ins.src[i].reg.type == WINED3DSPR_TEMP
1715                             && ins.src[i].swizzle == WINED3DSP_NOSWIZZLE)
1716                     {
1717                         shader->u.ps.color0_mov = TRUE;
1718                         shader->u.ps.color0_reg = ins.src[i].reg.idx[0].offset;
1719                     }
1720                 }
1721             }
1722         }
1723 
1724         prev_ins = current_ins;
1725     }
1726     reg_maps->loop_depth = max_loop_depth;
1727 
1728     if (phase)
1729     {
1730         phase->end = prev_ins;
1731         phase = NULL;
1732     }
1733 
1734     /* PS before 2.0 don't have explicit color outputs. Instead the value of
1735      * R0 is written to the render target. */
1736     if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1737         reg_maps->rt_mask |= (1u << 0);
1738 
1739     if (input_signature->elements)
1740     {
1741         for (i = 0; i < input_signature->element_count; ++i)
1742         {
1743             if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1744             {
1745                 if (input_signature->elements[i].register_idx >= ARRAY_SIZE(shader->u.vs.attributes))
1746                 {
1747                     WARN("Invalid input signature register index %u.\n", input_signature->elements[i].register_idx);
1748                     return WINED3DERR_INVALIDCALL;
1749                 }
1750             }
1751             else if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1752             {
1753                 if (input_signature->elements[i].sysval_semantic == WINED3D_SV_POSITION)
1754                     reg_maps->vpos = 1;
1755                 else if (input_signature->elements[i].sysval_semantic == WINED3D_SV_IS_FRONT_FACE)
1756                     reg_maps->usesfacing = 1;
1757             }
1758             reg_maps->input_registers |= 1u << input_signature->elements[i].register_idx;
1759         }
1760     }
1761     else if (!input_signature->elements && reg_maps->input_registers)
1762     {
1763         unsigned int count = wined3d_popcount(reg_maps->input_registers);
1764         struct wined3d_shader_signature_element *e;
1765         unsigned int i;
1766 
1767         if (!(input_signature->elements = heap_calloc(count, sizeof(*input_signature->elements))))
1768             return E_OUTOFMEMORY;
1769         input_signature->element_count = count;
1770 
1771         e = input_signature->elements;
1772         for (i = 0; i < ARRAY_SIZE(input_signature_elements); ++i)
1773         {
1774             if (!(reg_maps->input_registers & (1u << i)))
1775                 continue;
1776             input_signature_elements[i].register_idx = i;
1777             *e++ = input_signature_elements[i];
1778         }
1779     }
1780 
1781     if (output_signature->elements)
1782     {
1783         for (i = 0; i < output_signature->element_count; ++i)
1784         {
1785             const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
1786             unsigned int mask;
1787 
1788             reg_maps->output_registers |= 1u << e->register_idx;
1789             if (e->sysval_semantic == WINED3D_SV_CLIP_DISTANCE)
1790             {
1791                 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
1792                     return hr;
1793                 reg_maps->clip_distance_mask |= mask;
1794             }
1795             else if (e->sysval_semantic == WINED3D_SV_CULL_DISTANCE)
1796             {
1797                 if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(e, &mask)))
1798                     return hr;
1799                 reg_maps->cull_distance_mask |= mask;
1800             }
1801         }
1802     }
1803     else if (reg_maps->output_registers)
1804     {
1805         unsigned int count = wined3d_popcount(reg_maps->output_registers);
1806         struct wined3d_shader_signature_element *e;
1807 
1808         if (!(output_signature->elements = heap_calloc(count, sizeof(*output_signature->elements))))
1809             return E_OUTOFMEMORY;
1810         output_signature->element_count = count;
1811 
1812         e = output_signature->elements;
1813         for (i = 0; i < ARRAY_SIZE(output_signature_elements); ++i)
1814         {
1815             if (!(reg_maps->output_registers & (1u << i)))
1816                 continue;
1817             *e++ = output_signature_elements[i];
1818         }
1819     }
1820 
1821     return WINED3D_OK;
1822 }
1823 
shader_cleanup_reg_maps(struct wined3d_shader_reg_maps * reg_maps)1824 static void shader_cleanup_reg_maps(struct wined3d_shader_reg_maps *reg_maps)
1825 {
1826     struct wined3d_shader_indexable_temp *reg, *reg_next;
1827 
1828     heap_free(reg_maps->constf);
1829     heap_free(reg_maps->sampler_map.entries);
1830 
1831     LIST_FOR_EACH_ENTRY_SAFE(reg, reg_next, &reg_maps->indexable_temps, struct wined3d_shader_indexable_temp, entry)
1832         heap_free(reg);
1833     list_init(&reg_maps->indexable_temps);
1834 
1835     heap_free(reg_maps->tgsm);
1836 }
1837 
shader_find_free_input_register(const struct wined3d_shader_reg_maps * reg_maps,unsigned int max)1838 unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max)
1839 {
1840     DWORD map = 1u << max;
1841     map |= map - 1;
1842     map &= reg_maps->shader_version.major < 3 ? ~reg_maps->texcoord : ~reg_maps->input_registers;
1843 
1844     return wined3d_log2i(map);
1845 }
1846 
/* Append a textual rendering of a shader global-flags bitmask to buffer.
 *
 * Each recognised flag is written by name; consecutive items are separated
 * by " | ". Any bits that are not recognised are written last as a single
 * "unknown_flags(<hex>)" item. Nothing is written for an empty mask. */
static void shader_dump_global_flags(struct wined3d_string_buffer *buffer, DWORD global_flags)
{
    if (global_flags & WINED3DSGF_REFACTORING_ALLOWED)
    {
        shader_addline(buffer, "refactoringAllowed");
        global_flags &= ~WINED3DSGF_REFACTORING_ALLOWED;
        if (global_flags)
            shader_addline(buffer, " | ");
    }

    if (global_flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
    {
        shader_addline(buffer, "forceEarlyDepthStencil");
        global_flags &= ~WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL;
        if (global_flags)
            shader_addline(buffer, " | ");
    }

    if (global_flags & WINED3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS)
    {
        shader_addline(buffer, "enableRawAndStructuredBuffers");
        global_flags &= ~WINED3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
        /* Whatever is left at this point is unrecognised and will be
         * printed below; separate it from this name like the other flags
         * do instead of running the two items together. */
        if (global_flags)
            shader_addline(buffer, " | ");
    }

    if (global_flags)
        shader_addline(buffer, "unknown_flags(%#x)", global_flags);
}
1874 
/* Append the suffixes for a sync flags bitmask to buffer: "_g" for
 * WINED3DSSF_GROUP_SHARED_MEMORY, "_t" for WINED3DSSF_THREAD_GROUP, and
 * "_unknown_flags(<hex>)" for any bits that remain after those two have
 * been removed. */
static void shader_dump_sync_flags(struct wined3d_string_buffer *buffer, DWORD sync_flags)
{
    DWORD remaining = sync_flags;

    if (remaining & WINED3DSSF_GROUP_SHARED_MEMORY)
        shader_addline(buffer, "_g");
    remaining &= ~WINED3DSSF_GROUP_SHARED_MEMORY;

    if (remaining & WINED3DSSF_THREAD_GROUP)
        shader_addline(buffer, "_t");
    remaining &= ~WINED3DSSF_THREAD_GROUP;

    /* Anything still set is a flag this dumper does not know by name. */
    if (remaining)
        shader_addline(buffer, "_unknown_flags(%#x)", remaining);
}
1891 
/* Append the suffixes for a UAV flags bitmask to buffer: "_glc" for
 * WINED3DSUF_GLOBALLY_COHERENT, "_opc" for
 * WINED3DSUF_ORDER_PRESERVING_COUNTER, and "_unknown_flags(<hex>)" for any
 * bits that remain after those two have been removed. */
static void shader_dump_uav_flags(struct wined3d_string_buffer *buffer, DWORD uav_flags)
{
    DWORD remaining = uav_flags;

    if (remaining & WINED3DSUF_GLOBALLY_COHERENT)
        shader_addline(buffer, "_glc");
    remaining &= ~WINED3DSUF_GLOBALLY_COHERENT;

    if (remaining & WINED3DSUF_ORDER_PRESERVING_COUNTER)
        shader_addline(buffer, "_opc");
    remaining &= ~WINED3DSUF_ORDER_PRESERVING_COUNTER;

    /* Anything still set is a flag this dumper does not know by name. */
    if (remaining)
        shader_addline(buffer, "_unknown_flags(%#x)", remaining);
}
1908 
shader_dump_tessellator_domain(struct wined3d_string_buffer * buffer,enum wined3d_tessellator_domain domain)1909 static void shader_dump_tessellator_domain(struct wined3d_string_buffer *buffer,
1910         enum wined3d_tessellator_domain domain)
1911 {
1912     switch (domain)
1913     {
1914         case WINED3D_TESSELLATOR_DOMAIN_LINE:
1915             shader_addline(buffer, "line");
1916             break;
1917         case WINED3D_TESSELLATOR_DOMAIN_TRIANGLE:
1918             shader_addline(buffer, "triangle");
1919             break;
1920         case WINED3D_TESSELLATOR_DOMAIN_QUAD:
1921             shader_addline(buffer, "quad");
1922             break;
1923         default:
1924             shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain);
1925             break;
1926     }
1927 }
1928 
shader_dump_tessellator_output_primitive(struct wined3d_string_buffer * buffer,enum wined3d_tessellator_output_primitive output_primitive)1929 static void shader_dump_tessellator_output_primitive(struct wined3d_string_buffer *buffer,
1930         enum wined3d_tessellator_output_primitive output_primitive)
1931 {
1932     switch (output_primitive)
1933     {
1934         case WINED3D_TESSELLATOR_OUTPUT_POINT:
1935             shader_addline(buffer, "point");
1936             break;
1937         case WINED3D_TESSELLATOR_OUTPUT_LINE:
1938             shader_addline(buffer, "line");
1939             break;
1940         case WINED3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
1941             shader_addline(buffer, "triangle_cw");
1942             break;
1943         case WINED3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
1944             shader_addline(buffer, "triangle_ccw");
1945             break;
1946         default:
1947             shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive);
1948             break;
1949     }
1950 }
1951 
shader_dump_tessellator_partitioning(struct wined3d_string_buffer * buffer,enum wined3d_tessellator_partitioning partitioning)1952 static void shader_dump_tessellator_partitioning(struct wined3d_string_buffer *buffer,
1953         enum wined3d_tessellator_partitioning partitioning)
1954 {
1955     switch (partitioning)
1956     {
1957         case WINED3D_TESSELLATOR_PARTITIONING_INTEGER:
1958             shader_addline(buffer, "integer");
1959             break;
1960         case WINED3D_TESSELLATOR_PARTITIONING_POW2:
1961             shader_addline(buffer, "pow2");
1962             break;
1963         case WINED3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
1964             shader_addline(buffer, "fractional_odd");
1965             break;
1966         case WINED3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
1967             shader_addline(buffer, "fractional_even");
1968             break;
1969         default:
1970             shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning);
1971             break;
1972     }
1973 }
1974 
shader_dump_shader_input_sysval_semantic(struct wined3d_string_buffer * buffer,enum wined3d_shader_input_sysval_semantic semantic)1975 static void shader_dump_shader_input_sysval_semantic(struct wined3d_string_buffer *buffer,
1976         enum wined3d_shader_input_sysval_semantic semantic)
1977 {
1978     unsigned int i;
1979 
1980     for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i)
1981     {
1982         if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic)
1983         {
1984             shader_addline(buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name);
1985             return;
1986         }
1987     }
1988 
1989     shader_addline(buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic);
1990 }
1991 
shader_dump_decl_usage(struct wined3d_string_buffer * buffer,const struct wined3d_shader_semantic * semantic,unsigned int flags,const struct wined3d_shader_version * shader_version)1992 static void shader_dump_decl_usage(struct wined3d_string_buffer *buffer,
1993         const struct wined3d_shader_semantic *semantic, unsigned int flags,
1994         const struct wined3d_shader_version *shader_version)
1995 {
1996     shader_addline(buffer, "dcl");
1997 
1998     if (semantic->reg.reg.type == WINED3DSPR_SAMPLER)
1999     {
2000         switch (semantic->resource_type)
2001         {
2002             case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
2003                 shader_addline(buffer, "_2d");
2004                 break;
2005 
2006             case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2007                 shader_addline(buffer, "_3d");
2008                 break;
2009 
2010             case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2011                 shader_addline(buffer, "_cube");
2012                 break;
2013 
2014             default:
2015                 shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type);
2016                 break;
2017         }
2018     }
2019     else if (semantic->reg.reg.type == WINED3DSPR_RESOURCE || semantic->reg.reg.type == WINED3DSPR_UAV)
2020     {
2021         if (semantic->reg.reg.type == WINED3DSPR_RESOURCE)
2022             shader_addline(buffer, "_resource_");
2023         else
2024             shader_addline(buffer, "_uav_");
2025         switch (semantic->resource_type)
2026         {
2027             case WINED3D_SHADER_RESOURCE_BUFFER:
2028                 shader_addline(buffer, "buffer");
2029                 break;
2030 
2031             case WINED3D_SHADER_RESOURCE_TEXTURE_1D:
2032                 shader_addline(buffer, "texture1d");
2033                 break;
2034 
2035             case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
2036                 shader_addline(buffer, "texture2d");
2037                 break;
2038 
2039             case WINED3D_SHADER_RESOURCE_TEXTURE_2DMS:
2040                 shader_addline(buffer, "texture2dms");
2041                 break;
2042 
2043             case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2044                 shader_addline(buffer, "texture3d");
2045                 break;
2046 
2047             case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2048                 shader_addline(buffer, "texturecube");
2049                 break;
2050 
2051             case WINED3D_SHADER_RESOURCE_TEXTURE_1DARRAY:
2052                 shader_addline(buffer, "texture1darray");
2053                 break;
2054 
2055             case WINED3D_SHADER_RESOURCE_TEXTURE_2DARRAY:
2056                 shader_addline(buffer, "texture2darray");
2057                 break;
2058 
2059             case WINED3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY:
2060                 shader_addline(buffer, "texture2dmsarray");
2061                 break;
2062 
2063             case WINED3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY:
2064                 shader_addline(buffer, "texturecubearray");
2065                 break;
2066 
2067             default:
2068                 shader_addline(buffer, "unknown");
2069                 break;
2070         }
2071         if (semantic->reg.reg.type == WINED3DSPR_UAV)
2072             shader_dump_uav_flags(buffer, flags);
2073         switch (semantic->resource_data_type)
2074         {
2075             case WINED3D_DATA_FLOAT:
2076                 shader_addline(buffer, " (float)");
2077                 break;
2078 
2079             case WINED3D_DATA_INT:
2080                 shader_addline(buffer, " (int)");
2081                 break;
2082 
2083             case WINED3D_DATA_UINT:
2084                 shader_addline(buffer, " (uint)");
2085                 break;
2086 
2087             case WINED3D_DATA_UNORM:
2088                 shader_addline(buffer, " (unorm)");
2089                 break;
2090 
2091             case WINED3D_DATA_SNORM:
2092                 shader_addline(buffer, " (snorm)");
2093                 break;
2094 
2095             default:
2096                 shader_addline(buffer, " (unknown)");
2097                 break;
2098         }
2099     }
2100     else
2101     {
2102         /* Pixel shaders 3.0 don't have usage semantics. */
2103         if (shader_version->major < 3 && shader_version->type == WINED3D_SHADER_TYPE_PIXEL)
2104             return;
2105         else
2106             shader_addline(buffer, "_");
2107 
2108         switch (semantic->usage)
2109         {
2110             case WINED3D_DECL_USAGE_POSITION:
2111                 shader_addline(buffer, "position%u", semantic->usage_idx);
2112                 break;
2113 
2114             case WINED3D_DECL_USAGE_BLEND_INDICES:
2115                 shader_addline(buffer, "blend");
2116                 break;
2117 
2118             case WINED3D_DECL_USAGE_BLEND_WEIGHT:
2119                 shader_addline(buffer, "weight");
2120                 break;
2121 
2122             case WINED3D_DECL_USAGE_NORMAL:
2123                 shader_addline(buffer, "normal%u", semantic->usage_idx);
2124                 break;
2125 
2126             case WINED3D_DECL_USAGE_PSIZE:
2127                 shader_addline(buffer, "psize");
2128                 break;
2129 
2130             case WINED3D_DECL_USAGE_COLOR:
2131                 if (!semantic->usage_idx)
2132                     shader_addline(buffer, "color");
2133                 else
2134                     shader_addline(buffer, "specular%u", (semantic->usage_idx - 1));
2135                 break;
2136 
2137             case WINED3D_DECL_USAGE_TEXCOORD:
2138                 shader_addline(buffer, "texture%u", semantic->usage_idx);
2139                 break;
2140 
2141             case WINED3D_DECL_USAGE_TANGENT:
2142                 shader_addline(buffer, "tangent");
2143                 break;
2144 
2145             case WINED3D_DECL_USAGE_BINORMAL:
2146                 shader_addline(buffer, "binormal");
2147                 break;
2148 
2149             case WINED3D_DECL_USAGE_TESS_FACTOR:
2150                 shader_addline(buffer, "tessfactor");
2151                 break;
2152 
2153             case WINED3D_DECL_USAGE_POSITIONT:
2154                 shader_addline(buffer, "positionT%u", semantic->usage_idx);
2155                 break;
2156 
2157             case WINED3D_DECL_USAGE_FOG:
2158                 shader_addline(buffer, "fog");
2159                 break;
2160 
2161             case WINED3D_DECL_USAGE_DEPTH:
2162                 shader_addline(buffer, "depth");
2163                 break;
2164 
2165             case WINED3D_DECL_USAGE_SAMPLE:
2166                 shader_addline(buffer, "sample");
2167                 break;
2168 
2169             default:
2170                 shader_addline(buffer, "<unknown_semantic(%#x)>", semantic->usage);
2171                 FIXME("Unrecognised semantic usage %#x.\n", semantic->usage);
2172         }
2173     }
2174 }
2175 
shader_dump_register(struct wined3d_string_buffer * buffer,const struct wined3d_shader_register * reg,const struct wined3d_shader_version * shader_version)2176 static void shader_dump_register(struct wined3d_string_buffer *buffer,
2177         const struct wined3d_shader_register *reg, const struct wined3d_shader_version *shader_version)
2178 {
2179     static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
2180     static const char * const misctype_reg_names[] = {"vPos", "vFace"};
2181     UINT offset = reg->idx[0].offset;
2182 
2183     switch (reg->type)
2184     {
2185         case WINED3DSPR_TEMP:
2186             shader_addline(buffer, "r");
2187             break;
2188 
2189         case WINED3DSPR_INPUT:
2190             shader_addline(buffer, "v");
2191             break;
2192 
2193         case WINED3DSPR_CONST:
2194         case WINED3DSPR_CONST2:
2195         case WINED3DSPR_CONST3:
2196         case WINED3DSPR_CONST4:
2197             shader_addline(buffer, "c");
2198             offset = shader_get_float_offset(reg->type, offset);
2199             break;
2200 
2201         case WINED3DSPR_TEXTURE: /* vs: case WINED3DSPR_ADDR */
2202             shader_addline(buffer, "%c", shader_version->type == WINED3D_SHADER_TYPE_PIXEL ? 't' : 'a');
2203             break;
2204 
2205         case WINED3DSPR_RASTOUT:
2206             shader_addline(buffer, "%s", rastout_reg_names[offset]);
2207             break;
2208 
2209         case WINED3DSPR_COLOROUT:
2210             shader_addline(buffer, "oC");
2211             break;
2212 
2213         case WINED3DSPR_DEPTHOUT:
2214             shader_addline(buffer, "oDepth");
2215             break;
2216 
2217         case WINED3DSPR_DEPTHOUTGE:
2218             shader_addline(buffer, "oDepthGE");
2219             break;
2220 
2221         case WINED3DSPR_DEPTHOUTLE:
2222             shader_addline(buffer, "oDepthLE");
2223             break;
2224 
2225         case WINED3DSPR_ATTROUT:
2226             shader_addline(buffer, "oD");
2227             break;
2228 
2229         case WINED3DSPR_TEXCRDOUT:
2230             /* Vertex shaders >= 3.0 use general purpose output registers
2231              * (WINED3DSPR_OUTPUT), which can include an address token. */
2232             if (shader_version->major >= 3)
2233                 shader_addline(buffer, "o");
2234             else
2235                 shader_addline(buffer, "oT");
2236             break;
2237 
2238         case WINED3DSPR_CONSTINT:
2239             shader_addline(buffer, "i");
2240             break;
2241 
2242         case WINED3DSPR_CONSTBOOL:
2243             shader_addline(buffer, "b");
2244             break;
2245 
2246         case WINED3DSPR_LABEL:
2247             shader_addline(buffer, "l");
2248             break;
2249 
2250         case WINED3DSPR_LOOP:
2251             shader_addline(buffer, "aL");
2252             break;
2253 
2254         case WINED3DSPR_SAMPLER:
2255             shader_addline(buffer, "s");
2256             break;
2257 
2258         case WINED3DSPR_MISCTYPE:
2259             if (offset > 1)
2260             {
2261                 FIXME("Unhandled misctype register %u.\n", offset);
2262                 shader_addline(buffer, "<unhandled misctype %#x>", offset);
2263             }
2264             else
2265             {
2266                 shader_addline(buffer, "%s", misctype_reg_names[offset]);
2267             }
2268             break;
2269 
2270         case WINED3DSPR_PREDICATE:
2271             shader_addline(buffer, "p");
2272             break;
2273 
2274         case WINED3DSPR_IMMCONST:
2275             shader_addline(buffer, "l");
2276             break;
2277 
2278         case WINED3DSPR_CONSTBUFFER:
2279             shader_addline(buffer, "cb");
2280             break;
2281 
2282         case WINED3DSPR_IMMCONSTBUFFER:
2283             shader_addline(buffer, "icb");
2284             break;
2285 
2286         case WINED3DSPR_PRIMID:
2287             shader_addline(buffer, "primID");
2288             break;
2289 
2290         case WINED3DSPR_NULL:
2291             shader_addline(buffer, "null");
2292             break;
2293 
2294         case WINED3DSPR_RESOURCE:
2295             shader_addline(buffer, "t");
2296             break;
2297 
2298         case WINED3DSPR_UAV:
2299             shader_addline(buffer, "u");
2300             break;
2301 
2302         case WINED3DSPR_OUTPOINTID:
2303             shader_addline(buffer, "vOutputControlPointID");
2304             break;
2305 
2306         case WINED3DSPR_FORKINSTID:
2307             shader_addline(buffer, "vForkInstanceId");
2308             break;
2309 
2310         case WINED3DSPR_JOININSTID:
2311             shader_addline(buffer, "vJoinInstanceId");
2312             break;
2313 
2314         case WINED3DSPR_INCONTROLPOINT:
2315             shader_addline(buffer, "vicp");
2316             break;
2317 
2318         case WINED3DSPR_OUTCONTROLPOINT:
2319             shader_addline(buffer, "vocp");
2320             break;
2321 
2322         case WINED3DSPR_PATCHCONST:
2323             shader_addline(buffer, "vpc");
2324             break;
2325 
2326         case WINED3DSPR_TESSCOORD:
2327             shader_addline(buffer, "vDomainLocation");
2328             break;
2329 
2330         case WINED3DSPR_GROUPSHAREDMEM:
2331             shader_addline(buffer, "g");
2332             break;
2333 
2334         case WINED3DSPR_THREADID:
2335             shader_addline(buffer, "vThreadID");
2336             break;
2337 
2338         case WINED3DSPR_THREADGROUPID:
2339             shader_addline(buffer, "vThreadGroupID");
2340             break;
2341 
2342         case WINED3DSPR_LOCALTHREADID:
2343             shader_addline(buffer, "vThreadIDInGroup");
2344             break;
2345 
2346         case WINED3DSPR_LOCALTHREADINDEX:
2347             shader_addline(buffer, "vThreadIDInGroupFlattened");
2348             break;
2349 
2350         case WINED3DSPR_IDXTEMP:
2351             shader_addline(buffer, "x");
2352             break;
2353 
2354         case WINED3DSPR_STREAM:
2355             shader_addline(buffer, "m");
2356             break;
2357 
2358         case WINED3DSPR_FUNCTIONBODY:
2359             shader_addline(buffer, "fb");
2360             break;
2361 
2362         case WINED3DSPR_FUNCTIONPOINTER:
2363             shader_addline(buffer, "fp");
2364             break;
2365 
2366         case WINED3DSPR_COVERAGE:
2367             shader_addline(buffer, "vCoverage");
2368             break;
2369 
2370         case WINED3DSPR_SAMPLEMASK:
2371             shader_addline(buffer, "oMask");
2372             break;
2373 
2374         case WINED3DSPR_GSINSTID:
2375             shader_addline(buffer, "vGSInstanceID");
2376             break;
2377 
2378         default:
2379             shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type);
2380             break;
2381     }
2382 
2383     if (reg->type == WINED3DSPR_IMMCONST)
2384     {
2385         shader_addline(buffer, "(");
2386         switch (reg->immconst_type)
2387         {
2388             case WINED3D_IMMCONST_SCALAR:
2389                 switch (reg->data_type)
2390                 {
2391                     case WINED3D_DATA_FLOAT:
2392                         shader_addline(buffer, "%.8e", *(const float *)reg->u.immconst_data);
2393                         break;
2394                     case WINED3D_DATA_INT:
2395                         shader_addline(buffer, "%d", reg->u.immconst_data[0]);
2396                         break;
2397                     case WINED3D_DATA_RESOURCE:
2398                     case WINED3D_DATA_SAMPLER:
2399                     case WINED3D_DATA_UINT:
2400                         shader_addline(buffer, "%u", reg->u.immconst_data[0]);
2401                         break;
2402                     default:
2403                         shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2404                         break;
2405                 }
2406                 break;
2407 
2408             case WINED3D_IMMCONST_VEC4:
2409                 switch (reg->data_type)
2410                 {
2411                     case WINED3D_DATA_FLOAT:
2412                         shader_addline(buffer, "%.8e, %.8e, %.8e, %.8e",
2413                                 *(const float *)&reg->u.immconst_data[0], *(const float *)&reg->u.immconst_data[1],
2414                                 *(const float *)&reg->u.immconst_data[2], *(const float *)&reg->u.immconst_data[3]);
2415                         break;
2416                     case WINED3D_DATA_INT:
2417                         shader_addline(buffer, "%d, %d, %d, %d",
2418                                 reg->u.immconst_data[0], reg->u.immconst_data[1],
2419                                 reg->u.immconst_data[2], reg->u.immconst_data[3]);
2420                         break;
2421                     case WINED3D_DATA_RESOURCE:
2422                     case WINED3D_DATA_SAMPLER:
2423                     case WINED3D_DATA_UINT:
2424                         shader_addline(buffer, "%u, %u, %u, %u",
2425                                 reg->u.immconst_data[0], reg->u.immconst_data[1],
2426                                 reg->u.immconst_data[2], reg->u.immconst_data[3]);
2427                         break;
2428                     default:
2429                         shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2430                         break;
2431                 }
2432                 break;
2433 
2434             default:
2435                 shader_addline(buffer, "<unhandled immconst_type %#x>", reg->immconst_type);
2436                 break;
2437         }
2438         shader_addline(buffer, ")");
2439     }
2440     else if (reg->type != WINED3DSPR_RASTOUT
2441             && reg->type != WINED3DSPR_MISCTYPE
2442             && reg->type != WINED3DSPR_NULL)
2443     {
2444         if (offset != ~0u)
2445         {
2446             shader_addline(buffer, "[");
2447             if (reg->idx[0].rel_addr)
2448             {
2449                 shader_dump_src_param(buffer, reg->idx[0].rel_addr, shader_version);
2450                 shader_addline(buffer, " + ");
2451             }
2452             shader_addline(buffer, "%u]", offset);
2453 
2454             if (reg->idx[1].offset != ~0u)
2455             {
2456                 shader_addline(buffer, "[");
2457                 if (reg->idx[1].rel_addr)
2458                 {
2459                     shader_dump_src_param(buffer, reg->idx[1].rel_addr, shader_version);
2460                     shader_addline(buffer, " + ");
2461                 }
2462                 shader_addline(buffer, "%u]", reg->idx[1].offset);
2463             }
2464         }
2465 
2466         if (reg->type == WINED3DSPR_FUNCTIONPOINTER)
2467             shader_addline(buffer, "[%u]", reg->u.fp_body_idx);
2468     }
2469 }
2470 
shader_dump_dst_param(struct wined3d_string_buffer * buffer,const struct wined3d_shader_dst_param * param,const struct wined3d_shader_version * shader_version)2471 static void shader_dump_dst_param(struct wined3d_string_buffer *buffer,
2472         const struct wined3d_shader_dst_param *param, const struct wined3d_shader_version *shader_version)
2473 {
2474     DWORD write_mask = param->write_mask;
2475 
2476     shader_dump_register(buffer, &param->reg, shader_version);
2477 
2478     if (write_mask && write_mask != WINED3DSP_WRITEMASK_ALL)
2479     {
2480         static const char write_mask_chars[] = "xyzw";
2481 
2482         shader_addline(buffer, ".");
2483         if (write_mask & WINED3DSP_WRITEMASK_0)
2484             shader_addline(buffer, "%c", write_mask_chars[0]);
2485         if (write_mask & WINED3DSP_WRITEMASK_1)
2486             shader_addline(buffer, "%c", write_mask_chars[1]);
2487         if (write_mask & WINED3DSP_WRITEMASK_2)
2488             shader_addline(buffer, "%c", write_mask_chars[2]);
2489         if (write_mask & WINED3DSP_WRITEMASK_3)
2490             shader_addline(buffer, "%c", write_mask_chars[3]);
2491     }
2492 }
2493 
shader_dump_src_param(struct wined3d_string_buffer * buffer,const struct wined3d_shader_src_param * param,const struct wined3d_shader_version * shader_version)2494 static void shader_dump_src_param(struct wined3d_string_buffer *buffer,
2495         const struct wined3d_shader_src_param *param, const struct wined3d_shader_version *shader_version)
2496 {
2497     enum wined3d_shader_src_modifier src_modifier = param->modifiers;
2498     DWORD swizzle = param->swizzle;
2499 
2500     if (src_modifier == WINED3DSPSM_NEG
2501             || src_modifier == WINED3DSPSM_BIASNEG
2502             || src_modifier == WINED3DSPSM_SIGNNEG
2503             || src_modifier == WINED3DSPSM_X2NEG
2504             || src_modifier == WINED3DSPSM_ABSNEG)
2505         shader_addline(buffer, "-");
2506     else if (src_modifier == WINED3DSPSM_COMP)
2507         shader_addline(buffer, "1-");
2508     else if (src_modifier == WINED3DSPSM_NOT)
2509         shader_addline(buffer, "!");
2510 
2511     if (src_modifier == WINED3DSPSM_ABS || src_modifier == WINED3DSPSM_ABSNEG)
2512         shader_addline(buffer, "abs(");
2513 
2514     shader_dump_register(buffer, &param->reg, shader_version);
2515 
2516     switch (src_modifier)
2517     {
2518         case WINED3DSPSM_NONE:    break;
2519         case WINED3DSPSM_NEG:     break;
2520         case WINED3DSPSM_NOT:     break;
2521         case WINED3DSPSM_BIAS:    shader_addline(buffer, "_bias"); break;
2522         case WINED3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break;
2523         case WINED3DSPSM_SIGN:    shader_addline(buffer, "_bx2"); break;
2524         case WINED3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break;
2525         case WINED3DSPSM_COMP:    break;
2526         case WINED3DSPSM_X2:      shader_addline(buffer, "_x2"); break;
2527         case WINED3DSPSM_X2NEG:   shader_addline(buffer, "_x2"); break;
2528         case WINED3DSPSM_DZ:      shader_addline(buffer, "_dz"); break;
2529         case WINED3DSPSM_DW:      shader_addline(buffer, "_dw"); break;
2530         case WINED3DSPSM_ABSNEG:  shader_addline(buffer, ")"); break;
2531         case WINED3DSPSM_ABS:     shader_addline(buffer, ")"); break;
2532         default:                  shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier);
2533     }
2534 
2535     if (swizzle != WINED3DSP_NOSWIZZLE)
2536     {
2537         static const char swizzle_chars[] = "xyzw";
2538         DWORD swizzle_x = swizzle & 0x03;
2539         DWORD swizzle_y = (swizzle >> 2) & 0x03;
2540         DWORD swizzle_z = (swizzle >> 4) & 0x03;
2541         DWORD swizzle_w = (swizzle >> 6) & 0x03;
2542 
2543         if (swizzle_x == swizzle_y
2544                 && swizzle_x == swizzle_z
2545                 && swizzle_x == swizzle_w)
2546         {
2547             shader_addline(buffer, ".%c", swizzle_chars[swizzle_x]);
2548         }
2549         else
2550         {
2551             shader_addline(buffer, ".%c%c%c%c", swizzle_chars[swizzle_x], swizzle_chars[swizzle_y],
2552                     swizzle_chars[swizzle_z], swizzle_chars[swizzle_w]);
2553         }
2554     }
2555 }
2556 
2557 /* Shared code in order to generate the bulk of the shader string. */
shader_generate_code(const struct wined3d_shader * shader,struct wined3d_string_buffer * buffer,const struct wined3d_shader_reg_maps * reg_maps,void * backend_ctx,const DWORD * start,const DWORD * end)2558 HRESULT shader_generate_code(const struct wined3d_shader *shader, struct wined3d_string_buffer *buffer,
2559         const struct wined3d_shader_reg_maps *reg_maps, void *backend_ctx,
2560         const DWORD *start, const DWORD *end)
2561 {
2562     struct wined3d_device *device = shader->device;
2563     const struct wined3d_shader_frontend *fe = shader->frontend;
2564     void *fe_data = shader->frontend_data;
2565     struct wined3d_shader_version shader_version;
2566     struct wined3d_shader_parser_state state;
2567     struct wined3d_shader_instruction ins;
2568     struct wined3d_shader_tex_mx tex_mx;
2569     struct wined3d_shader_context ctx;
2570     const DWORD *ptr;
2571 
2572     /* Initialize current parsing state. */
2573     tex_mx.current_row = 0;
2574     state.current_loop_depth = 0;
2575     state.current_loop_reg = 0;
2576     state.in_subroutine = FALSE;
2577 
2578     ctx.shader = shader;
2579     ctx.gl_info = &device->adapter->gl_info;
2580     ctx.reg_maps = reg_maps;
2581     ctx.buffer = buffer;
2582     ctx.tex_mx = &tex_mx;
2583     ctx.state = &state;
2584     ctx.backend_data = backend_ctx;
2585     ins.ctx = &ctx;
2586 
2587     fe->shader_read_header(fe_data, &ptr, &shader_version);
2588     if (start)
2589         ptr = start;
2590 
2591     while (!fe->shader_is_end(fe_data, &ptr) && ptr != end)
2592     {
2593         /* Read opcode. */
2594         fe->shader_read_instruction(fe_data, &ptr, &ins);
2595 
2596         /* Unknown opcode and its parameters. */
2597         if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
2598         {
2599             WARN("Encountered unrecognised or invalid instruction.\n");
2600             return WINED3DERR_INVALIDCALL;
2601         }
2602 
2603         if (ins.predicate)
2604             FIXME("Predicates not implemented.\n");
2605 
2606         /* Call appropriate function for output target */
2607         device->shader_backend->shader_handle_instruction(&ins);
2608     }
2609 
2610     return WINED3D_OK;
2611 }
2612 
shader_dump_ins_modifiers(struct wined3d_string_buffer * buffer,const struct wined3d_shader_dst_param * dst)2613 static void shader_dump_ins_modifiers(struct wined3d_string_buffer *buffer,
2614         const struct wined3d_shader_dst_param *dst)
2615 {
2616     DWORD mmask = dst->modifiers;
2617 
2618     switch (dst->shift)
2619     {
2620         case 0: break;
2621         case 13: shader_addline(buffer, "_d8"); break;
2622         case 14: shader_addline(buffer, "_d4"); break;
2623         case 15: shader_addline(buffer, "_d2"); break;
2624         case 1: shader_addline(buffer, "_x2"); break;
2625         case 2: shader_addline(buffer, "_x4"); break;
2626         case 3: shader_addline(buffer, "_x8"); break;
2627         default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break;
2628     }
2629 
2630     if (mmask & WINED3DSPDM_SATURATE)         shader_addline(buffer, "_sat");
2631     if (mmask & WINED3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp");
2632     if (mmask & WINED3DSPDM_MSAMPCENTROID)    shader_addline(buffer, "_centroid");
2633 
2634     mmask &= ~(WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION | WINED3DSPDM_MSAMPCENTROID);
2635     if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask);
2636 }
2637 
shader_dump_primitive_type(struct wined3d_string_buffer * buffer,const struct wined3d_shader_primitive_type * primitive_type)2638 static void shader_dump_primitive_type(struct wined3d_string_buffer *buffer,
2639         const struct wined3d_shader_primitive_type *primitive_type)
2640 {
2641     switch (primitive_type->type)
2642     {
2643         case WINED3D_PT_UNDEFINED:
2644             shader_addline(buffer, "undefined");
2645             break;
2646         case WINED3D_PT_POINTLIST:
2647             shader_addline(buffer, "pointlist");
2648             break;
2649         case WINED3D_PT_LINELIST:
2650             shader_addline(buffer, "linelist");
2651             break;
2652         case WINED3D_PT_LINESTRIP:
2653             shader_addline(buffer, "linestrip");
2654             break;
2655         case WINED3D_PT_TRIANGLELIST:
2656             shader_addline(buffer, "trianglelist");
2657             break;
2658         case WINED3D_PT_TRIANGLESTRIP:
2659             shader_addline(buffer, "trianglestrip");
2660             break;
2661         case WINED3D_PT_TRIANGLEFAN:
2662             shader_addline(buffer, "trianglefan");
2663             break;
2664         case WINED3D_PT_LINELIST_ADJ:
2665             shader_addline(buffer, "linelist_adj");
2666             break;
2667         case WINED3D_PT_LINESTRIP_ADJ:
2668             shader_addline(buffer, "linestrip_adj");
2669             break;
2670         case WINED3D_PT_TRIANGLELIST_ADJ:
2671             shader_addline(buffer, "trianglelist_adj");
2672             break;
2673         case WINED3D_PT_TRIANGLESTRIP_ADJ:
2674             shader_addline(buffer, "trianglestrip_adj");
2675             break;
2676         case WINED3D_PT_PATCH:
2677             shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count);
2678             break;
2679         default:
2680             shader_addline(buffer, "<unrecognized_primitive_type %#x>", primitive_type->type);
2681             break;
2682     }
2683 }
2684 
shader_dump_interpolation_mode(struct wined3d_string_buffer * buffer,enum wined3d_shader_interpolation_mode interpolation_mode)2685 static void shader_dump_interpolation_mode(struct wined3d_string_buffer *buffer,
2686         enum wined3d_shader_interpolation_mode interpolation_mode)
2687 {
2688     switch (interpolation_mode)
2689     {
2690         case WINED3DSIM_CONSTANT:
2691             shader_addline(buffer, "constant");
2692             break;
2693         case WINED3DSIM_LINEAR:
2694             shader_addline(buffer, "linear");
2695             break;
2696         case WINED3DSIM_LINEAR_CENTROID:
2697             shader_addline(buffer, "linear centroid");
2698             break;
2699         case WINED3DSIM_LINEAR_NOPERSPECTIVE:
2700             shader_addline(buffer, "linear noperspective");
2701             break;
2702         case WINED3DSIM_LINEAR_SAMPLE:
2703             shader_addline(buffer, "linear sample");
2704             break;
2705         case WINED3DSIM_LINEAR_NOPERSPECTIVE_CENTROID:
2706             shader_addline(buffer, "linear noperspective centroid");
2707             break;
2708         case WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE:
2709             shader_addline(buffer, "linear noperspective sample");
2710             break;
2711         default:
2712             shader_addline(buffer, "<unrecognized_interpolation_mode %#x>", interpolation_mode);
2713             break;
2714     }
2715 }
2716 
shader_trace_init(const struct wined3d_shader_frontend * fe,void * fe_data)2717 static void shader_trace_init(const struct wined3d_shader_frontend *fe, void *fe_data)
2718 {
2719     struct wined3d_shader_version shader_version;
2720     struct wined3d_string_buffer buffer;
2721     const char *type_prefix;
2722     const char *p, *q;
2723     const DWORD *ptr;
2724     DWORD i;
2725 
2726     if (!string_buffer_init(&buffer))
2727     {
2728         ERR("Failed to initialize string buffer.\n");
2729         return;
2730     }
2731 
2732     fe->shader_read_header(fe_data, &ptr, &shader_version);
2733 
2734     TRACE("Parsing %p.\n", ptr);
2735 
2736     switch (shader_version.type)
2737     {
2738         case WINED3D_SHADER_TYPE_VERTEX:
2739             type_prefix = "vs";
2740             break;
2741 
2742         case WINED3D_SHADER_TYPE_HULL:
2743             type_prefix = "hs";
2744             break;
2745 
2746         case WINED3D_SHADER_TYPE_DOMAIN:
2747             type_prefix = "ds";
2748             break;
2749 
2750         case WINED3D_SHADER_TYPE_GEOMETRY:
2751             type_prefix = "gs";
2752             break;
2753 
2754         case WINED3D_SHADER_TYPE_PIXEL:
2755             type_prefix = "ps";
2756             break;
2757 
2758         case WINED3D_SHADER_TYPE_COMPUTE:
2759             type_prefix = "cs";
2760             break;
2761 
2762         default:
2763             FIXME("Unhandled shader type %#x.\n", shader_version.type);
2764             type_prefix = "unknown";
2765             break;
2766     }
2767 
2768     shader_addline(&buffer, "%s_%u_%u\n", type_prefix, shader_version.major, shader_version.minor);
2769 
2770     while (!fe->shader_is_end(fe_data, &ptr))
2771     {
2772         struct wined3d_shader_instruction ins;
2773 
2774         fe->shader_read_instruction(fe_data, &ptr, &ins);
2775         if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
2776         {
2777             WARN("Skipping unrecognized instruction.\n");
2778             shader_addline(&buffer, "<unrecognized instruction>\n");
2779             continue;
2780         }
2781 
2782         if (ins.handler_idx == WINED3DSIH_DCL || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
2783         {
2784             shader_dump_decl_usage(&buffer, &ins.declaration.semantic, ins.flags, &shader_version);
2785             shader_dump_ins_modifiers(&buffer, &ins.declaration.semantic.reg);
2786             shader_addline(&buffer, " ");
2787             shader_dump_dst_param(&buffer, &ins.declaration.semantic.reg, &shader_version);
2788         }
2789         else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
2790         {
2791             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2792             shader_dump_src_param(&buffer, &ins.declaration.src, &shader_version);
2793             shader_addline(&buffer, ", %s",
2794                     ins.flags & WINED3DSI_INDEXED_DYNAMIC ? "dynamicIndexed" : "immediateIndexed");
2795         }
2796         else if (ins.handler_idx == WINED3DSIH_DCL_FUNCTION_BODY)
2797         {
2798             shader_addline(&buffer, "%s fb%u",
2799                     shader_opcode_names[ins.handler_idx], ins.declaration.index);
2800         }
2801         else if (ins.handler_idx == WINED3DSIH_DCL_FUNCTION_TABLE)
2802         {
2803             shader_addline(&buffer, "%s ft%u = {...}",
2804                     shader_opcode_names[ins.handler_idx], ins.declaration.index);
2805         }
2806         else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
2807         {
2808             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2809             shader_dump_global_flags(&buffer, ins.flags);
2810         }
2811         else if (ins.handler_idx == WINED3DSIH_DCL_HS_MAX_TESSFACTOR)
2812         {
2813             shader_addline(&buffer, "%s %.8e", shader_opcode_names[ins.handler_idx],
2814                     ins.declaration.max_tessellation_factor);
2815         }
2816         else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
2817         {
2818             shader_addline(&buffer, "%s {\n", shader_opcode_names[ins.handler_idx]);
2819             for (i = 0; i < ins.declaration.icb->vec4_count; ++i)
2820             {
2821                 shader_addline(&buffer, "    {0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
2822                         ins.declaration.icb->data[4 * i + 0],
2823                         ins.declaration.icb->data[4 * i + 1],
2824                         ins.declaration.icb->data[4 * i + 2],
2825                         ins.declaration.icb->data[4 * i + 3]);
2826             }
2827             shader_addline(&buffer, "}");
2828         }
2829         else if (ins.handler_idx == WINED3DSIH_DCL_INDEX_RANGE)
2830         {
2831             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2832             shader_dump_dst_param(&buffer, &ins.declaration.index_range.first_register, &shader_version);
2833             shader_addline(&buffer, " %u", ins.declaration.index_range.last_register);
2834         }
2835         else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
2836         {
2837             shader_addline(&buffer, "%s x[%u][%u], %u", shader_opcode_names[ins.handler_idx],
2838                     ins.declaration.indexable_temp.register_idx,
2839                     ins.declaration.indexable_temp.register_size,
2840                     ins.declaration.indexable_temp.component_count);
2841         }
2842         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
2843         {
2844             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2845             shader_dump_interpolation_mode(&buffer, ins.flags);
2846             shader_addline(&buffer, " ");
2847             shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2848         }
2849         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS_SGV
2850                 || ins.handler_idx == WINED3DSIH_DCL_INPUT_SGV
2851                 || ins.handler_idx == WINED3DSIH_DCL_INPUT_SIV
2852                 || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_SIV)
2853         {
2854             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2855             shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
2856             shader_addline(&buffer, ", ");
2857             shader_dump_shader_input_sysval_semantic(&buffer, ins.declaration.register_semantic.sysval_semantic);
2858         }
2859         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS_SIV)
2860         {
2861             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2862             shader_dump_interpolation_mode(&buffer, ins.flags);
2863             shader_addline(&buffer, " ");
2864             shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
2865             shader_addline(&buffer, ", ");
2866             shader_dump_shader_input_sysval_semantic(&buffer, ins.declaration.register_semantic.sysval_semantic);
2867         }
2868         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT
2869                 || ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
2870         {
2871             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2872             shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2873         }
2874         else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE
2875                 || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
2876         {
2877             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2878             shader_dump_primitive_type(&buffer, &ins.declaration.primitive_type);
2879         }
2880         else if (ins.handler_idx == WINED3DSIH_DCL_INTERFACE)
2881         {
2882             shader_addline(&buffer, "%s fp[%u][%u][%u] = {...}",
2883                     shader_opcode_names[ins.handler_idx], ins.declaration.fp.index,
2884                     ins.declaration.fp.array_size, ins.declaration.fp.body_count);
2885         }
2886         else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
2887         {
2888             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2889             shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2890         }
2891         else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
2892         {
2893             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2894             shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
2895             shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
2896         }
2897         else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
2898         {
2899             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2900             shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2901             if (ins.flags == WINED3DSI_SAMPLER_COMPARISON_MODE)
2902                 shader_addline(&buffer, ", comparisonMode");
2903         }
2904         else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS
2905                 || ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES
2906                 || ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
2907                 || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT
2908                 || ins.handler_idx == WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT
2909                 || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT
2910                 || ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
2911         {
2912             shader_addline(&buffer, "%s %u", shader_opcode_names[ins.handler_idx], ins.declaration.count);
2913         }
2914         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
2915         {
2916             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2917             shader_dump_tessellator_domain(&buffer, ins.declaration.tessellator_domain);
2918         }
2919         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
2920         {
2921             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2922             shader_dump_tessellator_output_primitive(&buffer, ins.declaration.tessellator_output_primitive);
2923         }
2924         else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
2925         {
2926             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2927             shader_dump_tessellator_partitioning(&buffer, ins.declaration.tessellator_partitioning);
2928         }
2929         else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
2930         {
2931             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2932             shader_dump_dst_param(&buffer, &ins.declaration.tgsm_raw.reg, &shader_version);
2933             shader_addline(&buffer, ", %u", ins.declaration.tgsm_raw.byte_count);
2934         }
2935         else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
2936         {
2937             shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2938             shader_dump_dst_param(&buffer, &ins.declaration.tgsm_structured.reg, &shader_version);
2939             shader_addline(&buffer, ", %u, %u", ins.declaration.tgsm_structured.byte_stride,
2940                     ins.declaration.tgsm_structured.structure_count);
2941         }
2942         else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
2943         {
2944             shader_addline(&buffer, "%s %u, %u, %u", shader_opcode_names[ins.handler_idx],
2945                     ins.declaration.thread_group_size.x,
2946                     ins.declaration.thread_group_size.y,
2947                     ins.declaration.thread_group_size.z);
2948         }
2949         else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
2950         {
2951             shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
2952             shader_dump_uav_flags(&buffer, ins.flags);
2953             shader_addline(&buffer, " ");
2954             shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2955         }
2956         else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
2957         {
2958             shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
2959             shader_dump_uav_flags(&buffer, ins.flags);
2960             shader_addline(&buffer, " ");
2961             shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
2962             shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
2963         }
2964         else if (ins.handler_idx == WINED3DSIH_DEF)
2965         {
2966             shader_addline(&buffer, "def c%u = %.8e, %.8e, %.8e, %.8e", shader_get_float_offset(ins.dst[0].reg.type,
2967                     ins.dst[0].reg.idx[0].offset),
2968                     *(const float *)&ins.src[0].reg.u.immconst_data[0],
2969                     *(const float *)&ins.src[0].reg.u.immconst_data[1],
2970                     *(const float *)&ins.src[0].reg.u.immconst_data[2],
2971                     *(const float *)&ins.src[0].reg.u.immconst_data[3]);
2972         }
2973         else if (ins.handler_idx == WINED3DSIH_DEFI)
2974         {
2975             shader_addline(&buffer, "defi i%u = %d, %d, %d, %d", ins.dst[0].reg.idx[0].offset,
2976                     ins.src[0].reg.u.immconst_data[0],
2977                     ins.src[0].reg.u.immconst_data[1],
2978                     ins.src[0].reg.u.immconst_data[2],
2979                     ins.src[0].reg.u.immconst_data[3]);
2980         }
2981         else if (ins.handler_idx == WINED3DSIH_DEFB)
2982         {
2983             shader_addline(&buffer, "defb b%u = %s",
2984                     ins.dst[0].reg.idx[0].offset, ins.src[0].reg.u.immconst_data[0] ? "true" : "false");
2985         }
2986         else
2987         {
2988             if (ins.predicate)
2989             {
2990                 shader_addline(&buffer, "(");
2991                 shader_dump_src_param(&buffer, ins.predicate, &shader_version);
2992                 shader_addline(&buffer, ") ");
2993             }
2994 
2995             /* PixWin marks instructions with the coissue flag with a '+' */
2996             if (ins.coissue)
2997                 shader_addline(&buffer, "+");
2998 
2999             shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
3000 
3001             if (ins.handler_idx == WINED3DSIH_BREAKP
3002                     || ins.handler_idx == WINED3DSIH_CONTINUEP
3003                     || ins.handler_idx == WINED3DSIH_IF
3004                     || ins.handler_idx == WINED3DSIH_RETP
3005                     || ins.handler_idx == WINED3DSIH_TEXKILL)
3006             {
3007                 switch (ins.flags)
3008                 {
3009                     case WINED3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(&buffer, "_nz"); break;
3010                     case WINED3D_SHADER_CONDITIONAL_OP_Z:  shader_addline(&buffer, "_z"); break;
3011                     default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags); break;
3012                 }
3013             }
3014             else if (ins.handler_idx == WINED3DSIH_IFC
3015                     || ins.handler_idx == WINED3DSIH_BREAKC)
3016             {
3017                 switch (ins.flags)
3018                 {
3019                     case WINED3D_SHADER_REL_OP_GT: shader_addline(&buffer, "_gt"); break;
3020                     case WINED3D_SHADER_REL_OP_EQ: shader_addline(&buffer, "_eq"); break;
3021                     case WINED3D_SHADER_REL_OP_GE: shader_addline(&buffer, "_ge"); break;
3022                     case WINED3D_SHADER_REL_OP_LT: shader_addline(&buffer, "_lt"); break;
3023                     case WINED3D_SHADER_REL_OP_NE: shader_addline(&buffer, "_ne"); break;
3024                     case WINED3D_SHADER_REL_OP_LE: shader_addline(&buffer, "_le"); break;
3025                     default: shader_addline(&buffer, "_(%u)", ins.flags);
3026                 }
3027             }
3028             else if (ins.handler_idx == WINED3DSIH_TEX
3029                     && shader_version.major >= 2
3030                     && (ins.flags & WINED3DSI_TEXLD_PROJECT))
3031             {
3032                 shader_addline(&buffer, "p");
3033             }
3034             else if (ins.handler_idx == WINED3DSIH_RESINFO && ins.flags)
3035             {
3036                 switch (ins.flags)
3037                 {
3038                     case WINED3DSI_RESINFO_RCP_FLOAT: shader_addline(&buffer, "_rcpFloat"); break;
3039                     case WINED3DSI_RESINFO_UINT: shader_addline(&buffer, "_uint"); break;
3040                     default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
3041                 }
3042             }
3043             else if (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.flags)
3044             {
3045                 switch (ins.flags)
3046                 {
3047                     case WINED3DSI_SAMPLE_INFO_UINT: shader_addline(&buffer, "_uint"); break;
3048                     default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
3049                 }
3050             }
3051             else if (ins.handler_idx == WINED3DSIH_SYNC)
3052             {
3053                 shader_dump_sync_flags(&buffer, ins.flags);
3054             }
3055 
3056             if (wined3d_shader_instruction_has_texel_offset(&ins))
3057                 shader_addline(&buffer, "(%d,%d,%d)", ins.texel_offset.u, ins.texel_offset.v, ins.texel_offset.w);
3058 
3059             for (i = 0; i < ins.dst_count; ++i)
3060             {
3061                 shader_dump_ins_modifiers(&buffer, &ins.dst[i]);
3062                 shader_addline(&buffer, !i ? " " : ", ");
3063                 shader_dump_dst_param(&buffer, &ins.dst[i], &shader_version);
3064             }
3065 
3066             /* Other source tokens */
3067             for (i = ins.dst_count; i < (ins.dst_count + ins.src_count); ++i)
3068             {
3069                 shader_addline(&buffer, !i ? " " : ", ");
3070                 shader_dump_src_param(&buffer, &ins.src[i - ins.dst_count], &shader_version);
3071             }
3072         }
3073         shader_addline(&buffer, "\n");
3074     }
3075 
3076     for (p = buffer.buffer; *p; p = q)
3077     {
3078         if (!(q = strstr(p, "\n")))
3079             q = p + strlen(p);
3080         else
3081             ++q;
3082         TRACE("    %.*s", (int)(q - p), p);
3083     }
3084 
3085     string_buffer_free(&buffer);
3086 }
3087 
shader_cleanup(struct wined3d_shader * shader)3088 static void shader_cleanup(struct wined3d_shader *shader)
3089 {
3090     if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_HULL)
3091     {
3092         heap_free(shader->u.hs.phases.control_point);
3093         heap_free(shader->u.hs.phases.fork);
3094         heap_free(shader->u.hs.phases.join);
3095     }
3096     else if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
3097     {
3098         heap_free(shader->u.gs.so_desc.elements);
3099     }
3100 
3101     heap_free(shader->patch_constant_signature.elements);
3102     heap_free(shader->output_signature.elements);
3103     heap_free(shader->input_signature.elements);
3104     heap_free(shader->signature_strings);
3105     shader->device->shader_backend->shader_destroy(shader);
3106     shader_cleanup_reg_maps(&shader->reg_maps);
3107     heap_free(shader->function);
3108     shader_delete_constant_list(&shader->constantsF);
3109     shader_delete_constant_list(&shader->constantsB);
3110     shader_delete_constant_list(&shader->constantsI);
3111     list_remove(&shader->shader_list_entry);
3112 
3113     if (shader->frontend && shader->frontend_data)
3114         shader->frontend->shader_free(shader->frontend_data);
3115 }
3116 
/* Private data of the "none" shader backend; allocated by
 * shader_none_alloc() and released by shader_none_free(). */
3117 struct shader_none_priv
3118 {
    /* Vertex pipe implementation passed to shader_none_alloc(). */
3119     const struct wined3d_vertex_pipe_ops *vertex_pipe;
    /* Fragment pipe implementation passed to shader_none_alloc(). */
3120     const struct fragment_pipeline *fragment_pipe;
    /* Result of masking the fragment pipe caps with
     * WINED3D_FRAGMENT_CAP_PROJ_CONTROL; returned by
     * shader_none_has_ffp_proj_control(). */
3121     BOOL ffp_proj_control;
3122 };
3123 
shader_none_handle_instruction(const struct wined3d_shader_instruction * ins)3124 static void shader_none_handle_instruction(const struct wined3d_shader_instruction *ins) {}
shader_none_precompile(void * shader_priv,struct wined3d_shader * shader)3125 static void shader_none_precompile(void *shader_priv, struct wined3d_shader *shader) {}
shader_none_select_compute(void * shader_priv,struct wined3d_context * context,const struct wined3d_state * state)3126 static void shader_none_select_compute(void *shader_priv, struct wined3d_context *context,
3127         const struct wined3d_state *state) {}
shader_none_update_float_vertex_constants(struct wined3d_device * device,UINT start,UINT count)3128 static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) {}
shader_none_update_float_pixel_constants(struct wined3d_device * device,UINT start,UINT count)3129 static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) {}
shader_none_load_constants(void * shader_priv,struct wined3d_context * context,const struct wined3d_state * state)3130 static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
3131         const struct wined3d_state *state) {}
shader_none_destroy(struct wined3d_shader * shader)3132 static void shader_none_destroy(struct wined3d_shader *shader) {}
shader_none_free_context_data(struct wined3d_context * context)3133 static void shader_none_free_context_data(struct wined3d_context *context) {}
shader_none_init_context_state(struct wined3d_context * context)3134 static void shader_none_init_context_state(struct wined3d_context *context) {}
3135 
3136 /* Context activation is done by the caller. */
shader_none_select(void * shader_priv,struct wined3d_context * context,const struct wined3d_state * state)3137 static void shader_none_select(void *shader_priv, struct wined3d_context *context,
3138         const struct wined3d_state *state)
3139 {
3140     const struct wined3d_gl_info *gl_info = context->gl_info;
3141     struct shader_none_priv *priv = shader_priv;
3142 
3143     priv->vertex_pipe->vp_enable(gl_info, !use_vs(state));
3144     priv->fragment_pipe->enable_extension(gl_info, !use_ps(state));
3145 }
3146 
3147 /* Context activation is done by the caller. */
shader_none_disable(void * shader_priv,struct wined3d_context * context)3148 static void shader_none_disable(void *shader_priv, struct wined3d_context *context)
3149 {
3150     struct shader_none_priv *priv = shader_priv;
3151     const struct wined3d_gl_info *gl_info = context->gl_info;
3152 
3153     priv->vertex_pipe->vp_enable(gl_info, FALSE);
3154     priv->fragment_pipe->enable_extension(gl_info, FALSE);
3155 
3156     context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
3157             | (1u << WINED3D_SHADER_TYPE_VERTEX)
3158             | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
3159             | (1u << WINED3D_SHADER_TYPE_HULL)
3160             | (1u << WINED3D_SHADER_TYPE_DOMAIN)
3161             | (1u << WINED3D_SHADER_TYPE_COMPUTE);
3162 }
3163 
shader_none_alloc(struct wined3d_device * device,const struct wined3d_vertex_pipe_ops * vertex_pipe,const struct fragment_pipeline * fragment_pipe)3164 static HRESULT shader_none_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
3165         const struct fragment_pipeline *fragment_pipe)
3166 {
3167     struct fragment_caps fragment_caps;
3168     void *vertex_priv, *fragment_priv;
3169     struct shader_none_priv *priv;
3170 
3171     if (!(priv = heap_alloc(sizeof(*priv))))
3172         return E_OUTOFMEMORY;
3173 
3174     if (!(vertex_priv = vertex_pipe->vp_alloc(&none_shader_backend, priv)))
3175     {
3176         ERR("Failed to initialize vertex pipe.\n");
3177         heap_free(priv);
3178         return E_FAIL;
3179     }
3180 
3181     if (!(fragment_priv = fragment_pipe->alloc_private(&none_shader_backend, priv)))
3182     {
3183         ERR("Failed to initialize fragment pipe.\n");
3184         vertex_pipe->vp_free(device);
3185         heap_free(priv);
3186         return E_FAIL;
3187     }
3188 
3189     priv->vertex_pipe = vertex_pipe;
3190     priv->fragment_pipe = fragment_pipe;
3191     fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps);
3192     priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
3193 
3194     device->vertex_priv = vertex_priv;
3195     device->fragment_priv = fragment_priv;
3196     device->shader_priv = priv;
3197 
3198     return WINED3D_OK;
3199 }
3200 
shader_none_free(struct wined3d_device * device)3201 static void shader_none_free(struct wined3d_device *device)
3202 {
3203     struct shader_none_priv *priv = device->shader_priv;
3204 
3205     priv->fragment_pipe->free_private(device);
3206     priv->vertex_pipe->vp_free(device);
3207     heap_free(priv);
3208 }
3209 
shader_none_allocate_context_data(struct wined3d_context * context)3210 static BOOL shader_none_allocate_context_data(struct wined3d_context *context)
3211 {
3212     return TRUE;
3213 }
3214 
shader_none_get_caps(const struct wined3d_gl_info * gl_info,struct shader_caps * caps)3215 static void shader_none_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
3216 {
3217     /* Set the shader caps to 0 for the none shader backend */
3218     caps->vs_version = 0;
3219     caps->hs_version = 0;
3220     caps->ds_version = 0;
3221     caps->gs_version = 0;
3222     caps->ps_version = 0;
3223     caps->cs_version = 0;
3224     caps->vs_uniform_count = 0;
3225     caps->ps_uniform_count = 0;
3226     caps->ps_1x_max_value = 0.0f;
3227     caps->varying_count = 0;
3228     caps->wined3d_caps = 0;
3229 }
3230 
shader_none_color_fixup_supported(struct color_fixup_desc fixup)3231 static BOOL shader_none_color_fixup_supported(struct color_fixup_desc fixup)
3232 {
3233     /* We "support" every possible fixup, since we don't support any shader
3234      * model, and will never have to actually sample a texture. */
3235     return TRUE;
3236 }
3237 
shader_none_has_ffp_proj_control(void * shader_priv)3238 static BOOL shader_none_has_ffp_proj_control(void *shader_priv)
3239 {
3240     struct shader_none_priv *priv = shader_priv;
3241 
3242     return priv->ffp_proj_control;
3243 }
3244 
/* Callback table of the "none" shader backend.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct wined3d_shader_backend_ops. Apart from select, disable,
 * alloc, free, get_caps and the two query helpers, the entries point at
 * empty or constant-returning stubs. */
3245 const struct wined3d_shader_backend_ops none_shader_backend =
3246 {
3247     shader_none_handle_instruction,
3248     shader_none_precompile,
3249     shader_none_select,
3250     shader_none_select_compute,
3251     shader_none_disable,
3252     shader_none_update_float_vertex_constants,
3253     shader_none_update_float_pixel_constants,
3254     shader_none_load_constants,
3255     shader_none_destroy,
3256     shader_none_alloc,
3257     shader_none_free,
3258     shader_none_allocate_context_data,
3259     shader_none_free_context_data,
3260     shader_none_init_context_state,
3261     shader_none_get_caps,
3262     shader_none_color_fixup_supported,
3263     shader_none_has_ffp_proj_control,
3264 };
3265 
shader_set_function(struct wined3d_shader * shader,DWORD float_const_count,enum wined3d_shader_type type,unsigned int max_version)3266 static HRESULT shader_set_function(struct wined3d_shader *shader, DWORD float_const_count,
3267         enum wined3d_shader_type type, unsigned int max_version)
3268 {
3269     struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3270     const struct wined3d_shader_frontend *fe;
3271     HRESULT hr;
3272     unsigned int backend_version;
3273     const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
3274 
3275     TRACE("shader %p, float_const_count %u, type %#x, max_version %u.\n",
3276             shader, float_const_count, type, max_version);
3277 
3278     fe = shader->frontend;
3279     if (!(shader->frontend_data = fe->shader_init(shader->function,
3280             shader->functionLength, &shader->output_signature)))
3281     {
3282         FIXME("Failed to initialize frontend.\n");
3283         return WINED3DERR_INVALIDCALL;
3284     }
3285 
3286     /* First pass: trace shader. */
3287     if (TRACE_ON(d3d_shader))
3288         shader_trace_init(fe, shader->frontend_data);
3289 
3290     /* Second pass: figure out which registers are used, what the semantics are, etc. */
3291     if (FAILED(hr = shader_get_registers_used(shader, fe, reg_maps, &shader->input_signature,
3292             &shader->output_signature, float_const_count)))
3293         return hr;
3294 
3295     if (reg_maps->shader_version.type != type)
3296     {
3297         WARN("Wrong shader type %d.\n", reg_maps->shader_version.type);
3298         return WINED3DERR_INVALIDCALL;
3299     }
3300     if (reg_maps->shader_version.major > max_version)
3301     {
3302         WARN("Shader version %d not supported by this D3D API version.\n", reg_maps->shader_version.major);
3303         return WINED3DERR_INVALIDCALL;
3304     }
3305     switch (type)
3306     {
3307         case WINED3D_SHADER_TYPE_VERTEX:
3308             backend_version = d3d_info->limits.vs_version;
3309             break;
3310         case WINED3D_SHADER_TYPE_HULL:
3311             backend_version = d3d_info->limits.hs_version;
3312             break;
3313         case WINED3D_SHADER_TYPE_DOMAIN:
3314             backend_version = d3d_info->limits.ds_version;
3315             break;
3316         case WINED3D_SHADER_TYPE_GEOMETRY:
3317             backend_version = d3d_info->limits.gs_version;
3318             break;
3319         case WINED3D_SHADER_TYPE_PIXEL:
3320             backend_version = d3d_info->limits.ps_version;
3321             break;
3322         case WINED3D_SHADER_TYPE_COMPUTE:
3323             backend_version = d3d_info->limits.cs_version;
3324             break;
3325         default:
3326             FIXME("No backend version-checking for this shader type.\n");
3327             backend_version = 0;
3328     }
3329     if (reg_maps->shader_version.major > backend_version)
3330     {
3331         WARN("Shader version %d.%d not supported by your GPU with the current shader backend.\n",
3332                 reg_maps->shader_version.major, reg_maps->shader_version.minor);
3333         return WINED3DERR_INVALIDCALL;
3334     }
3335 
3336     return WINED3D_OK;
3337 }
3338 
wined3d_shader_incref(struct wined3d_shader * shader)3339 ULONG CDECL wined3d_shader_incref(struct wined3d_shader *shader)
3340 {
3341     ULONG refcount = InterlockedIncrement(&shader->ref);
3342 
3343     TRACE("%p increasing refcount to %u.\n", shader, refcount);
3344 
3345     return refcount;
3346 }
3347 
wined3d_shader_init_object(void * object)3348 static void wined3d_shader_init_object(void *object)
3349 {
3350     struct wined3d_shader *shader = object;
3351     struct wined3d_device *device = shader->device;
3352 
3353     list_add_head(&device->shaders, &shader->shader_list_entry);
3354 
3355     device->shader_backend->shader_precompile(device->shader_priv, shader);
3356 }
3357 
/* Destruction callback for a shader object: release everything the shader
 * owns through shader_cleanup(), then free the shader memory itself. */
static void wined3d_shader_destroy_object(void *object)
{
    struct wined3d_shader *doomed = object;

    shader_cleanup(doomed);
    heap_free(doomed);
}
3363 
wined3d_shader_decref(struct wined3d_shader * shader)3364 ULONG CDECL wined3d_shader_decref(struct wined3d_shader *shader)
3365 {
3366     ULONG refcount = InterlockedDecrement(&shader->ref);
3367 
3368     TRACE("%p decreasing refcount to %u.\n", shader, refcount);
3369 
3370     if (!refcount)
3371     {
3372         shader->parent_ops->wined3d_object_destroyed(shader->parent);
3373         wined3d_cs_destroy_object(shader->device->cs, wined3d_shader_destroy_object, shader);
3374     }
3375 
3376     return refcount;
3377 }
3378 
wined3d_shader_get_parent(const struct wined3d_shader * shader)3379 void * CDECL wined3d_shader_get_parent(const struct wined3d_shader *shader)
3380 {
3381     TRACE("shader %p.\n", shader);
3382 
3383     return shader->parent;
3384 }
3385 
wined3d_shader_get_byte_code(const struct wined3d_shader * shader,void * byte_code,UINT * byte_code_size)3386 HRESULT CDECL wined3d_shader_get_byte_code(const struct wined3d_shader *shader,
3387         void *byte_code, UINT *byte_code_size)
3388 {
3389     TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size);
3390 
3391     if (!byte_code)
3392     {
3393         *byte_code_size = shader->functionLength;
3394         return WINED3D_OK;
3395     }
3396 
3397     if (*byte_code_size < shader->functionLength)
3398     {
3399         /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller
3400          * than the required size we should write the required size and
3401          * return D3DERR_MOREDATA. That's not actually true. */
3402         return WINED3DERR_INVALIDCALL;
3403     }
3404 
3405     memcpy(byte_code, shader->function, shader->functionLength);
3406 
3407     return WINED3D_OK;
3408 }
3409 
3410 /* Set local constants for d3d8 shaders. */
wined3d_shader_set_local_constants_float(struct wined3d_shader * shader,UINT start_idx,const float * src_data,UINT count)3411 HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *shader,
3412         UINT start_idx, const float *src_data, UINT count)
3413 {
3414     UINT end_idx = start_idx + count;
3415     UINT i;
3416 
3417     TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count);
3418 
3419     if (end_idx > shader->limits->constant_float)
3420     {
3421         WARN("end_idx %u > float constants limit %u.\n",
3422                 end_idx, shader->limits->constant_float);
3423         end_idx = shader->limits->constant_float;
3424     }
3425 
3426     for (i = start_idx; i < end_idx; ++i)
3427     {
3428         struct wined3d_shader_lconst *lconst;
3429         float *value;
3430 
3431         if (!(lconst = heap_alloc(sizeof(*lconst))))
3432             return E_OUTOFMEMORY;
3433 
3434         lconst->idx = i;
3435         value = (float *)lconst->value;
3436         memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
3437         list_add_head(&shader->constantsF, &lconst->entry);
3438 
3439         if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
3440                 || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
3441         {
3442             shader->lconst_inf_or_nan = TRUE;
3443         }
3444     }
3445 
3446     return WINED3D_OK;
3447 }
3448 
/* Fill the interpolation mode words of a compile-args structure.
 *
 * The modes are copied from the pixel shader only when the GL implementation
 * needs interpolation qualifiers on shader outputs and a pixel shader with
 * major version >= 4 is supplied; in every other case they are zeroed. */
static void init_interpolation_compile_args(DWORD *interpolation_args,
        const struct wined3d_shader *pixel_shader, const struct wined3d_gl_info *gl_info)
{
    if (needs_interpolation_qualifiers_for_shader_outputs(gl_info)
            && pixel_shader && pixel_shader->reg_maps.shader_version.major >= 4)
    {
        memcpy(interpolation_args, pixel_shader->u.ps.interpolation_mode,
                sizeof(pixel_shader->u.ps.interpolation_mode));
        return;
    }

    /* sizeof() does not evaluate its operand, so a NULL pixel_shader is fine here. */
    memset(interpolation_args, 0, sizeof(pixel_shader->u.ps.interpolation_mode));
}
3462 
/* Derive the vertex shader compile arguments from the current state.
 *
 * The stage that consumes the vertex shader's outputs is the hull shader if
 * one is bound, else the geometry shader if one is bound, else the pixel
 * stage. */
void find_vs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
        WORD swizzle_map, struct vs_compile_args *args, const struct wined3d_context *context)
{
    const struct wined3d_shader *hs = state->shader[WINED3D_SHADER_TYPE_HULL];
    const struct wined3d_shader *gs = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
    const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL];
    const struct wined3d_shader *interpolation_source = NULL;
    const struct wined3d_shader *next_shader;

    if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE)
        args->fog_src = VS_FOG_COORD;
    else
        args->fog_src = VS_FOG_Z;
    args->clip_enabled = state->render_states[WINED3D_RS_CLIPPING]
            && state->render_states[WINED3D_RS_CLIPPLANEENABLE];
    args->point_size = state->gl_primitive_type == GL_POINTS;
    args->per_vertex_point_size = shader->reg_maps.point_size;

    if (hs)
    {
        args->next_shader_type = WINED3D_SHADER_TYPE_HULL;
        next_shader = hs;
    }
    else if (gs)
    {
        args->next_shader_type = WINED3D_SHADER_TYPE_GEOMETRY;
        next_shader = gs;
    }
    else
    {
        args->next_shader_type = WINED3D_SHADER_TYPE_PIXEL;
        next_shader = ps;
        /* Interpolation modes only come from a directly following pixel shader. */
        interpolation_source = ps;
    }

    /* Only version 4+ vertex shaders record the next stage's packed input
     * count; next_shader is NULL when no pixel shader is bound either. */
    if (shader->reg_maps.shader_version.major >= 4 && next_shader)
        args->next_shader_input_count = next_shader->limits->packed_input;
    else
        args->next_shader_input_count = 0;

    args->swizzle_map = swizzle_map;

    if (context->d3d_info->emulated_flatshading)
        args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
    else
        args->flatshading = 0;

    init_interpolation_compile_args(args->interpolation_mode, interpolation_source, context->gl_info);
}
3495 
match_usage(BYTE usage1,BYTE usage_idx1,BYTE usage2,BYTE usage_idx2)3496 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2)
3497 {
3498     if (usage_idx1 != usage_idx2)
3499         return FALSE;
3500     if (usage1 == usage2)
3501         return TRUE;
3502     if (usage1 == WINED3D_DECL_USAGE_POSITION && usage2 == WINED3D_DECL_USAGE_POSITIONT)
3503         return TRUE;
3504     if (usage2 == WINED3D_DECL_USAGE_POSITION && usage1 == WINED3D_DECL_USAGE_POSITIONT)
3505         return TRUE;
3506 
3507     return FALSE;
3508 }
3509 
vshader_get_input(const struct wined3d_shader * shader,BYTE usage_req,BYTE usage_idx_req,unsigned int * regnum)3510 BOOL vshader_get_input(const struct wined3d_shader *shader,
3511         BYTE usage_req, BYTE usage_idx_req, unsigned int *regnum)
3512 {
3513     WORD map = shader->reg_maps.input_registers;
3514     unsigned int i;
3515 
3516     for (i = 0; map; map >>= 1, ++i)
3517     {
3518         if (!(map & 1)) continue;
3519 
3520         if (match_usage(shader->u.vs.attributes[i].usage,
3521                 shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req))
3522         {
3523             *regnum = i;
3524             return TRUE;
3525         }
3526     }
3527     return FALSE;
3528 }
3529 
shader_signature_calculate_strings_length(const struct wined3d_shader_signature * signature,SIZE_T * total)3530 static HRESULT shader_signature_calculate_strings_length(const struct wined3d_shader_signature *signature,
3531         SIZE_T *total)
3532 {
3533     struct wined3d_shader_signature_element *e;
3534     unsigned int i;
3535     SIZE_T len;
3536 
3537     for (i = 0; i < signature->element_count; ++i)
3538     {
3539         e = &signature->elements[i];
3540         len = strlen(e->semantic_name);
3541         if (len >= ~(SIZE_T)0 - *total)
3542             return E_OUTOFMEMORY;
3543 
3544         *total += len + 1;
3545     }
3546     return WINED3D_OK;
3547 }
3548 
shader_signature_copy(struct wined3d_shader_signature * dst,const struct wined3d_shader_signature * src,char ** signature_strings)3549 static HRESULT shader_signature_copy(struct wined3d_shader_signature *dst,
3550         const struct wined3d_shader_signature *src, char **signature_strings)
3551 {
3552     struct wined3d_shader_signature_element *e;
3553     unsigned int i;
3554     SIZE_T len;
3555     char *ptr;
3556 
3557     if (!src->element_count)
3558         return WINED3D_OK;
3559 
3560     ptr = *signature_strings;
3561 
3562     dst->element_count = src->element_count;
3563     if (!(dst->elements = heap_calloc(dst->element_count, sizeof(*dst->elements))))
3564         return E_OUTOFMEMORY;
3565 
3566     for (i = 0; i < src->element_count; ++i)
3567     {
3568         e = &src->elements[i];
3569         dst->elements[i] = *e;
3570 
3571         len = strlen(e->semantic_name);
3572         memcpy(ptr, e->semantic_name, len + 1);
3573         dst->elements[i].semantic_name = ptr;
3574         ptr += len + 1;
3575     }
3576 
3577     *signature_strings = ptr;
3578 
3579     return WINED3D_OK;
3580 }
3581 
shader_init(struct wined3d_shader * shader,struct wined3d_device * device,const struct wined3d_shader_desc * desc,DWORD float_const_count,enum wined3d_shader_type type,void * parent,const struct wined3d_parent_ops * parent_ops)3582 static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3583         const struct wined3d_shader_desc *desc, DWORD float_const_count, enum wined3d_shader_type type,
3584         void *parent, const struct wined3d_parent_ops *parent_ops)
3585 {
3586     size_t byte_code_size;
3587     SIZE_T total;
3588     HRESULT hr;
3589     char *ptr;
3590 
3591     TRACE("byte_code %p, byte_code_size %#lx, format %#x, max_version %#x.\n",
3592             desc->byte_code, (long)desc->byte_code_size, desc->format, desc->max_version);
3593 
3594     if (!desc->byte_code)
3595         return WINED3DERR_INVALIDCALL;
3596 
3597     if (!(shader->frontend = shader_select_frontend(desc->format)))
3598     {
3599         FIXME("Unable to find frontend for shader.\n");
3600         return WINED3DERR_INVALIDCALL;
3601     }
3602 
3603     shader->ref = 1;
3604     shader->device = device;
3605     shader->parent = parent;
3606     shader->parent_ops = parent_ops;
3607 
3608     total = 0;
3609     if (FAILED(hr = shader_signature_calculate_strings_length(&desc->input_signature, &total)))
3610         return hr;
3611     if (FAILED(hr = shader_signature_calculate_strings_length(&desc->output_signature, &total)))
3612         return hr;
3613     if (FAILED(hr = shader_signature_calculate_strings_length(&desc->patch_constant_signature, &total)))
3614         return hr;
3615     if (total && !(shader->signature_strings = heap_alloc(total)))
3616         return E_OUTOFMEMORY;
3617     ptr = shader->signature_strings;
3618 
3619     if (FAILED(hr = shader_signature_copy(&shader->input_signature, &desc->input_signature, &ptr)))
3620     {
3621         heap_free(shader->signature_strings);
3622         return hr;
3623     }
3624     if (FAILED(hr = shader_signature_copy(&shader->output_signature, &desc->output_signature, &ptr)))
3625     {
3626         heap_free(shader->input_signature.elements);
3627         heap_free(shader->signature_strings);
3628         return hr;
3629     }
3630     if (FAILED(hr = shader_signature_copy(&shader->patch_constant_signature, &desc->patch_constant_signature, &ptr)))
3631     {
3632         heap_free(shader->output_signature.elements);
3633         heap_free(shader->input_signature.elements);
3634         heap_free(shader->signature_strings);
3635         return hr;
3636     }
3637 
3638     list_init(&shader->linked_programs);
3639     list_init(&shader->constantsF);
3640     list_init(&shader->constantsB);
3641     list_init(&shader->constantsI);
3642     shader->lconst_inf_or_nan = FALSE;
3643     list_init(&shader->reg_maps.indexable_temps);
3644     list_init(&shader->shader_list_entry);
3645 
3646     byte_code_size = desc->byte_code_size;
3647     if (byte_code_size == ~(size_t)0)
3648     {
3649         const struct wined3d_shader_frontend *fe = shader->frontend;
3650         struct wined3d_shader_version shader_version;
3651         struct wined3d_shader_instruction ins;
3652         const DWORD *ptr;
3653         void *fe_data;
3654 
3655         if (!(fe_data = fe->shader_init(desc->byte_code, byte_code_size, &shader->output_signature)))
3656         {
3657             WARN("Failed to initialise frontend data.\n");
3658             shader_cleanup(shader);
3659             return WINED3DERR_INVALIDCALL;
3660         }
3661 
3662         fe->shader_read_header(fe_data, &ptr, &shader_version);
3663         while (!fe->shader_is_end(fe_data, &ptr))
3664             fe->shader_read_instruction(fe_data, &ptr, &ins);
3665 
3666         fe->shader_free(fe_data);
3667 
3668         byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
3669     }
3670 
3671     if (!(shader->function = heap_alloc(byte_code_size)))
3672     {
3673         shader_cleanup(shader);
3674         return E_OUTOFMEMORY;
3675     }
3676     memcpy(shader->function, desc->byte_code, byte_code_size);
3677     shader->functionLength = byte_code_size;
3678 
3679     if (FAILED(hr = shader_set_function(shader, float_const_count, type, desc->max_version)))
3680     {
3681         WARN("Failed to set function, hr %#x.\n", hr);
3682         shader_cleanup(shader);
3683         return hr;
3684     }
3685 
3686     shader->load_local_constsF = shader->lconst_inf_or_nan;
3687 
3688     wined3d_cs_init_object(shader->device->cs, wined3d_shader_init_object, shader);
3689 
3690     return hr;
3691 }
3692 
vertex_shader_init(struct wined3d_shader * shader,struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops)3693 static HRESULT vertex_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3694         const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
3695 {
3696     struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3697     unsigned int i;
3698     HRESULT hr;
3699 
3700     if (FAILED(hr = shader_init(shader, device, desc, device->adapter->d3d_info.limits.vs_uniform_count,
3701             WINED3D_SHADER_TYPE_VERTEX, parent, parent_ops)))
3702         return hr;
3703 
3704     for (i = 0; i < shader->input_signature.element_count; ++i)
3705     {
3706         const struct wined3d_shader_signature_element *input = &shader->input_signature.elements[i];
3707 
3708         if (!(reg_maps->input_registers & (1u << input->register_idx)) || !input->semantic_name)
3709             continue;
3710 
3711         shader->u.vs.attributes[input->register_idx].usage =
3712                 shader_usage_from_semantic_name(input->semantic_name);
3713         shader->u.vs.attributes[input->register_idx].usage_idx = input->semantic_idx;
3714     }
3715 
3716     if (reg_maps->usesrelconstF && !list_empty(&shader->constantsF))
3717         shader->load_local_constsF = TRUE;
3718 
3719     return WINED3D_OK;
3720 }
3721 
geometry_shader_init(struct wined3d_shader * shader,struct wined3d_device * device,const struct wined3d_shader_desc * desc,const struct wined3d_stream_output_desc * so_desc,void * parent,const struct wined3d_parent_ops * parent_ops)3722 static HRESULT geometry_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3723         const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
3724         void *parent, const struct wined3d_parent_ops *parent_ops)
3725 {
3726     struct wined3d_stream_output_element *elements = NULL;
3727     HRESULT hr;
3728 
3729     if (so_desc && !(elements = heap_calloc(so_desc->element_count, sizeof(*elements))))
3730         return E_OUTOFMEMORY;
3731 
3732     if (FAILED(hr = shader_init(shader, device, desc, 0, WINED3D_SHADER_TYPE_GEOMETRY, parent, parent_ops)))
3733     {
3734         heap_free(elements);
3735         return hr;
3736     }
3737 
3738     if (so_desc)
3739     {
3740         shader->u.gs.so_desc = *so_desc;
3741         shader->u.gs.so_desc.elements = elements;
3742         memcpy(elements, so_desc->elements, so_desc->element_count * sizeof(*elements));
3743     }
3744 
3745     return WINED3D_OK;
3746 }
3747 
find_ds_compile_args(const struct wined3d_state * state,const struct wined3d_shader * shader,struct ds_compile_args * args,const struct wined3d_context * context)3748 void find_ds_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
3749         struct ds_compile_args *args, const struct wined3d_context *context)
3750 {
3751     const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
3752     const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3753     const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
3754     const struct wined3d_gl_info *gl_info = context->gl_info;
3755 
3756     args->tessellator_output_primitive = hull_shader->u.hs.tessellator_output_primitive;
3757     args->tessellator_partitioning = hull_shader->u.hs.tessellator_partitioning;
3758 
3759     args->output_count = geometry_shader ? geometry_shader->limits->packed_input
3760             : pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
3761     args->next_shader_type = geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
3762 
3763     args->render_offscreen = context->render_offscreen;
3764 
3765     init_interpolation_compile_args(args->interpolation_mode,
3766             args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, gl_info);
3767 
3768     args->padding = 0;
3769 }
3770 
find_gs_compile_args(const struct wined3d_state * state,const struct wined3d_shader * shader,struct gs_compile_args * args,const struct wined3d_context * context)3771 void find_gs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
3772         struct gs_compile_args *args, const struct wined3d_context *context)
3773 {
3774     const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3775     const struct wined3d_gl_info *gl_info = context->gl_info;
3776 
3777     args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
3778 
3779     init_interpolation_compile_args(args->interpolation_mode, pixel_shader, gl_info);
3780 }
3781 
/* Derive the pixel shader compile arguments from the current state.
 *
 * The structure is zeroed first; the sections below then fill in, in order:
 * sRGB write correction, projected texture transform flags (ps 1.0 - 1.3),
 * texture types (ps 1.0 - 1.4), colour fixups / shadow samplers / NP2 fixups,
 * vertex processing mode and fog, initialised texture coordinates, point
 * sprites, alpha test function, flat shading, offscreen rendering and dual
 * source blending. */
void find_ps_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
        BOOL position_transformed, struct ps_compile_args *args, const struct wined3d_context *context)
{
    const struct wined3d_shader_version *version = &shader->reg_maps.shader_version;
    const struct wined3d_d3d_info *d3d_info = context->d3d_info;
    const struct wined3d_gl_info *gl_info = context->gl_info;
    unsigned int i;

    memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. */

    /* sRGB write correction is done in the shader when GL cannot do it. */
    if (!gl_info->supported[ARB_FRAMEBUFFER_SRGB] && needs_srgb_write(context, state, state->fb))
    {
        static unsigned int warned = 0;

        args->srgb_correction = 1;
        if (state->render_states[WINED3D_RS_ALPHABLENDENABLE] && !warned++)
            WARN("Blending into a sRGB render target with no GL_ARB_framebuffer_sRGB "
                    "support, expect rendering artifacts.\n");
    }

    /* Projected texture transform flags, ps 1.0 - 1.3 only. */
    if (version->major == 1 && version->minor <= 3)
    {
        for (i = 0; i < shader->limits->sampler; ++i)
        {
            DWORD flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS];
            DWORD tex_transform;

            if (!(flags & WINED3D_TTFF_PROJECTED))
                continue;

            if (state->shader[WINED3D_SHADER_TYPE_VERTEX])
            {
                tex_transform = WINED3D_TTFF_COUNT4 | WINED3D_PSARGS_PROJECTED;
            }
            else
            {
                enum wined3d_shader_resource_type resource_type = shader->reg_maps.resource_info[i].type;
                unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
                DWORD max_valid = WINED3D_TTFF_COUNT4;
                unsigned int j;

                tex_transform = flags & ~WINED3D_TTFF_PROJECTED;

                /* The matching texcoord element of the vertex declaration,
                 * if any, limits the usable coordinate count. */
                for (j = 0; j < state->vertex_declaration->element_count; ++j)
                {
                    const struct wined3d_vertex_declaration_element *element =
                            &state->vertex_declaration->elements[j];

                    if (element->usage != WINED3D_DECL_USAGE_TEXCOORD || element->usage_idx != index)
                        continue;

                    max_valid = element->format->component_count;
                    break;
                }

                if (!tex_transform || tex_transform > max_valid)
                {
                    WARN("Fixing up projected texture transform flags from %#x to %#x.\n",
                            tex_transform, max_valid);
                    tex_transform = max_valid;
                }

                /* Projection needs more coordinates than the texture type consumes. */
                if ((resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1D && tex_transform > WINED3D_TTFF_COUNT1)
                        || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D
                        && tex_transform > WINED3D_TTFF_COUNT2)
                        || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
                        && tex_transform > WINED3D_TTFF_COUNT3))
                {
                    tex_transform |= WINED3D_PSARGS_PROJECTED;
                }
                else
                {
                    WARN("Application requested projected texture with unsuitable texture coordinates.\n");
                    WARN("(texture unit %u, transform flags %#x, sampler type %u).\n",
                            i, tex_transform, resource_type);
                }
            }

            args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
        }
    }

    /* Texture types, ps 1.0 - 1.4 only. */
    if (version->major == 1 && version->minor <= 4)
    {
        for (i = 0; i < shader->limits->sampler; ++i)
        {
            const struct wined3d_texture *bound = state->textures[i];

            /* Treat unbound textures as 2D. The dummy texture will provide
             * the proper sample value. The tex_types bitmap defaults to
             * 2D because of the memset. */
            if (!shader->reg_maps.resource_info[i].type || !bound)
                continue;

            switch (bound->target)
            {
                /* RECT textures are distinguished from 2D textures via np2_fixup */
                case GL_TEXTURE_RECTANGLE_ARB:
                case GL_TEXTURE_2D:
                    break;

                case GL_TEXTURE_3D:
                    args->tex_types |= WINED3D_SHADER_TEX_3D << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
                    break;

                case GL_TEXTURE_CUBE_MAP_ARB:
                    args->tex_types |= WINED3D_SHADER_TEX_CUBE << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
                    break;

                default:
                    break;
            }
        }
    }

    /* Colour fixups, shadow samplers and NP2 fixups. */
    if (version->major >= 4)
    {
        /* In SM4+ we use dcl_sampler in order to determine if we should use shadow sampler. */
        args->shadow = 0;
        for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
            args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
        args->np2_fixup = 0;
    }
    else
    {
        for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
        {
            const struct wined3d_texture *bound;

            if (!shader->reg_maps.resource_info[i].type)
                continue;

            bound = state->textures[i];
            if (!bound)
            {
                args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
                continue;
            }

            if (can_use_texture_swizzle(gl_info, bound->resource.format))
                args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
            else
                args->color_fixup[i] = bound->resource.format->color_fixup;

            if (bound->resource.format_flags & WINED3DFMT_FLAG_SHADOW)
                args->shadow |= 1u << i;

            /* Flag samplers that need NP2 texcoord fixup. */
            if (!(bound->flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
                args->np2_fixup |= (1u << i);
        }
    }

    /* Vertex processing mode and fog. */
    if (version->major >= 3)
    {
        if (position_transformed)
            args->vp_mode = pretransformed;
        else if (use_vs(state))
            args->vp_mode = vertexshader;
        else
            args->vp_mode = fixedfunction;
        args->fog = WINED3D_FFP_PS_FOG_OFF;
    }
    else
    {
        args->vp_mode = vertexshader;

        if (!state->render_states[WINED3D_RS_FOGENABLE])
        {
            args->fog = WINED3D_FFP_PS_FOG_OFF;
        }
        else
        {
            DWORD fog_mode = state->render_states[WINED3D_RS_FOGTABLEMODE];

            /* Without table fog, the vertex fog mode decides, unless the
             * position is pretransformed or a vertex shader is in use; in
             * those cases the mode stays NONE and maps to linear below. */
            if (fog_mode == WINED3D_FOG_NONE && !position_transformed && !use_vs(state))
                fog_mode = state->render_states[WINED3D_RS_FOGVERTEXMODE];

            switch (fog_mode)
            {
                case WINED3D_FOG_NONE: /* Fall through. */
                case WINED3D_FOG_LINEAR:
                    args->fog = WINED3D_FFP_PS_FOG_LINEAR;
                    break;

                case WINED3D_FOG_EXP:
                    args->fog = WINED3D_FFP_PS_FOG_EXP;
                    break;

                case WINED3D_FOG_EXP2:
                    args->fog = WINED3D_FFP_PS_FOG_EXP2;
                    break;
            }
        }
    }

    /* Which texture coordinates receive a value from the vertex stage. */
    if (d3d_info->limits.varying_count < wined3d_max_compat_varyings(gl_info))
    {
        const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];

        args->texcoords_initialized = 0;
        for (i = 0; i < MAX_TEXTURES; ++i)
        {
            if (vs)
            {
                if (vs->reg_maps.output_registers & (1u << i))
                    args->texcoords_initialized |= 1u << i;
            }
            else
            {
                const struct wined3d_stream_info *si = &context->stream_info;
                unsigned int coord_idx = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];

                if ((coord_idx >> WINED3D_FFP_TCI_SHIFT) & WINED3D_FFP_TCI_MASK
                        || (coord_idx < MAX_TEXTURES
                        && (si->use_map & (1u << (WINED3D_FFP_TEXCOORD0 + coord_idx)))))
                    args->texcoords_initialized |= 1u << i;
            }
        }
    }
    else
    {
        args->texcoords_initialized = (1u << MAX_TEXTURES) - 1;
    }

    args->pointsprite = state->render_states[WINED3D_RS_POINTSPRITEENABLE]
            && state->gl_primitive_type == GL_POINTS;

    /* The stored value is the comparison function minus one. */
    if (gl_info->supported[WINED3D_GL_LEGACY_CONTEXT]
            || !state->render_states[WINED3D_RS_ALPHATESTENABLE])
        args->alpha_test_func = WINED3D_CMP_ALWAYS - 1;
    else
        args->alpha_test_func = wined3d_sanitize_cmp_func(state->render_states[WINED3D_RS_ALPHAFUNC]) - 1;

    if (d3d_info->emulated_flatshading)
        args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;

    if (shader->reg_maps.vpos && gl_info->supported[ARB_FRAGMENT_COORD_CONVENTIONS])
        args->render_offscreen = context->render_offscreen;
    else
        args->render_offscreen = 0;

    args->dual_source_blend = wined3d_dualblend_enabled(state, gl_info);
}
4017 
pixel_shader_init(struct wined3d_shader * shader,struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops)4018 static HRESULT pixel_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
4019         const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
4020 {
4021     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4022     unsigned int i, highest_reg_used = 0, num_regs_used = 0;
4023     HRESULT hr;
4024 
4025     if (FAILED(hr = shader_init(shader, device, desc, device->adapter->d3d_info.limits.ps_uniform_count,
4026             WINED3D_SHADER_TYPE_PIXEL, parent, parent_ops)))
4027         return hr;
4028 
4029     for (i = 0; i < MAX_REG_INPUT; ++i)
4030     {
4031         if (shader->u.ps.input_reg_used & (1u << i))
4032         {
4033             ++num_regs_used;
4034             highest_reg_used = i;
4035         }
4036     }
4037 
4038     /* Don't do any register mapping magic if it is not needed, or if we can't
4039      * achieve anything anyway */
4040     if (highest_reg_used < (gl_info->limits.glsl_varyings / 4)
4041             || num_regs_used > (gl_info->limits.glsl_varyings / 4)
4042             || shader->reg_maps.shader_version.major >= 4)
4043     {
4044         if (num_regs_used > (gl_info->limits.glsl_varyings / 4))
4045         {
4046             /* This happens with relative addressing. The input mapper function
4047              * warns about this if the higher registers are declared too, so
4048              * don't write a FIXME here */
4049             WARN("More varying registers used than supported\n");
4050         }
4051 
4052         for (i = 0; i < MAX_REG_INPUT; ++i)
4053         {
4054             shader->u.ps.input_reg_map[i] = i;
4055         }
4056 
4057         shader->u.ps.declared_in_count = highest_reg_used + 1;
4058     }
4059     else
4060     {
4061         shader->u.ps.declared_in_count = 0;
4062         for (i = 0; i < MAX_REG_INPUT; ++i)
4063         {
4064             if (shader->u.ps.input_reg_used & (1u << i))
4065                 shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++;
4066             else shader->u.ps.input_reg_map[i] = ~0U;
4067         }
4068     }
4069 
4070     return WINED3D_OK;
4071 }
4072 
/* Update the resource type of every sampled sampler of a version 1.x pixel
 * shader from the packed tex_types bitmap. Shaders of any other major
 * version are left untouched, as are samplers the shader does not sample
 * from and bitmap values that match none of the known texture types. */
void pixelshader_update_resource_types(struct wined3d_shader *shader, WORD tex_types)
{
    struct wined3d_shader_resource_info *info = shader->reg_maps.resource_info;
    unsigned int sampler;

    if (shader->reg_maps.shader_version.major != 1)
        return;

    for (sampler = 0; sampler < shader->limits->sampler; ++sampler)
    {
        unsigned int tex_type;

        /* We don't sample from this sampler. */
        if (!info[sampler].type)
            continue;

        tex_type = (tex_types >> sampler * WINED3D_PSARGS_TEXTYPE_SHIFT) & WINED3D_PSARGS_TEXTYPE_MASK;

        if (tex_type == WINED3D_SHADER_TEX_2D)
            info[sampler].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
        else if (tex_type == WINED3D_SHADER_TEX_3D)
            info[sampler].type = WINED3D_SHADER_RESOURCE_TEXTURE_3D;
        else if (tex_type == WINED3D_SHADER_TEX_CUBE)
            info[sampler].type = WINED3D_SHADER_RESOURCE_TEXTURE_CUBE;
    }
}
4103 
wined3d_shader_create_cs(struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4104 HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4105         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4106 {
4107     struct wined3d_shader *object;
4108     HRESULT hr;
4109 
4110     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4111             device, desc, parent, parent_ops, shader);
4112 
4113     if (!(object = heap_alloc_zero(sizeof(*object))))
4114         return E_OUTOFMEMORY;
4115 
4116     if (FAILED(hr = shader_init(object, device, desc, 0, WINED3D_SHADER_TYPE_COMPUTE, parent, parent_ops)))
4117     {
4118         WARN("Failed to initialize compute shader, hr %#x.\n", hr);
4119         heap_free(object);
4120         return hr;
4121     }
4122 
4123     TRACE("Created compute shader %p.\n", object);
4124     *shader = object;
4125 
4126     return WINED3D_OK;
4127 }
4128 
wined3d_shader_create_ds(struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4129 HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4130         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4131 {
4132     struct wined3d_shader *object;
4133     HRESULT hr;
4134 
4135     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4136             device, desc, parent, parent_ops, shader);
4137 
4138     if (!(object = heap_alloc_zero(sizeof(*object))))
4139         return E_OUTOFMEMORY;
4140 
4141     if (FAILED(hr = shader_init(object, device, desc, 0, WINED3D_SHADER_TYPE_DOMAIN, parent, parent_ops)))
4142     {
4143         WARN("Failed to initialize domain shader, hr %#x.\n", hr);
4144         heap_free(object);
4145         return hr;
4146     }
4147 
4148     TRACE("Created domain shader %p.\n", object);
4149     *shader = object;
4150 
4151     return WINED3D_OK;
4152 }
4153 
wined3d_shader_create_gs(struct wined3d_device * device,const struct wined3d_shader_desc * desc,const struct wined3d_stream_output_desc * so_desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4154 HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4155         const struct wined3d_stream_output_desc *so_desc, void *parent,
4156         const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4157 {
4158     struct wined3d_shader *object;
4159     HRESULT hr;
4160 
4161     TRACE("device %p, desc %p, so_desc %p, parent %p, parent_ops %p, shader %p.\n",
4162             device, desc, so_desc, parent, parent_ops, shader);
4163 
4164     if (!(object = heap_alloc_zero(sizeof(*object))))
4165         return E_OUTOFMEMORY;
4166 
4167     if (FAILED(hr = geometry_shader_init(object, device, desc, so_desc, parent, parent_ops)))
4168     {
4169         WARN("Failed to initialize geometry shader, hr %#x.\n", hr);
4170         heap_free(object);
4171         return hr;
4172     }
4173 
4174     TRACE("Created geometry shader %p.\n", object);
4175     *shader = object;
4176 
4177     return WINED3D_OK;
4178 }
4179 
wined3d_shader_create_hs(struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4180 HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4181         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4182 {
4183     struct wined3d_shader *object;
4184     HRESULT hr;
4185 
4186     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4187             device, desc, parent, parent_ops, shader);
4188 
4189     if (!(object = heap_alloc_zero(sizeof(*object))))
4190         return E_OUTOFMEMORY;
4191 
4192     if (FAILED(hr = shader_init(object, device, desc, 0, WINED3D_SHADER_TYPE_HULL, parent, parent_ops)))
4193     {
4194         WARN("Failed to initialize hull shader, hr %#x.\n", hr);
4195         heap_free(object);
4196         return hr;
4197     }
4198 
4199     TRACE("Created hull shader %p.\n", object);
4200     *shader = object;
4201 
4202     return WINED3D_OK;
4203 }
4204 
wined3d_shader_create_ps(struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4205 HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4206         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4207 {
4208     struct wined3d_shader *object;
4209     HRESULT hr;
4210 
4211     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4212             device, desc, parent, parent_ops, shader);
4213 
4214     if (!(object = heap_alloc_zero(sizeof(*object))))
4215         return E_OUTOFMEMORY;
4216 
4217     if (FAILED(hr = pixel_shader_init(object, device, desc, parent, parent_ops)))
4218     {
4219         WARN("Failed to initialize pixel shader, hr %#x.\n", hr);
4220         heap_free(object);
4221         return hr;
4222     }
4223 
4224     TRACE("Created pixel shader %p.\n", object);
4225     *shader = object;
4226 
4227     return WINED3D_OK;
4228 }
4229 
wined3d_shader_create_vs(struct wined3d_device * device,const struct wined3d_shader_desc * desc,void * parent,const struct wined3d_parent_ops * parent_ops,struct wined3d_shader ** shader)4230 HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
4231         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4232 {
4233     struct wined3d_shader *object;
4234     HRESULT hr;
4235 
4236     TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4237             device, desc, parent, parent_ops, shader);
4238 
4239     if (!(object = heap_alloc_zero(sizeof(*object))))
4240         return E_OUTOFMEMORY;
4241 
4242     if (FAILED(hr = vertex_shader_init(object, device, desc, parent, parent_ops)))
4243     {
4244         WARN("Failed to initialize vertex shader, hr %#x.\n", hr);
4245         heap_free(object);
4246         return hr;
4247     }
4248 
4249     TRACE("Created vertex shader %p.\n", object);
4250     *shader = object;
4251 
4252     return WINED3D_OK;
4253 }
4254