1 /**************************************************************************
2  *
3  * Copyright 2012-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 /*
29  * ShaderTGSI.c --
30  *    Functions for translating shaders.
31  */
32 
33 #include "Debug.h"
34 #include "ShaderParse.h"
35 
36 #include "pipe/p_state.h"
37 #include "tgsi/tgsi_ureg.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "util/u_memory.h"
40 
41 #include "ShaderDump.h"
42 
43 
/*
 * Format tag stored alongside every entry of opcode_xlate[] (see
 * struct dx10_opcode_xlate).  In that table the floating-point opcodes
 * carry OF_FLOAT, the I-prefixed integer opcodes carry OF_INT, and the
 * U-prefixed / bitwise opcodes carry OF_UINT.
 * NOTE(review): how the tag is consumed is not visible in this part of
 * the file.
 */
enum dx10_opcode_format {
   OF_FLOAT,
   OF_INT,
   OF_UINT
};
49 
/*
 * One row of the D3D10 -> TGSI opcode translation table opcode_xlate[].
 */
struct dx10_opcode_xlate {
   D3D10_SB_OPCODE_TYPE type;        /* D3D10 opcode this row describes */
   enum dx10_opcode_format format;   /* format tag for the opcode */
   uint tgsi_opcode;                 /* TGSI_OPCODE_x, or one of the
                                      * TGSI_EXPAND / TGSI_LOG_UNSUPPORTED
                                      * sentinels */
};
55 
/* Opcodes that we have not even attempted to implement.
 *
 * Sentinel stored in the tgsi_opcode column of opcode_xlate[].
 */
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST

/* Opcodes which do not translate directly to a TGSI opcode, but which
 * have at least a partial implementation coded below.
 *
 * Sentinel stored in the tgsi_opcode column of opcode_xlate[].
 */
#define TGSI_EXPAND          (TGSI_OPCODE_LAST+1)
64 
/*
 * D3D10 -> TGSI opcode translation table.
 *
 * Each row names a D3D10 opcode, its format tag, and either the TGSI
 * opcode it maps to directly or one of the sentinels TGSI_EXPAND
 * (translated by dedicated code) / TGSI_LOG_UNSUPPORTED (not implemented).
 *
 * Several signed integer opcodes (IADD, IMAD, IEQ, INE) are mapped to the
 * unsigned TGSI opcodes (UADD, UMAD, USEQ, USNE).
 *
 * NOTE(review): the array is sized D3D10_SB_NUM_OPCODES and the rows
 * appear to be listed in D3D10_SB_OPCODE_TYPE order, which suggests the
 * table is indexed directly by opcode -- confirm against the lookup site
 * before reordering or inserting rows.
 */
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
   {D3D10_SB_OPCODE_ADD,                              OF_FLOAT, TGSI_OPCODE_ADD},
   {D3D10_SB_OPCODE_AND,                              OF_UINT,  TGSI_OPCODE_AND},
   {D3D10_SB_OPCODE_BREAK,                            OF_FLOAT, TGSI_OPCODE_BRK},
   {D3D10_SB_OPCODE_BREAKC,                           OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALL,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CALLC,                            OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CASE,                             OF_UINT,  TGSI_OPCODE_CASE},
   {D3D10_SB_OPCODE_CONTINUE,                         OF_FLOAT, TGSI_OPCODE_CONT},
   {D3D10_SB_OPCODE_CONTINUEC,                        OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_CUT,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DEFAULT,                          OF_FLOAT, TGSI_OPCODE_DEFAULT},
   {D3D10_SB_OPCODE_DERIV_RTX,                        OF_FLOAT, TGSI_OPCODE_DDX},
   {D3D10_SB_OPCODE_DERIV_RTY,                        OF_FLOAT, TGSI_OPCODE_DDY},
   {D3D10_SB_OPCODE_DISCARD,                          OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_DIV,                              OF_FLOAT, TGSI_OPCODE_DIV},
   {D3D10_SB_OPCODE_DP2,                              OF_FLOAT, TGSI_OPCODE_DP2},
   {D3D10_SB_OPCODE_DP3,                              OF_FLOAT, TGSI_OPCODE_DP3},
   {D3D10_SB_OPCODE_DP4,                              OF_FLOAT, TGSI_OPCODE_DP4},
   {D3D10_SB_OPCODE_ELSE,                             OF_FLOAT, TGSI_OPCODE_ELSE},
   {D3D10_SB_OPCODE_EMIT,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_EMITTHENCUT,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_ENDIF,                            OF_FLOAT, TGSI_OPCODE_ENDIF},
   {D3D10_SB_OPCODE_ENDLOOP,                          OF_FLOAT, TGSI_OPCODE_ENDLOOP},
   {D3D10_SB_OPCODE_ENDSWITCH,                        OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
   {D3D10_SB_OPCODE_EQ,                               OF_FLOAT, TGSI_OPCODE_FSEQ},
   {D3D10_SB_OPCODE_EXP,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FRC,                              OF_FLOAT, TGSI_OPCODE_FRC},
   {D3D10_SB_OPCODE_FTOI,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_FTOU,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_GE,                               OF_FLOAT, TGSI_OPCODE_FSGE},
   {D3D10_SB_OPCODE_IADD,                             OF_INT,   TGSI_OPCODE_UADD},
   {D3D10_SB_OPCODE_IF,                               OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_IEQ,                              OF_INT,   TGSI_OPCODE_USEQ},
   {D3D10_SB_OPCODE_IGE,                              OF_INT,   TGSI_OPCODE_ISGE},
   {D3D10_SB_OPCODE_ILT,                              OF_INT,   TGSI_OPCODE_ISLT},
   {D3D10_SB_OPCODE_IMAD,                             OF_INT,   TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_IMAX,                             OF_INT,   TGSI_OPCODE_IMAX},
   {D3D10_SB_OPCODE_IMIN,                             OF_INT,   TGSI_OPCODE_IMIN},
   {D3D10_SB_OPCODE_IMUL,                             OF_INT,   TGSI_EXPAND},
   {D3D10_SB_OPCODE_INE,                              OF_INT,   TGSI_OPCODE_USNE},
   {D3D10_SB_OPCODE_INEG,                             OF_INT,   TGSI_OPCODE_INEG},
   {D3D10_SB_OPCODE_ISHL,                             OF_INT,   TGSI_OPCODE_SHL},
   {D3D10_SB_OPCODE_ISHR,                             OF_INT,   TGSI_OPCODE_ISHR},
   {D3D10_SB_OPCODE_ITOF,                             OF_INT,   TGSI_OPCODE_I2F},
   {D3D10_SB_OPCODE_LABEL,                            OF_INT,   TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD,                               OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_LD_MS,                            OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOG,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_LOOP,                             OF_FLOAT, TGSI_OPCODE_BGNLOOP},
   {D3D10_SB_OPCODE_LT,                               OF_FLOAT, TGSI_OPCODE_FSLT},
   {D3D10_SB_OPCODE_MAD,                              OF_FLOAT, TGSI_OPCODE_MAD},
   {D3D10_SB_OPCODE_MIN,                              OF_FLOAT, TGSI_OPCODE_MIN},
   {D3D10_SB_OPCODE_MAX,                              OF_FLOAT, TGSI_OPCODE_MAX},
   {D3D10_SB_OPCODE_CUSTOMDATA,                       OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_MOV,                              OF_UINT,  TGSI_OPCODE_MOV},
   {D3D10_SB_OPCODE_MOVC,                             OF_UINT,  TGSI_OPCODE_UCMP},
   {D3D10_SB_OPCODE_MUL,                              OF_FLOAT, TGSI_OPCODE_MUL},
   {D3D10_SB_OPCODE_NE,                               OF_FLOAT, TGSI_OPCODE_FSNE},
   {D3D10_SB_OPCODE_NOP,                              OF_FLOAT, TGSI_OPCODE_NOP},
   {D3D10_SB_OPCODE_NOT,                              OF_UINT,  TGSI_OPCODE_NOT},
   {D3D10_SB_OPCODE_OR,                               OF_UINT,  TGSI_OPCODE_OR},
   {D3D10_SB_OPCODE_RESINFO,                          OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_RET,                              OF_FLOAT, TGSI_OPCODE_RET},
   {D3D10_SB_OPCODE_RETC,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_ROUND_NE,                         OF_FLOAT, TGSI_OPCODE_ROUND},
   {D3D10_SB_OPCODE_ROUND_NI,                         OF_FLOAT, TGSI_OPCODE_FLR},
   {D3D10_SB_OPCODE_ROUND_PI,                         OF_FLOAT, TGSI_OPCODE_CEIL},
   {D3D10_SB_OPCODE_ROUND_Z,                          OF_FLOAT, TGSI_OPCODE_TRUNC},
   {D3D10_SB_OPCODE_RSQ,                              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE,                           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_C_LZ,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_L,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_D,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SAMPLE_B,                         OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SQRT,                             OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_SWITCH,                           OF_UINT,  TGSI_OPCODE_SWITCH},
   {D3D10_SB_OPCODE_SINCOS,                           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_UDIV,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_ULT,                              OF_UINT,  TGSI_OPCODE_USLT},
   {D3D10_SB_OPCODE_UGE,                              OF_UINT,  TGSI_OPCODE_USGE},
   {D3D10_SB_OPCODE_UMUL,                             OF_UINT,  TGSI_EXPAND},
   {D3D10_SB_OPCODE_UMAD,                             OF_UINT,  TGSI_OPCODE_UMAD},
   {D3D10_SB_OPCODE_UMAX,                             OF_UINT,  TGSI_OPCODE_UMAX},
   {D3D10_SB_OPCODE_UMIN,                             OF_UINT,  TGSI_OPCODE_UMIN},
   {D3D10_SB_OPCODE_USHR,                             OF_UINT,  TGSI_OPCODE_USHR},
   {D3D10_SB_OPCODE_UTOF,                             OF_UINT,  TGSI_OPCODE_U2F},
   {D3D10_SB_OPCODE_XOR,                              OF_UINT,  TGSI_OPCODE_XOR},
   {D3D10_SB_OPCODE_DCL_RESOURCE,                     OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER,              OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_SAMPLER,                      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEX_RANGE,                  OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE,           OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,      OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT,                        OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SGV,                    OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_SIV,                    OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS,                     OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SGV,                 OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INPUT_PS_SIV,                 OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT,                       OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SGV,                   OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_OUTPUT_SIV,                   OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_TEMPS,                        OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP,               OF_FLOAT, TGSI_EXPAND},
   {D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS,                 OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_SB_OPCODE_RESERVED0,                        OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_LOD,                            OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_GATHER4,                        OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_POS,                     OF_FLOAT, TGSI_LOG_UNSUPPORTED},
   {D3D10_1_SB_OPCODE_SAMPLE_INFO,                    OF_FLOAT, TGSI_LOG_UNSUPPORTED}
};
179 
/*
 * Capacities of the fixed-size arrays in struct Shader_xlate:
 *
 *    SHADER_MAX_TEMPS            - temps[]
 *    SHADER_MAX_INPUTS           - inputs[]
 *    SHADER_MAX_OUTPUTS          - outputs[]
 *    SHADER_MAX_CONSTS           - (use not visible in this part of the file)
 *    SHADER_MAX_RESOURCES        - resources[] and sv[]
 *    SHADER_MAX_SAMPLERS         - samplers[]
 *    SHADER_MAX_INDEXABLE_TEMPS  - indexable_temp_offsets[]
 */
#define SHADER_MAX_TEMPS 4096
#define SHADER_MAX_INPUTS 32
#define SHADER_MAX_OUTPUTS 32
#define SHADER_MAX_CONSTS 4096
#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS
#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS
#define SHADER_MAX_INDEXABLE_TEMPS 4096
187 
/*
 * Element type of the Shader_xlate::calls array.
 * NOTE(review): presumably records a call site whose target label still
 * has to be resolved -- the code using it is not visible here; confirm.
 */
struct Shader_call {
   unsigned d3d_label;          /* label number as used by the D3D10 shader */
   unsigned tgsi_label_token;   /* TGSI-side label token for this call */
};
192 
/*
 * Element type of the Shader_xlate::labels array.
 * NOTE(review): presumably associates a D3D10 label with the TGSI
 * instruction it resolves to -- the code using it is not visible here;
 * confirm.
 */
struct Shader_label {
   unsigned d3d_label;      /* label number as used by the D3D10 shader */
   unsigned tgsi_insn_no;   /* TGSI instruction number for the label */
};
197 
/*
 * Per-resource state; element type of the Shader_xlate::resources array.
 */
struct Shader_resource {
   uint target;   /* TGSI_TEXTURE_x */
};
201 
/*
 * Translation state carried through the conversion of one D3D10 shader
 * into a TGSI program.
 */
struct Shader_xlate {
   /* TGSI program under construction. */
   struct ureg_program *ureg;

   /* Per-primitive vertex count; the first declaration sets it and later
    * ones must agree (see declare_vertices_in()).  Zero means "not set". */
   uint vertices_in;
   uint declared_temps;

   /* Temporaries, addressed as temps[temp_offset + n] for plain temps and
    * temps[indexable_temp_offsets[i] + n] for indexable temps. */
   struct ureg_dst temps[SHADER_MAX_TEMPS];
   struct ureg_dst output_depth;
   struct Shader_resource resources[SHADER_MAX_RESOURCES];
   struct ureg_src sv[SHADER_MAX_RESOURCES];
   struct ureg_src samplers[SHADER_MAX_SAMPLERS];
   struct ureg_src imms;
   /* PRIMID system value declared by dcl_gs_input(). */
   struct ureg_src prim_id;

   uint temp_offset;
   uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];

   /* Input registers, maintained by dcl_base_input(). */
   struct {
      boolean declared;      /* a declaration has been recorded */
      uint    writemask;     /* union of the component masks declared so far */
      uint    siv_name;      /* D3D10_SB_NAME_x of the first declaration */
      boolean overloaded;    /* several declarations were merged into a temp */
      struct ureg_src reg;   /* register to read the input from */
   } inputs[SHADER_MAX_INPUTS];

   /* Output registers, one destination per component (see
    * dcl_base_output()). */
   struct {
      struct ureg_dst reg[4];
   } outputs[SHADER_MAX_OUTPUTS];

   /* D3D10 register index -> TGSI semantic index pairs; the clip mapping
    * is consulted by translate_semantic_index(). */
   struct {
      uint d3d;
      uint tgsi;
   } clip_distance_mapping[2], cull_distance_mapping[2];
   uint num_clip_distances_declared;
   uint num_cull_distances_declared;

   /* NOTE(review): calls/labels look like growable arrays (count plus
    * capacity); the code managing them is not visible here -- confirm. */
   struct Shader_call *calls;
   uint num_calls;
   uint max_calls;
   struct Shader_label *labels;
   uint num_labels;
   uint max_labels;
};
245 
246 static uint
translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)247 translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
248 {
249    switch (interpolation) {
250    case D3D10_SB_INTERPOLATION_UNDEFINED:
251       assert(0);
252       return TGSI_INTERPOLATE_LINEAR;
253 
254    case D3D10_SB_INTERPOLATION_CONSTANT:
255       return TGSI_INTERPOLATE_CONSTANT;
256    case D3D10_SB_INTERPOLATION_LINEAR:
257       return TGSI_INTERPOLATE_PERSPECTIVE;
258    case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
259       return TGSI_INTERPOLATE_LINEAR;
260 
261    case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
262    case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1
263       LOG_UNSUPPORTED(TRUE);
264       return TGSI_INTERPOLATE_PERSPECTIVE;
265 
266    case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
267    case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1
268       LOG_UNSUPPORTED(TRUE);
269       return TGSI_INTERPOLATE_LINEAR;
270    }
271 
272    assert(0);
273    return TGSI_INTERPOLATE_LINEAR;
274 }
275 
276 static uint
translate_system_name(D3D10_SB_NAME name)277 translate_system_name(D3D10_SB_NAME name)
278 {
279    switch (name) {
280    case D3D10_SB_NAME_UNDEFINED:
281       assert(0);                /* should not happen */
282       return TGSI_SEMANTIC_GENERIC;
283    case D3D10_SB_NAME_POSITION:
284       return TGSI_SEMANTIC_POSITION;
285    case D3D10_SB_NAME_CLIP_DISTANCE:
286    case D3D10_SB_NAME_CULL_DISTANCE:
287       return TGSI_SEMANTIC_CLIPDIST;
288    case D3D10_SB_NAME_PRIMITIVE_ID:
289       return TGSI_SEMANTIC_PRIMID;
290    case D3D10_SB_NAME_INSTANCE_ID:
291       return TGSI_SEMANTIC_INSTANCEID;
292    case D3D10_SB_NAME_VERTEX_ID:
293       return TGSI_SEMANTIC_VERTEXID_NOBASE;
294    case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
295       return TGSI_SEMANTIC_VIEWPORT_INDEX;
296    case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
297       return TGSI_SEMANTIC_LAYER;
298    case D3D10_SB_NAME_IS_FRONT_FACE:
299       return TGSI_SEMANTIC_FACE;
300    case D3D10_SB_NAME_SAMPLE_INDEX:
301       LOG_UNSUPPORTED(TRUE);
302       return TGSI_SEMANTIC_GENERIC;
303    }
304 
305    assert(0);
306    return TGSI_SEMANTIC_GENERIC;
307 }
308 
309 static uint
translate_semantic_index(struct Shader_xlate * sx,D3D10_SB_NAME name,const struct Shader_dst_operand * operand)310 translate_semantic_index(struct Shader_xlate *sx,
311                          D3D10_SB_NAME name,
312                          const struct Shader_dst_operand *operand)
313 {
314    unsigned idx;
315    switch (name) {
316    case D3D10_SB_NAME_CLIP_DISTANCE:
317    case D3D10_SB_NAME_CULL_DISTANCE:
318       if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
319          idx = sx->clip_distance_mapping[0].tgsi;
320       } else {
321          assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
322          idx = sx->clip_distance_mapping[1].tgsi;
323       }
324       break;
325 /*   case D3D10_SB_NAME_CULL_DISTANCE:
326       if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {
327          idx = sx->cull_distance_mapping[0].tgsi;
328       } else {
329          assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);
330          idx = sx->cull_distance_mapping[1].tgsi;
331       }
332       break;*/
333    default:
334       idx = 0;
335    }
336    return idx;
337 }
338 
339 static enum tgsi_return_type
trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype)340 trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
341    switch (d3drettype) {
342    case D3D10_SB_RETURN_TYPE_UNORM:
343       return TGSI_RETURN_TYPE_UNORM;
344    case D3D10_SB_RETURN_TYPE_SNORM:
345       return TGSI_RETURN_TYPE_SNORM;
346    case D3D10_SB_RETURN_TYPE_SINT:
347       return TGSI_RETURN_TYPE_SINT;
348    case D3D10_SB_RETURN_TYPE_UINT:
349       return TGSI_RETURN_TYPE_UINT;
350    case D3D10_SB_RETURN_TYPE_FLOAT:
351       return TGSI_RETURN_TYPE_FLOAT;
352    case D3D10_SB_RETURN_TYPE_MIXED:
353    default:
354       LOG_UNSUPPORTED(TRUE);
355       return TGSI_RETURN_TYPE_FLOAT;
356    }
357 }
358 
359 static void
declare_vertices_in(struct Shader_xlate * sx,unsigned in)360 declare_vertices_in(struct Shader_xlate *sx,
361                     unsigned in)
362 {
363    /* Make sure vertices_in is consistent with input primitive
364     * and other input declarations.
365     */
366    if (sx->vertices_in) {
367       assert(sx->vertices_in == in);
368    } else {
369       sx->vertices_in = in;
370    }
371 }
372 
/*
 * A four-component source swizzle: for each destination channel, the
 * TGSI_SWIZZLE_x selecting which source component feeds it (passed to
 * ureg_swizzle() by swizzle_reg()).
 */
struct swizzle_mapping {
   unsigned x;
   unsigned y;
   unsigned z;
   unsigned w;
};
379 
/* mapping of writemask to swizzles
 *
 * Indexed by a writemask value (see swizzle_reg()); the trailing comment
 * on each row names the mask that row corresponds to.
 *
 * NOTE(review): the XZW, YZW and XYZW rows are plain identity swizzles,
 * while e.g. the XZ and YZ rows repeat a two-component pattern -- the
 * rule behind the individual rows is not evident from this file; confirm
 * before changing any entry.
 */
static const struct swizzle_mapping writemask_to_swizzle[] = {
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
   { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
   { TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
   { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
   { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
   { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
};
399 
400 static struct ureg_src
swizzle_reg(struct ureg_src src,uint writemask,unsigned siv_name)401 swizzle_reg(struct ureg_src src, uint writemask,
402             unsigned siv_name)
403 {
404    switch (siv_name) {
405    case D3D10_SB_NAME_PRIMITIVE_ID:
406    case D3D10_SB_NAME_INSTANCE_ID:
407    case D3D10_SB_NAME_VERTEX_ID:
408    case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
409    case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
410    case D3D10_SB_NAME_IS_FRONT_FACE:
411       return ureg_scalar(src, TGSI_SWIZZLE_X);
412    default: {
413       const struct swizzle_mapping *swizzle =
414          &writemask_to_swizzle[writemask];
415       return ureg_swizzle(src, swizzle->x, swizzle->y,
416                           swizzle->z, swizzle->w);
417    }
418    }
419 }
420 
421 static void
dcl_base_output(struct Shader_xlate * sx,struct ureg_program * ureg,struct ureg_dst reg,const struct Shader_dst_operand * operand)422 dcl_base_output(struct Shader_xlate *sx,
423                 struct ureg_program *ureg,
424                 struct ureg_dst reg,
425                 const struct Shader_dst_operand *operand)
426 {
427    unsigned writemask =
428       operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
429    unsigned idx = operand->base.index[0].imm;
430    unsigned i;
431 
432    if (!writemask) {
433       sx->outputs[idx].reg[0] = reg;
434       sx->outputs[idx].reg[1] = reg;
435       sx->outputs[idx].reg[2] = reg;
436       sx->outputs[idx].reg[3] = reg;
437       return;
438    }
439 
440    for (i = 0; i < 4; ++i) {
441       unsigned mask = 1 << i;
442       if ((writemask & mask)) {
443          sx->outputs[idx].reg[i] = reg;
444       }
445    }
446 }
447 
448 static void
dcl_base_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * operand,struct ureg_src dcl_reg,uint index,uint siv_name)449 dcl_base_input(struct Shader_xlate *sx,
450                struct ureg_program *ureg,
451                const struct Shader_dst_operand *operand,
452                struct ureg_src dcl_reg,
453                uint index,
454                uint siv_name)
455 {
456    unsigned writemask =
457       operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
458 
459    if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {
460       struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);
461 
462       ureg_MOV(ureg,
463                ureg_writemask(temp, sx->inputs[index].writemask),
464                swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,
465                            sx->inputs[index].siv_name));
466       ureg_MOV(ureg, ureg_writemask(temp, writemask),
467                swizzle_reg(dcl_reg, writemask, siv_name));
468       sx->inputs[index].reg = ureg_src(temp);
469       sx->inputs[index].overloaded = TRUE;
470       sx->inputs[index].writemask |= writemask;
471    } else if (sx->inputs[index].overloaded) {
472       struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);
473       ureg_MOV(ureg, ureg_writemask(temp, writemask),
474                swizzle_reg(dcl_reg, writemask, siv_name));
475       sx->inputs[index].writemask |= writemask;
476    } else {
477       assert(!sx->inputs[index].declared);
478 
479       sx->inputs[index].reg = dcl_reg;
480       sx->inputs[index].declared = TRUE;
481       sx->inputs[index].writemask = writemask;
482       sx->inputs[index].siv_name = siv_name;
483    }
484 }
485 
486 static void
dcl_vs_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst)487 dcl_vs_input(struct Shader_xlate *sx,
488              struct ureg_program *ureg,
489              const struct Shader_dst_operand *dst)
490 {
491    struct ureg_src reg;
492    assert(dst->base.index_dim == 1);
493    assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
494 
495    reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
496 
497    dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
498                   D3D10_SB_NAME_UNDEFINED);
499 }
500 
501 static void
dcl_gs_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst)502 dcl_gs_input(struct Shader_xlate *sx,
503              struct ureg_program *ureg,
504              const struct Shader_dst_operand *dst)
505 {
506    if (dst->base.index_dim == 2) {
507       assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
508 
509       declare_vertices_in(sx, dst->base.index[0].imm);
510 
511       /* XXX: Implement declaration masks in gallium.
512        */
513       if (!sx->inputs[dst->base.index[1].imm].reg.File) {
514          struct ureg_src reg =
515             ureg_DECL_input(ureg,
516                             TGSI_SEMANTIC_GENERIC,
517                             dst->base.index[1].imm,
518                             0, 1);
519          dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
520                         D3D10_SB_NAME_UNDEFINED);
521       }
522    } else {
523       assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
524       assert(dst->base.index_dim == 0);
525 
526       sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
527    }
528 }
529 
530 static void
dcl_sgv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)531 dcl_sgv_input(struct Shader_xlate *sx,
532               struct ureg_program *ureg,
533               const struct Shader_dst_operand *dst,
534               uint dcl_siv_name)
535 {
536    struct ureg_src reg;
537    assert(dst->base.index_dim == 1);
538    assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
539 
540    reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);
541 
542    dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
543                   dcl_siv_name);
544 }
545 
546 static void
dcl_siv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)547 dcl_siv_input(struct Shader_xlate *sx,
548               struct ureg_program *ureg,
549               const struct Shader_dst_operand *dst,
550               uint dcl_siv_name)
551 {
552    struct ureg_src reg;
553    assert(dst->base.index_dim == 2);
554    assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
555 
556    declare_vertices_in(sx, dst->base.index[0].imm);
557 
558    reg = ureg_DECL_input(ureg,
559                          translate_system_name(dcl_siv_name), 0,
560                          0, 1);
561 
562    dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
563                   dcl_siv_name);
564 }
565 
566 static void
dcl_ps_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_in_ps_interp)567 dcl_ps_input(struct Shader_xlate *sx,
568              struct ureg_program *ureg,
569              const struct Shader_dst_operand *dst,
570              uint dcl_in_ps_interp)
571 {
572    struct ureg_src reg;
573    assert(dst->base.index_dim == 1);
574    assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
575 
576    reg = ureg_DECL_fs_input(ureg,
577                             TGSI_SEMANTIC_GENERIC,
578                             dst->base.index[0].imm,
579                             translate_interpolation(dcl_in_ps_interp));
580 
581    dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
582                   D3D10_SB_NAME_UNDEFINED);
583 }
584 
585 static void
dcl_ps_sgv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name)586 dcl_ps_sgv_input(struct Shader_xlate *sx,
587                  struct ureg_program *ureg,
588                  const struct Shader_dst_operand *dst,
589                  uint dcl_siv_name)
590 {
591    struct ureg_src reg;
592    assert(dst->base.index_dim == 1);
593    assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
594 
595    if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
596       ureg_property(ureg,
597                     TGSI_PROPERTY_FS_COORD_ORIGIN,
598                     TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
599       ureg_property(ureg,
600                     TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
601                     TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
602    }
603 
604    reg = ureg_DECL_fs_input(ureg,
605                             translate_system_name(dcl_siv_name),
606                             0,
607                             TGSI_INTERPOLATE_CONSTANT);
608 
609    if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
610       /* We need to map gallium's front_face to the one expected
611        * by D3D10 */
612       struct ureg_dst tmp = ureg_DECL_temporary(ureg);
613 
614       tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);
615 
616       ureg_CMP(ureg, tmp, reg,
617                ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
618 
619       reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);
620    }
621 
622    dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
623                   dcl_siv_name);
624 }
625 
626 static void
dcl_ps_siv_input(struct Shader_xlate * sx,struct ureg_program * ureg,const struct Shader_dst_operand * dst,uint dcl_siv_name,uint dcl_in_ps_interp)627 dcl_ps_siv_input(struct Shader_xlate *sx,
628                  struct ureg_program *ureg,
629                  const struct Shader_dst_operand *dst,
630                  uint dcl_siv_name, uint dcl_in_ps_interp)
631 {
632    struct ureg_src reg;
633    assert(dst->base.index_dim == 1);
634    assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
635 
636    reg = ureg_DECL_fs_input(ureg,
637                             translate_system_name(dcl_siv_name),
638                             0,
639                             translate_interpolation(dcl_in_ps_interp));
640 
641    if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
642       /* D3D10 expects reciprocal of interpolated 1/w as 4th component,
643        * gallium/GL just interpolated 1/w */
644       struct ureg_dst tmp = ureg_DECL_temporary(ureg);
645 
646       ureg_MOV(ureg, tmp, reg);
647       ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),
648                ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));
649       reg = ureg_src(tmp);
650    }
651 
652    dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
653                   dcl_siv_name);
654 }
655 
656 static struct ureg_src
translate_relative_operand(struct Shader_xlate * sx,const struct Shader_relative_operand * operand)657 translate_relative_operand(struct Shader_xlate *sx,
658                            const struct Shader_relative_operand *operand)
659 {
660    struct ureg_src reg;
661 
662    switch (operand->type) {
663    case D3D10_SB_OPERAND_TYPE_TEMP:
664       assert(operand->index[0].imm < SHADER_MAX_TEMPS);
665 
666       reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
667       break;
668 
669    case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
670       reg = sx->prim_id;
671       break;
672 
673    case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
674       assert(operand->index[1].imm < SHADER_MAX_TEMPS);
675 
676       reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
677             operand->index[1].imm]);
678       break;
679 
680    case D3D10_SB_OPERAND_TYPE_INPUT:
681    case D3D10_SB_OPERAND_TYPE_OUTPUT:
682    case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
683    case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
684    case D3D10_SB_OPERAND_TYPE_SAMPLER:
685    case D3D10_SB_OPERAND_TYPE_RESOURCE:
686    case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
687    case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
688    case D3D10_SB_OPERAND_TYPE_LABEL:
689    case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
690    case D3D10_SB_OPERAND_TYPE_NULL:
691    case D3D10_SB_OPERAND_TYPE_RASTERIZER:
692    case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
693       LOG_UNSUPPORTED(TRUE);
694       reg = ureg_src(ureg_DECL_temporary(sx->ureg));
695       break;
696 
697    default:
698       assert(0);                /* should never happen */
699       reg = ureg_src(ureg_DECL_temporary(sx->ureg));
700    }
701 
702    reg = ureg_scalar(reg, operand->comp);
703    return reg;
704 }
705 
706 static struct ureg_dst
translate_operand(struct Shader_xlate * sx,const struct Shader_operand * operand,unsigned writemask)707 translate_operand(struct Shader_xlate *sx,
708                   const struct Shader_operand *operand,
709                   unsigned writemask)
710 {
711    struct ureg_dst reg;
712 
713    switch (operand->type) {
714    case D3D10_SB_OPERAND_TYPE_TEMP:
715       assert(operand->index_dim == 1);
716       assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
717       assert(operand->index[0].imm < SHADER_MAX_TEMPS);
718 
719       reg = sx->temps[sx->temp_offset + operand->index[0].imm];
720       break;
721 
722    case D3D10_SB_OPERAND_TYPE_OUTPUT:
723       assert(operand->index_dim == 1);
724       assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
725 
726       if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
727          if (!writemask) {
728             reg = sx->outputs[operand->index[0].imm].reg[0];
729          } else {
730             unsigned i;
731             for (i = 0; i < 4; ++i) {
732                unsigned mask = 1 << i;
733                if ((writemask & mask)) {
734                   reg = sx->outputs[operand->index[0].imm].reg[i];
735                   break;
736                }
737             }
738          }
739       } else {
740          struct ureg_src addr =
741             translate_relative_operand(sx, &operand->index[0].rel);
742          assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
743          reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
744       }
745       break;
746 
747    case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
748       assert(operand->index_dim == 0);
749 
750       reg = sx->output_depth;
751       break;
752 
753    case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
754       assert(operand->index_dim == 0);
755 
756       reg = ureg_dst(sx->prim_id);
757       break;
758 
759    case D3D10_SB_OPERAND_TYPE_INPUT:
760    case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
761    case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
762    case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
763    case D3D10_SB_OPERAND_TYPE_SAMPLER:
764    case D3D10_SB_OPERAND_TYPE_RESOURCE:
765    case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
766    case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
767    case D3D10_SB_OPERAND_TYPE_LABEL:
768    case D3D10_SB_OPERAND_TYPE_NULL:
769    case D3D10_SB_OPERAND_TYPE_RASTERIZER:
770    case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
771       /* XXX: Translate more operands types.
772        */
773       LOG_UNSUPPORTED(TRUE);
774       reg = ureg_DECL_temporary(sx->ureg);
775    }
776 
777    return reg;
778 }
779 
780 static struct ureg_src
translate_indexable_temp(struct Shader_xlate * sx,const struct Shader_operand * operand)781 translate_indexable_temp(struct Shader_xlate *sx,
782                          const struct Shader_operand *operand)
783 {
784    struct ureg_src reg;
785    switch (operand->index[1].index_rep) {
786    case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
787       reg = ureg_src(
788          sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
789                    operand->index[1].imm]);
790       break;
791    case D3D10_SB_OPERAND_INDEX_RELATIVE:
792       reg = ureg_src_indirect(
793          ureg_src(sx->temps[
794                      sx->indexable_temp_offsets[operand->index[0].imm]]),
795          translate_relative_operand(sx,
796                                     &operand->index[1].rel));
797       break;
798    case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
799       reg = ureg_src_indirect(
800          ureg_src(sx->temps[
801                      operand->index[1].imm +
802                      sx->indexable_temp_offsets[operand->index[0].imm]]),
803          translate_relative_operand(sx,
804                                     &operand->index[1].rel));
805       break;
806    default:
807       /* XXX: Other index representations.
808        */
809       LOG_UNSUPPORTED(TRUE);
810       reg = ureg_src(ureg_DECL_temporary(sx->ureg));
811    }
812    return reg;
813 }
814 
815 static struct ureg_dst
translate_dst_operand(struct Shader_xlate * sx,const struct Shader_dst_operand * operand,boolean saturate)816 translate_dst_operand(struct Shader_xlate *sx,
817                       const struct Shader_dst_operand *operand,
818                       boolean saturate)
819 {
820    struct ureg_dst reg;
821    unsigned writemask =
822       operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
823 
824    assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
825    assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
826    assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
827    assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
828    assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);
829 
830    switch (operand->base.type) {
831    case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
832       assert(operand->base.index_dim == 2);
833       assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
834       assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
835 
836       reg = ureg_dst(translate_indexable_temp(sx, &operand->base));
837       break;
838 
839    default:
840       reg = translate_operand(sx, &operand->base, writemask);
841    }
842 
843    /* oDepth often has an empty writemask */
844    if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
845       reg = ureg_writemask(reg, writemask);
846    }
847 
848    if (saturate) {
849       reg = ureg_saturate(reg);
850    }
851 
852    return reg;
853 }
854 
855 static struct ureg_src
translate_src_operand(struct Shader_xlate * sx,const struct Shader_src_operand * operand,const enum dx10_opcode_format format)856 translate_src_operand(struct Shader_xlate *sx,
857                       const struct Shader_src_operand *operand,
858                       const enum dx10_opcode_format format)
859 {
860    struct ureg_src reg;
861 
862    switch (operand->base.type) {
863    case D3D10_SB_OPERAND_TYPE_INPUT:
864       if (operand->base.index_dim == 1) {
865          switch (operand->base.index[0].index_rep) {
866          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
867             assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
868             reg = sx->inputs[operand->base.index[0].imm].reg;
869             break;
870          case D3D10_SB_OPERAND_INDEX_RELATIVE: {
871             struct ureg_src tmp =
872                translate_relative_operand(sx, &operand->base.index[0].rel);
873             reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
874          }
875             break;
876          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
877             struct ureg_src tmp =
878                translate_relative_operand(sx, &operand->base.index[0].rel);
879             reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
880          }
881             break;
882          default:
883             /* XXX: Other index representations.
884              */
885             LOG_UNSUPPORTED(TRUE);
886 
887          }
888       } else {
889          assert(operand->base.index_dim == 2);
890          assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);
891 
892          switch (operand->base.index[1].index_rep) {
893          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
894             reg = sx->inputs[operand->base.index[1].imm].reg;
895             break;
896          case D3D10_SB_OPERAND_INDEX_RELATIVE: {
897             struct ureg_src tmp =
898                translate_relative_operand(sx, &operand->base.index[1].rel);
899             reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
900          }
901             break;
902          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
903             struct ureg_src tmp =
904                translate_relative_operand(sx, &operand->base.index[1].rel);
905             reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
906          }
907             break;
908          default:
909             /* XXX: Other index representations.
910              */
911             LOG_UNSUPPORTED(TRUE);
912          }
913 
914          switch (operand->base.index[0].index_rep) {
915          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
916             reg = ureg_src_dimension(reg, operand->base.index[0].imm);
917             break;
918          case D3D10_SB_OPERAND_INDEX_RELATIVE:{
919             struct ureg_src tmp =
920                translate_relative_operand(sx, &operand->base.index[0].rel);
921             reg = ureg_src_dimension_indirect(reg, tmp, 0);
922          }
923             break;
924          case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
925             struct ureg_src tmp =
926                translate_relative_operand(sx, &operand->base.index[0].rel);
927             reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
928          }
929             break;
930          default:
931             /* XXX: Other index representations.
932              */
933             LOG_UNSUPPORTED(TRUE);
934          }
935       }
936       break;
937 
938    case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
939       assert(operand->base.index_dim == 2);
940       assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
941       assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
942 
943       reg = translate_indexable_temp(sx, &operand->base);
944       break;
945 
946    case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
947       switch (format) {
948       case OF_FLOAT:
949          reg = ureg_imm4f(sx->ureg,
950                           operand->imm[0].f32,
951                           operand->imm[1].f32,
952                           operand->imm[2].f32,
953                           operand->imm[3].f32);
954          break;
955       case OF_INT:
956          reg = ureg_imm4i(sx->ureg,
957                           operand->imm[0].i32,
958                           operand->imm[1].i32,
959                           operand->imm[2].i32,
960                           operand->imm[3].i32);
961          break;
962       case OF_UINT:
963          reg = ureg_imm4u(sx->ureg,
964                           operand->imm[0].u32,
965                           operand->imm[1].u32,
966                           operand->imm[2].u32,
967                           operand->imm[3].u32);
968          break;
969       default:
970          assert(0);
971          reg = ureg_src(ureg_DECL_temporary(sx->ureg));
972       }
973       break;
974 
975    case D3D10_SB_OPERAND_TYPE_SAMPLER:
976       assert(operand->base.index_dim == 1);
977       assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
978       assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);
979 
980       reg = sx->samplers[operand->base.index[0].imm];
981       break;
982 
983    case D3D10_SB_OPERAND_TYPE_RESOURCE:
984       assert(operand->base.index_dim == 1);
985       assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
986       assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);
987 
988       reg = sx->sv[operand->base.index[0].imm];
989       break;
990 
991    case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
992       assert(operand->base.index_dim == 2);
993 
994       assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
995       assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
996 
997       switch (operand->base.index[1].index_rep) {
998       case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
999          assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);
1000 
1001          reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1002          reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1003          break;
1004       case D3D10_SB_OPERAND_INDEX_RELATIVE:
1005       case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1006          reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1007          reg = ureg_src_indirect(
1008             reg,
1009             translate_relative_operand(sx, &operand->base.index[1].rel));
1010          reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1011          break;
1012       default:
1013          /* XXX: Other index representations.
1014           */
1015          LOG_UNSUPPORTED(TRUE);
1016       }
1017 
1018       break;
1019 
1020    case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
1021       assert(operand->base.index_dim == 1);
1022 
1023       switch (operand->base.index[0].index_rep) {
1024       case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
1025          reg = sx->imms;
1026          reg.Index += operand->base.index[0].imm;
1027          break;
1028       case D3D10_SB_OPERAND_INDEX_RELATIVE:
1029       case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1030          reg = sx->imms;
1031          reg.Index += operand->base.index[0].imm;
1032          reg = ureg_src_indirect(
1033             sx->imms,
1034             translate_relative_operand(sx, &operand->base.index[0].rel));
1035          break;
1036       default:
1037          /* XXX: Other index representations.
1038           */
1039          LOG_UNSUPPORTED(TRUE);
1040       }
1041       break;
1042 
1043    case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
1044       reg = sx->prim_id;
1045       break;
1046 
1047    default:
1048       reg = ureg_src(translate_operand(sx, &operand->base, 0));
1049    }
1050 
1051    reg = ureg_swizzle(reg,
1052                       operand->swizzle[0],
1053                       operand->swizzle[1],
1054                       operand->swizzle[2],
1055                       operand->swizzle[3]);
1056 
1057    switch (operand->modifier) {
1058    case D3D10_SB_OPERAND_MODIFIER_NONE:
1059       break;
1060    case D3D10_SB_OPERAND_MODIFIER_NEG:
1061       reg = ureg_negate(reg);
1062       break;
1063    case D3D10_SB_OPERAND_MODIFIER_ABS:
1064       reg = ureg_abs(reg);
1065       break;
1066    case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
1067       reg = ureg_negate(ureg_abs(reg));
1068       break;
1069    default:
1070       assert(0);
1071    }
1072 
1073    return reg;
1074 }
1075 
1076 static uint
translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)1077 translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
1078 {
1079    switch (dim) {
1080    case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
1081       return TGSI_TEXTURE_UNKNOWN;
1082    case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
1083       return TGSI_TEXTURE_BUFFER;
1084    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
1085       return TGSI_TEXTURE_1D;
1086    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
1087       return TGSI_TEXTURE_2D;
1088    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
1089       return TGSI_TEXTURE_2D_MSAA;
1090    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
1091       return TGSI_TEXTURE_3D;
1092    case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
1093       return TGSI_TEXTURE_CUBE;
1094    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
1095       return TGSI_TEXTURE_1D_ARRAY;
1096    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
1097       return TGSI_TEXTURE_2D_ARRAY;
1098    case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
1099       return TGSI_TEXTURE_2D_ARRAY_MSAA;
1100    case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
1101       return TGSI_TEXTURE_CUBE_ARRAY;
1102    default:
1103       assert(0);
1104       return TGSI_TEXTURE_UNKNOWN;
1105    }
1106 }
1107 
1108 static uint
texture_dim_from_tgsi_target(unsigned tgsi_target)1109 texture_dim_from_tgsi_target(unsigned tgsi_target)
1110 {
1111    switch (tgsi_target) {
1112    case TGSI_TEXTURE_BUFFER:
1113    case TGSI_TEXTURE_1D:
1114    case TGSI_TEXTURE_1D_ARRAY:
1115       return 1;
1116    case TGSI_TEXTURE_2D:
1117    case TGSI_TEXTURE_2D_MSAA:
1118    case TGSI_TEXTURE_CUBE:
1119    case TGSI_TEXTURE_2D_ARRAY:
1120    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1121       return 2;
1122    case TGSI_TEXTURE_3D:
1123       return 3;
1124    case TGSI_TEXTURE_UNKNOWN:
1125    default:
1126       assert(0);
1127       return 1;
1128    }
1129 }
1130 
1131 static boolean
operand_is_scalar(const struct Shader_src_operand * operand)1132 operand_is_scalar(const struct Shader_src_operand *operand)
1133 {
1134    return operand->swizzle[0] == operand->swizzle[1] &&
1135           operand->swizzle[1] == operand->swizzle[2] &&
1136           operand->swizzle[2] == operand->swizzle[3];
1137 }
1138 
1139 static void
Shader_add_call(struct Shader_xlate * sx,unsigned d3d_label,unsigned tgsi_label_token)1140 Shader_add_call(struct Shader_xlate *sx,
1141                 unsigned d3d_label,
1142                 unsigned tgsi_label_token)
1143 {
1144    ASSERT(sx->num_calls < sx->max_calls);
1145 
1146    sx->calls[sx->num_calls].d3d_label = d3d_label;
1147    sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;
1148    sx->num_calls++;
1149 }
1150 
1151 static void
Shader_add_label(struct Shader_xlate * sx,unsigned d3d_label,unsigned tgsi_insn_no)1152 Shader_add_label(struct Shader_xlate *sx,
1153                  unsigned d3d_label,
1154                  unsigned tgsi_insn_no)
1155 {
1156    ASSERT(sx->num_labels < sx->max_labels);
1157 
1158    sx->labels[sx->num_labels].d3d_label = d3d_label;
1159    sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;
1160    sx->num_labels++;
1161 }
1162 
1163 
1164 static void
sample_ureg_emit(struct ureg_program * ureg,unsigned tgsi_opcode,unsigned num_src,struct Shader_opcode * opcode,struct ureg_dst dst,struct ureg_src * src)1165 sample_ureg_emit(struct ureg_program *ureg,
1166                  unsigned tgsi_opcode,
1167                  unsigned num_src,
1168                  struct Shader_opcode *opcode,
1169                  struct ureg_dst dst,
1170                  struct ureg_src *src)
1171 {
1172    unsigned num_offsets = 0;
1173    struct tgsi_texture_offset texoffsets;
1174 
1175    memset(&texoffsets, 0, sizeof texoffsets);
1176 
1177    if (opcode->imm_texel_offset.u ||
1178        opcode->imm_texel_offset.v ||
1179        opcode->imm_texel_offset.w) {
1180       struct ureg_src offsetreg;
1181       num_offsets = 1;
1182       /* don't actually always need all 3 values */
1183       offsetreg = ureg_imm3i(ureg,
1184                              opcode->imm_texel_offset.u,
1185                              opcode->imm_texel_offset.v,
1186                              opcode->imm_texel_offset.w);
1187       texoffsets.File = offsetreg.File;
1188       texoffsets.Index = offsetreg.Index;
1189       texoffsets.SwizzleX = offsetreg.SwizzleX;
1190       texoffsets.SwizzleY = offsetreg.SwizzleY;
1191       texoffsets.SwizzleZ = offsetreg.SwizzleZ;
1192    }
1193 
1194    ureg_tex_insn(ureg,
1195                  tgsi_opcode,
1196                  &dst, 1,
1197                  TGSI_TEXTURE_UNKNOWN,
1198                  TGSI_RETURN_TYPE_UNKNOWN,
1199                  &texoffsets, num_offsets,
1200                  src, num_src);
1201 }
1202 
1203 typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
1204                                 struct ureg_src src);
1205 static void
expand_unary_to_scalarf(struct ureg_program * ureg,unary_ureg_func func,struct Shader_xlate * sx,struct Shader_opcode * opcode)1206 expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
1207                         struct Shader_xlate *sx, struct Shader_opcode *opcode)
1208 {
1209    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1210    struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
1211                                                opcode->saturate);
1212    struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
1213    struct ureg_dst scalar_dst;
1214    ureg_MOV(ureg, tmp, src);
1215    src = ureg_src(tmp);
1216 
1217    scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
1218    if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1219       func(ureg, scalar_dst,
1220            ureg_scalar(src, TGSI_SWIZZLE_X));
1221    }
1222    scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
1223    if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1224       func(ureg, scalar_dst,
1225            ureg_scalar(src, TGSI_SWIZZLE_Y));
1226    }
1227    scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
1228    if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1229       func(ureg, scalar_dst,
1230            ureg_scalar(src, TGSI_SWIZZLE_Z));
1231    }
1232    scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
1233    if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1234       func(ureg, scalar_dst,
1235            ureg_scalar(src, TGSI_SWIZZLE_W));
1236    }
1237    ureg_release_temporary(ureg, tmp);
1238 }
1239 
1240 const struct tgsi_token *
Shader_tgsi_translate(const unsigned * code,unsigned * output_mapping)1241 Shader_tgsi_translate(const unsigned *code,
1242                       unsigned *output_mapping)
1243 {
1244    struct Shader_xlate sx;
1245    struct Shader_parser parser;
1246    struct ureg_program *ureg = NULL;
1247    struct Shader_opcode opcode;
1248    const struct tgsi_token *tokens = NULL;
1249    uint nr_tokens;
1250    boolean shader_dumped = FALSE;
1251    boolean inside_sub = FALSE;
1252    uint i, j;
1253 
1254    memset(&sx, 0, sizeof sx);
1255 
1256    Shader_parse_init(&parser, code);
1257 
1258    if (st_debug & ST_DEBUG_TGSI) {
1259       dx10_shader_dump_tokens(code);
1260       shader_dumped = TRUE;
1261    }
1262 
1263    sx.max_calls = 64;
1264    sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
1265                                            sizeof(struct Shader_call));
1266    sx.num_calls = 0;
1267 
1268    sx.max_labels = 64;
1269    sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
1270                                              sizeof(struct Shader_call));
1271    sx.num_labels = 0;
1272 
1273 
1274 
1275    /* Header. */
1276    switch (parser.header.type) {
1277    case D3D10_SB_PIXEL_SHADER:
1278       ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1279       break;
1280    case D3D10_SB_VERTEX_SHADER:
1281       ureg = ureg_create(PIPE_SHADER_VERTEX);
1282       break;
1283    case D3D10_SB_GEOMETRY_SHADER:
1284       ureg = ureg_create(PIPE_SHADER_GEOMETRY);
1285       break;
1286    }
1287 
1288    assert(ureg);
1289    sx.ureg = ureg;
1290 
1291    while (Shader_parse_opcode(&parser, &opcode)) {
1292       const struct dx10_opcode_xlate *ox;
1293 
1294       assert(opcode.type < D3D10_SB_NUM_OPCODES);
1295       ox = &opcode_xlate[opcode.type];
1296 
1297       switch (opcode.type) {
1298       case D3D10_SB_OPCODE_EXP:
1299          expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
1300          break;
1301       case D3D10_SB_OPCODE_SQRT:
1302          expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
1303          break;
1304       case D3D10_SB_OPCODE_RSQ:
1305          expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
1306          break;
1307       case D3D10_SB_OPCODE_LOG:
1308          expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
1309          break;
1310       case D3D10_SB_OPCODE_IMUL:
1311          if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1312             ureg_IMUL_HI(ureg,
1313                         translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1314                         translate_src_operand(&sx, &opcode.src[0], OF_INT),
1315                         translate_src_operand(&sx, &opcode.src[1], OF_INT));
1316          }
1317 
1318          if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1319             ureg_UMUL(ureg,
1320                       translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
1321                       translate_src_operand(&sx, &opcode.src[0], OF_INT),
1322                       translate_src_operand(&sx, &opcode.src[1], OF_INT));
1323          }
1324 
1325          break;
1326 
1327       case D3D10_SB_OPCODE_FTOI: {
1328          /* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
1329           * out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
1330           * requires clamping to min and max representable value (as well as 0
1331           * for NaNs) (this applies to both ftoi and ftou). At least the online
1332           * docs state that - this is consistent with generic d3d10 conversion
1333           * rules.
1334           * For FTOI, we cheat a bit here - in particular depending on noone
1335           * caring about NaNs, and depending on the (undefined!) behavior of
1336           * F2I returning 0x80000000 for too negative values (which works with
1337           * x86 sse). Hence only need to clamp too positive values.
1338           * Note that it is impossible to clamp using a float, since 2^31 - 1
1339           * is not exactly representable with a float.
1340           */
1341          struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1342          struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1343          ureg_FSGE(ureg, too_large,
1344                    translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1345                    ureg_imm1f(ureg, 2147483648.0f));
1346          ureg_F2I(ureg, tmp,
1347                   translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1348          ureg_UCMP(ureg,
1349                    translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1350                    ureg_src(too_large),
1351                    ureg_imm1i(ureg, 0x7fffffff),
1352                    ureg_src(tmp));
1353          ureg_release_temporary(ureg, too_large);
1354          ureg_release_temporary(ureg, tmp);
1355       }
1356          break;
1357 
1358       case D3D10_SB_OPCODE_FTOU: {
1359          /* For ftou, we need to do both clamps, which as a bonus also
1360           * gets us correct NaN behavior.
1361           * Note that it is impossible to clamp using a float against the upper
1362           * limit, since 2^32 - 1 is not exactly representable with a float,
1363           * but the clamp against 0.0 certainly works just fine.
1364           */
1365          struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1366          struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1367          ureg_FSGE(ureg, too_large,
1368                    translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1369                    ureg_imm1f(ureg, 4294967296.0f));
1370          /* clamp negative values + NaN to zero.
1371           * (Could be done slightly more efficient in llvmpipe due to
1372           * MAX NaN behavior handling.)
1373           */
1374          ureg_MAX(ureg, tmp,
1375                   ureg_imm1f(ureg, 0.0f),
1376                   translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1377          ureg_F2U(ureg, tmp,
1378                   ureg_src(tmp));
1379          ureg_UCMP(ureg,
1380                    translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1381                    ureg_src(too_large),
1382                    ureg_imm1u(ureg, 0xffffffff),
1383                    ureg_src(tmp));
1384          ureg_release_temporary(ureg, too_large);
1385          ureg_release_temporary(ureg, tmp);
1386       }
1387          break;
1388 
1389       case D3D10_SB_OPCODE_LD_MS:
1390          /* XXX: We don't support multi-sampling yet, but we need to parse
1391           * this opcode regardless, so we just ignore sample index operand
1392           * for now */
1393       case D3D10_SB_OPCODE_LD:
1394          if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1395             unsigned resource = opcode.src[1].base.index[0].imm;
1396             assert(opcode.src[1].base.index_dim == 1);
1397             assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1398 
1399             if (ureg_src_is_undef(sx.samplers[resource])) {
1400                sx.samplers[resource] =
1401                   ureg_DECL_sampler(ureg, resource);
1402             }
1403 
1404             ureg_TXF(ureg,
1405                      translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1406                      sx.resources[resource].target,
1407                      translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1408                      sx.samplers[resource]);
1409          }
1410          else {
1411             struct ureg_src srcreg[2];
1412             srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
1413             srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
1414 
1415             sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
1416                              translate_dst_operand(&sx, &opcode.dst[0],
1417                                                    opcode.saturate),
1418                              srcreg);
1419          }
1420          break;
1421 
1422       case D3D10_SB_OPCODE_CUSTOMDATA:
1423          if (opcode.customdata._class ==
1424              D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
1425             sx.imms =
1426                ureg_DECL_immediate_block_uint(ureg,
1427                                               opcode.customdata.u.constbuf.data,
1428                                               opcode.customdata.u.constbuf.count);
1429          } else {
1430             assert(0);
1431          }
1432          break;
1433 
1434       case D3D10_SB_OPCODE_RESINFO:
1435          if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1436             unsigned resource = opcode.src[1].base.index[0].imm;
1437             assert(opcode.src[1].base.index_dim == 1);
1438             assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1439 
1440             if (ureg_src_is_undef(sx.samplers[resource])) {
1441                sx.samplers[resource] =
1442                   ureg_DECL_sampler(ureg, resource);
1443             }
1444             /* don't bother with swizzle, ret type etc. */
1445             ureg_TXQ(ureg,
1446                      translate_dst_operand(&sx, &opcode.dst[0],
1447                                            opcode.saturate),
1448                      sx.resources[resource].target,
1449                      translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1450                      sx.samplers[resource]);
1451          }
1452          else {
1453             struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1454             struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1455             struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
1456                                                            opcode.saturate);
1457 
1458             /* while specs say swizzle is ignored better safe than sorry */
1459             tsrc.SwizzleX = TGSI_SWIZZLE_X;
1460             tsrc.SwizzleY = TGSI_SWIZZLE_Y;
1461             tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
1462             tsrc.SwizzleW = TGSI_SWIZZLE_W;
1463 
1464             ureg_SVIEWINFO(ureg, r0,
1465                            translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1466                            tsrc);
1467 
1468             tsrc = ureg_src(r0);
1469             tsrc.SwizzleX = opcode.src[1].swizzle[0];
1470             tsrc.SwizzleY = opcode.src[1].swizzle[1];
1471             tsrc.SwizzleZ = opcode.src[1].swizzle[2];
1472             tsrc.SwizzleW = opcode.src[1].swizzle[3];
1473 
1474             if (opcode.specific.resinfo_ret_type ==
1475                 D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
1476                ureg_MOV(ureg, dstreg, tsrc);
1477             }
1478             else if (opcode.specific.resinfo_ret_type ==
1479                      D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
1480                 ureg_I2F(ureg, dstreg, tsrc);
1481             }
1482             else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
1483                unsigned i;
1484                /*
1485                 * Must apply rcp only to parts determined by dims,
1486                 * (width/height/depth) but NOT to array size nor mip levels
1487                 * hence need to figure that out here.
1488                 * This is one sick modifier if you ask me!
1489                 */
1490                unsigned res_index = opcode.src[1].base.index[0].imm;
1491                unsigned target = sx.resources[res_index].target;
1492                unsigned dims = texture_dim_from_tgsi_target(target);
1493 
1494                ureg_I2F(ureg, r0, ureg_src(r0));
1495                tsrc = ureg_src(r0);
1496                for (i = 0; i < 4; i++) {
1497                   unsigned dst_swizzle = opcode.src[1].swizzle[i];
1498                   struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
1499                   /*
1500                    * could do one mov with multiple write mask bits set
1501                    * but rcp is scalar anyway.
1502                    */
1503                   if (dst_swizzle < dims) {
1504                      ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1505                   }
1506                   else {
1507                      ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1508                   }
1509                }
1510             }
1511             ureg_release_temporary(ureg, r0);
1512          }
1513          break;
1514 
      case D3D10_SB_OPCODE_SAMPLE:
         /*
          * Plain texture sample.  The ST_DEBUG_OLD_TEX_OPS debug flag
          * selects between the legacy TEX opcode and TGSI_OPCODE_SAMPLE
          * (emitted through sample_ureg_emit()).
          */
         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
            /* src[1] is the resource operand; its index selects the
             * texture target recorded in sx.resources[].
             */
            assert(opcode.src[1].base.index_dim == 1);
            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);

            /* Only src[2] is handed to TEX below, so a resource index that
             * differs from the src[2] index is reported as unsupported.
             */
            LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);

            ureg_TEX(ureg,
                     translate_dst_operand(&sx, &opcode.dst[0],
                                           opcode.saturate),
                     sx.resources[opcode.src[1].base.index[0].imm].target,
                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
         }
         else {
            /* src[0] is translated as float, src[1] and src[2] as uint;
             * all three are forwarded unchanged.
             */
            struct ureg_src srcreg[3];
            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);

            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
                             translate_dst_operand(&sx, &opcode.dst[0],
                                                   opcode.saturate),
                             srcreg);
         }
         break;
1541 
1542       case D3D10_SB_OPCODE_SAMPLE_C:
1543          if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1544             struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1545 
1546             /* XXX: Support only 2D texture targets for now.
1547              *      Need to figure out how to pack the compare value
1548              *      for other dimensions and if there is enough space
1549              *      in a single operand for all possible cases.
1550              */
1551             LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1552                             TGSI_TEXTURE_2D);
1553 
1554             assert(opcode.src[1].base.index_dim == 1);
1555             assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1556 
1557             /* Insert the compare value into .z component.
1558              */
1559             ureg_MOV(ureg,
1560                      ureg_writemask(r0, TGSI_WRITEMASK_XYW),
1561                      translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1562             ureg_MOV(ureg,
1563                      ureg_writemask(r0, TGSI_WRITEMASK_Z),
1564                      translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1565 
1566             /* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
1567              */
1568 
1569             ureg_TEX(ureg,
1570                      translate_dst_operand(&sx, &opcode.dst[0],
1571                                            opcode.saturate),
1572                      sx.resources[opcode.src[1].base.index[0].imm].target,
1573                      ureg_src(r0),
1574                      translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1575 
1576             ureg_release_temporary(ureg, r0);
1577          }
1578          else {
1579             struct ureg_src srcreg[4];
1580             srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1581             srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1582             srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1583             srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1584 
1585             sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
1586                              translate_dst_operand(&sx, &opcode.dst[0],
1587                                                    opcode.saturate),
1588                              srcreg);
1589          }
1590          break;
1591 
      case D3D10_SB_OPCODE_SAMPLE_C_LZ:
         /*
          * Comparison sample with a fixed zero written into the .w slot
          * of the coordinate.  The ST_DEBUG_OLD_TEX_OPS debug flag selects
          * between the legacy TXL opcode and TGSI_OPCODE_SAMPLE_C_LZ
          * (emitted through sample_ureg_emit()).
          */
         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
            struct ureg_dst r0 = ureg_DECL_temporary(ureg);

            /* src[1] is the resource operand; its index selects the
             * texture target recorded in sx.resources[].
             */
            assert(opcode.src[1].base.index_dim == 1);
            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);

            /* XXX: Support only 2D texture targets for now.
             *      Need to figure out how to pack the compare value
             *      for other dimensions and if there is enough space
             *      in a single operand for all possible cases.
             */
            LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
                            TGSI_TEXTURE_2D);

            /* Insert the compare value into .z component.
             * Insert 0 into .w component.
             */
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_XY),
                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_Z),
                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_W),
                     ureg_imm1f(ureg, 0.0f));

            ureg_TXL(ureg,
                     translate_dst_operand(&sx, &opcode.dst[0],
                                           opcode.saturate),
                     sx.resources[opcode.src[1].base.index[0].imm].target,
                     ureg_src(r0),
                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));

            ureg_release_temporary(ureg, r0);
         }
         else {
            /* src[0] and src[3] are translated as float, src[1] and src[2]
             * as uint; all four are forwarded unchanged.
             */
            struct ureg_src srcreg[4];
            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);

            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
                             translate_dst_operand(&sx, &opcode.dst[0],
                                                   opcode.saturate),
                             srcreg);
         }
         break;
1642 
      case D3D10_SB_OPCODE_SAMPLE_L:
         /*
          * Sample with an explicit LOD taken from src[3].  The
          * ST_DEBUG_OLD_TEX_OPS debug flag selects between the legacy TXL
          * opcode and TGSI_OPCODE_SAMPLE_L (emitted through
          * sample_ureg_emit()).
          */
         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
            struct ureg_dst r0 = ureg_DECL_temporary(ureg);

            /* src[1] is the resource operand; its index selects the
             * texture target recorded in sx.resources[].
             */
            assert(opcode.src[1].base.index_dim == 1);
            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);

            /* Insert LOD into .w component.
             */
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_W),
                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));

            ureg_TXL(ureg,
                     translate_dst_operand(&sx, &opcode.dst[0],
                                           opcode.saturate),
                     sx.resources[opcode.src[1].base.index[0].imm].target,
                     ureg_src(r0),
                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));

            ureg_release_temporary(ureg, r0);
         }
         else {
            /* src[0] and src[3] are translated as float, src[1] and src[2]
             * as uint; all four are forwarded unchanged.
             */
            struct ureg_src srcreg[4];
            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);

            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
                             translate_dst_operand(&sx, &opcode.dst[0],
                                                   opcode.saturate),
                             srcreg);
         }
         break;
1681 
      case D3D10_SB_OPCODE_SAMPLE_D:
         /*
          * Sample that takes two extra float operands, src[3] and src[4]
          * (presumably the x and y derivatives -- confirm against the
          * D3D10 sample_d definition).  The ST_DEBUG_OLD_TEX_OPS debug
          * flag selects between the legacy TXD opcode and
          * TGSI_OPCODE_SAMPLE_D (emitted through sample_ureg_emit()).
          */
         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
            /* src[1] is the resource operand; its index selects the
             * texture target recorded in sx.resources[].
             */
            assert(opcode.src[1].base.index_dim == 1);
            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);

            /* Note the operand order: src[0], src[3], src[4], and src[2]
             * last.
             */
            ureg_TXD(ureg,
                     translate_dst_operand(&sx, &opcode.dst[0],
                                           opcode.saturate),
                     sx.resources[opcode.src[1].base.index[0].imm].target,
                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
                     translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
         }
         else {
            /* src[1] and src[2] are translated as uint, the other three
             * as float; all five are forwarded unchanged.
             */
            struct ureg_src srcreg[5];
            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
            srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);

            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
                             translate_dst_operand(&sx, &opcode.dst[0],
                                                   opcode.saturate),
                             srcreg);
         }
         break;
1710 
      case D3D10_SB_OPCODE_SAMPLE_B:
         /*
          * Sample with a LOD bias taken from src[3].  The
          * ST_DEBUG_OLD_TEX_OPS debug flag selects between the legacy TXB
          * opcode and TGSI_OPCODE_SAMPLE_B (emitted through
          * sample_ureg_emit()).
          */
         if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
            struct ureg_dst r0 = ureg_DECL_temporary(ureg);

            /* src[1] is the resource operand; its index selects the
             * texture target recorded in sx.resources[].
             */
            assert(opcode.src[1].base.index_dim == 1);
            assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);

            /* Insert LOD bias into .w component.
             */
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
                     translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
            ureg_MOV(ureg,
                     ureg_writemask(r0, TGSI_WRITEMASK_W),
                     translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));

            ureg_TXB(ureg,
                     translate_dst_operand(&sx, &opcode.dst[0],
                                           opcode.saturate),
                     sx.resources[opcode.src[1].base.index[0].imm].target,
                     ureg_src(r0),
                     translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));

            ureg_release_temporary(ureg, r0);
         }
         else {
            /* src[0] and src[3] are translated as float, src[1] and src[2]
             * as uint; all four are forwarded unchanged.
             */
            struct ureg_src srcreg[4];
            srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
            srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
            srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
            srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);

            sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
                             translate_dst_operand(&sx, &opcode.dst[0],
                                                   opcode.saturate),
                             srcreg);
         }
         break;
1749 
1750       case D3D10_SB_OPCODE_SINCOS: {
1751          struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1752          ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1753          if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1754             struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
1755                                                         opcode.saturate);
1756             struct ureg_src src = ureg_src(src0);
1757             ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1758                      ureg_scalar(src, TGSI_SWIZZLE_X));
1759             ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1760                      ureg_scalar(src, TGSI_SWIZZLE_Y));
1761             ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1762                      ureg_scalar(src, TGSI_SWIZZLE_Z));
1763             ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1764                      ureg_scalar(src, TGSI_SWIZZLE_W));
1765          }
1766          if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1767             struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
1768                                                         opcode.saturate);
1769             struct ureg_src src = ureg_src(src0);
1770             ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1771                      ureg_scalar(src, TGSI_SWIZZLE_X));
1772             ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1773                      ureg_scalar(src, TGSI_SWIZZLE_Y));
1774             ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1775                      ureg_scalar(src, TGSI_SWIZZLE_Z));
1776             ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1777                      ureg_scalar(src, TGSI_SWIZZLE_W));
1778          }
1779          ureg_release_temporary(ureg, src0);
1780       }
1781          break;
1782 
1783       case D3D10_SB_OPCODE_UDIV: {
1784          struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1785          struct ureg_dst src1 = ureg_DECL_temporary(ureg);
1786          ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
1787          ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1788          if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1789             ureg_UDIV(ureg,
1790                       translate_dst_operand(&sx, &opcode.dst[0],
1791                                             opcode.saturate),
1792                       ureg_src(src0), ureg_src(src1));
1793          }
1794          if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1795             ureg_UMOD(ureg,
1796                       translate_dst_operand(&sx, &opcode.dst[1],
1797                                             opcode.saturate),
1798                       ureg_src(src0), ureg_src(src1));
1799          }
1800          ureg_release_temporary(ureg, src0);
1801          ureg_release_temporary(ureg, src1);
1802       }
1803          break;
1804       case D3D10_SB_OPCODE_UMUL: {
1805          if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1806             ureg_UMUL_HI(ureg,
1807                          translate_dst_operand(&sx, &opcode.dst[0],
1808                                                opcode.saturate),
1809                          translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1810                          translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1811          }
1812          if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1813             ureg_UMUL(ureg,
1814                       translate_dst_operand(&sx, &opcode.dst[1],
1815                                             opcode.saturate),
1816                       translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1817                       translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1818          }
1819       }
1820          break;
1821 
1822       case D3D10_SB_OPCODE_DCL_RESOURCE:
1823       {
1824          unsigned target;
1825          unsigned res_index = opcode.dst[0].base.index[0].imm;
1826          assert(opcode.dst[0].base.index_dim == 1);
1827          assert(res_index < SHADER_MAX_RESOURCES);
1828 
1829          target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
1830          sx.resources[res_index].target = target;
1831          if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
1832             sx.sv[res_index] =
1833                ureg_DECL_sampler_view(ureg, res_index, target,
1834                                       trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
1835                                       trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
1836                                       trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
1837                                       trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
1838          }
1839          break;
1840       }
1841 
1842       case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
1843          unsigned num_constants = opcode.src[0].base.index[1].imm;
1844 
1845          assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
1846 
1847          if (num_constants == 0) {
1848             num_constants = SHADER_MAX_CONSTS;
1849          } else {
1850             assert(num_constants <= SHADER_MAX_CONSTS);
1851          }
1852 
1853          ureg_DECL_constant2D(ureg,
1854                               0,
1855                               num_constants - 1,
1856                               opcode.src[0].base.index[0].imm);
1857          break;
1858       }
1859 
1860       case D3D10_SB_OPCODE_DCL_SAMPLER:
1861          assert(opcode.dst[0].base.index_dim == 1);
1862          assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
1863 
1864          sx.samplers[opcode.dst[0].base.index[0].imm] =
1865             ureg_DECL_sampler(ureg,
1866                               opcode.dst[0].base.index[0].imm);
1867          break;
1868 
1869       case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
1870          assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1871 
1872          switch (opcode.specific.dcl_gs_output_primitive_topology) {
1873          case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
1874             ureg_property(sx.ureg,
1875                           TGSI_PROPERTY_GS_OUTPUT_PRIM,
1876                           PIPE_PRIM_POINTS);
1877             break;
1878 
1879          case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
1880             ureg_property(sx.ureg,
1881                           TGSI_PROPERTY_GS_OUTPUT_PRIM,
1882                           PIPE_PRIM_LINE_STRIP);
1883             break;
1884 
1885          case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
1886             ureg_property(sx.ureg,
1887                           TGSI_PROPERTY_GS_OUTPUT_PRIM,
1888                           PIPE_PRIM_TRIANGLE_STRIP);
1889             break;
1890 
1891          default:
1892             assert(0);
1893          }
1894          break;
1895 
1896       case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
1897          assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1898 
1899          /* Figure out the second dimension of GS inputs.
1900           */
1901          switch (opcode.specific.dcl_gs_input_primitive) {
1902          case D3D10_SB_PRIMITIVE_POINT:
1903             declare_vertices_in(&sx, 1);
1904             ureg_property(sx.ureg,
1905                           TGSI_PROPERTY_GS_INPUT_PRIM,
1906                           PIPE_PRIM_POINTS);
1907             break;
1908 
1909          case D3D10_SB_PRIMITIVE_LINE:
1910             declare_vertices_in(&sx, 2);
1911             ureg_property(sx.ureg,
1912                           TGSI_PROPERTY_GS_INPUT_PRIM,
1913                           PIPE_PRIM_LINES);
1914             break;
1915 
1916          case D3D10_SB_PRIMITIVE_TRIANGLE:
1917             declare_vertices_in(&sx, 3);
1918             ureg_property(sx.ureg,
1919                           TGSI_PROPERTY_GS_INPUT_PRIM,
1920                           PIPE_PRIM_TRIANGLES);
1921             break;
1922 
1923          case D3D10_SB_PRIMITIVE_LINE_ADJ:
1924             declare_vertices_in(&sx, 4);
1925             ureg_property(sx.ureg,
1926                           TGSI_PROPERTY_GS_INPUT_PRIM,
1927                           PIPE_PRIM_LINES_ADJACENCY);
1928             break;
1929 
1930          case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
1931             declare_vertices_in(&sx, 6);
1932             ureg_property(sx.ureg,
1933                           TGSI_PROPERTY_GS_INPUT_PRIM,
1934                           PIPE_PRIM_TRIANGLES_ADJACENCY);
1935             break;
1936 
1937          default:
1938             assert(0);
1939          }
1940          break;
1941 
1942       case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1943          assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1944 
1945          ureg_property(sx.ureg,
1946                        TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1947                        opcode.specific.dcl_max_output_vertex_count);
1948          break;
1949 
1950       case D3D10_SB_OPCODE_DCL_INPUT:
1951          if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
1952             dcl_vs_input(&sx, ureg, &opcode.dst[0]);
1953          } else {
1954             assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1955             dcl_gs_input(&sx, ureg, &opcode.dst[0]);
1956          }
1957          break;
1958 
      case D3D10_SB_OPCODE_DCL_INPUT_SGV:
         /* Input with a system name; only expected in vertex shaders
          * here.  The name is passed on to dcl_sgv_input().
          */
         assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
         dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
         break;

      case D3D10_SB_OPCODE_DCL_INPUT_SIV:
         /* Input with a system name; only expected in geometry shaders
          * here.  The name is passed on to dcl_siv_input().
          */
         assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
         dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
         break;

      case D3D10_SB_OPCODE_DCL_INPUT_PS:
         /* Pixel shader input; the declared interpolation mode is passed
          * on to dcl_ps_input().
          */
         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
         dcl_ps_input(&sx, ureg, &opcode.dst[0],
                      opcode.specific.dcl_in_ps_interp);
         break;

      case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
         /* Pixel shader input with a system name; no interpolation mode
          * is passed for this variant.
          */
         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
         dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
                          opcode.dcl_siv_name);
         break;

      case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
         /* Pixel shader input with a system name; both the name and the
          * interpolation mode are passed on.
          */
         assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
         dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
                          opcode.dcl_siv_name,
                          opcode.specific.dcl_in_ps_interp);
         break;
1987 
1988       case D3D10_SB_OPCODE_DCL_OUTPUT:
1989          if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
1990             /* Pixel shader outputs. */
1991             if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
1992                /* Depth output. */
1993                assert(opcode.dst[0].base.index_dim == 0);
1994 
1995                sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
1996                sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
1997             } else {
1998                /* Color outputs. */
1999                assert(opcode.dst[0].base.index_dim == 1);
2000                assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2001 
2002                dcl_base_output(&sx, ureg,
2003                                ureg_DECL_output(ureg,
2004                                                 TGSI_SEMANTIC_COLOR,
2005                                                 opcode.dst[0].base.index[0].imm),
2006                                &opcode.dst[0]);
2007             }
2008          } else {
2009             assert(opcode.dst[0].base.index_dim == 1);
2010             assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2011 
2012             if (output_mapping) {
2013                unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2014                output_mapping[nr_outputs]
2015                   = opcode.dst[0].base.index[0].imm;
2016             }
2017             dcl_base_output(&sx, ureg,
2018                             ureg_DECL_output(ureg,
2019                                              TGSI_SEMANTIC_GENERIC,
2020                                              opcode.dst[0].base.index[0].imm),
2021                             &opcode.dst[0]);
2022          }
2023          break;
2024 
2025       case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
2026          assert(opcode.dst[0].base.index_dim == 1);
2027          assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2028 
2029          if (output_mapping) {
2030             unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2031             output_mapping[nr_outputs]
2032                = opcode.dst[0].base.index[0].imm;
2033          }
2034          if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
2035              opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2036             /*
2037              * FIXME: this is quite broken. gallium no longer has separate
2038              * clip/cull dists, using (max 2) combined clipdist/culldist regs
2039              * instead. Unlike d3d10 though, which is clip and which cull is
2040              * simply determined by by number of clip/cull dists (that is,
2041              * all clip dists must come first).
2042              */
2043             unsigned numcliporcull = sx.num_clip_distances_declared +
2044                                      sx.num_cull_distances_declared;
2045             sx.clip_distance_mapping[numcliporcull].d3d =
2046                opcode.dst[0].base.index[0].imm;
2047             sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
2048             if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
2049                ++sx.num_clip_distances_declared;
2050                /* re-emit should be safe... */
2051                ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2052                              sx.num_clip_distances_declared);
2053             } else {
2054                ++sx.num_cull_distances_declared;
2055                ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2056                              sx.num_cull_distances_declared);
2057             }
2058          } else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2059             sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
2060                opcode.dst[0].base.index[0].imm;
2061             sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
2062                sx.num_cull_distances_declared;
2063             ++sx.num_cull_distances_declared;
2064             ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2065                           sx.num_cull_distances_declared);
2066          }
2067 
2068          dcl_base_output(&sx, ureg,
2069                          ureg_DECL_output_masked(
2070                             ureg,
2071                             translate_system_name(opcode.dcl_siv_name),
2072                             translate_semantic_index(&sx, opcode.dcl_siv_name,
2073                                                      &opcode.dst[0]),
2074                             opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
2075                             0, 1),
2076                          &opcode.dst[0]);
2077          break;
2078 
2079       case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
2080          assert(opcode.dst[0].base.index_dim == 1);
2081          assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2082 
2083          if (output_mapping) {
2084             unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2085             output_mapping[nr_outputs]
2086                = opcode.dst[0].base.index[0].imm;
2087          }
2088          dcl_base_output(&sx, ureg,
2089                          ureg_DECL_output(ureg,
2090                                           translate_system_name(opcode.dcl_siv_name),
2091                                           0),
2092                          &opcode.dst[0]);
2093          break;
2094 
2095       case D3D10_SB_OPCODE_DCL_TEMPS:
2096          {
2097             uint i;
2098 
2099             assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
2100                    SHADER_MAX_TEMPS);
2101 
2102             sx.temp_offset = sx.declared_temps;
2103 
2104             for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
2105                sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2106             }
2107             sx.declared_temps += opcode.specific.dcl_num_temps;
2108          }
2109          break;
2110 
2111       case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
2112          {
2113             uint i;
2114 
2115             /* XXX: Add true indexable temps to gallium.
2116              */
2117 
2118             assert(opcode.specific.dcl_indexable_temp.index <
2119                    SHADER_MAX_INDEXABLE_TEMPS);
2120             assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
2121                    SHADER_MAX_TEMPS);
2122 
2123             sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
2124                sx.declared_temps;
2125 
2126             for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
2127                sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2128             }
2129             sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
2130          }
2131          break;
2132       case D3D10_SB_OPCODE_IF: {
2133          unsigned label = 0;
2134          if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2135             struct ureg_src src =
2136                translate_src_operand(&sx, &opcode.src[0], OF_INT);
2137             struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2138             ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2139             ureg_UIF(ureg, ureg_src(src_nz), &label);
2140             ureg_release_temporary(ureg, src_nz);;
2141          } else {
2142             ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2143          }
2144       }
2145          break;
2146       case D3D10_SB_OPCODE_RETC:
2147       case D3D10_SB_OPCODE_CONTINUEC:
2148       case D3D10_SB_OPCODE_CALLC:
2149       case D3D10_SB_OPCODE_DISCARD:
2150       case D3D10_SB_OPCODE_BREAKC:
2151       {
2152          unsigned label = 0;
2153          assert(operand_is_scalar(&opcode.src[0]));
2154          if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2155             struct ureg_src src =
2156                translate_src_operand(&sx, &opcode.src[0], OF_INT);
2157             struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2158             ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2159             ureg_UIF(ureg, ureg_src(src_nz), &label);
2160             ureg_release_temporary(ureg, src_nz);
2161          }
2162          else {
2163             ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2164          }
2165          switch (opcode.type) {
2166          case D3D10_SB_OPCODE_RETC:
2167             ureg_RET(ureg);
2168             break;
2169          case D3D10_SB_OPCODE_CONTINUEC:
2170             ureg_CONT(ureg);
2171             break;
2172          case D3D10_SB_OPCODE_CALLC: {
2173             unsigned label = opcode.src[1].base.index[0].imm;
2174             unsigned tgsi_token_label = 0;
2175             ureg_CAL(ureg, &tgsi_token_label);
2176             Shader_add_call(&sx, label, tgsi_token_label);
2177          }
2178             break;
2179          case D3D10_SB_OPCODE_DISCARD:
2180             ureg_KILL(ureg);
2181             break;
2182          case D3D10_SB_OPCODE_BREAKC:
2183             ureg_BRK(ureg);
2184             break;
2185          default:
2186             assert(0);
2187             break;
2188          }
2189          ureg_ENDIF(ureg);
2190       }
2191          break;
2192       case D3D10_SB_OPCODE_LABEL: {
2193          unsigned label = opcode.src[0].base.index[0].imm;
2194          unsigned tgsi_inst_no = 0;
2195          if (inside_sub) {
2196             ureg_ENDSUB(ureg);
2197          }
2198          tgsi_inst_no = ureg_get_instruction_number(ureg);
2199          ureg_BGNSUB(ureg);
2200          inside_sub = TRUE;
2201          Shader_add_label(&sx, label, tgsi_inst_no);
2202       }
2203          break;
2204       case D3D10_SB_OPCODE_CALL: {
2205          unsigned label = opcode.src[0].base.index[0].imm;
2206          unsigned tgsi_token_label = 0;
2207          ureg_CAL(ureg, &tgsi_token_label);
2208          Shader_add_call(&sx, label, tgsi_token_label);
2209       }
2210          break;
2211       case D3D10_SB_OPCODE_EMIT:
2212          ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2213          break;
2214       case D3D10_SB_OPCODE_CUT:
2215          ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2216          break;
2217       case D3D10_SB_OPCODE_EMITTHENCUT:
2218          ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2219          ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2220          break;
2221       case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
2222       case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
2223          /* Ignore */
2224          break;
2225       default:
2226          {
2227             uint i;
2228             struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
2229             struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
2230 
2231             assert(ox->tgsi_opcode != TGSI_EXPAND);
2232 
2233             if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
2234                if (!shader_dumped) {
2235                   dx10_shader_dump_tokens(code);
2236                   shader_dumped = TRUE;
2237                }
2238                debug_printf("%s: unsupported opcode %i\n",
2239                             __FUNCTION__, ox->type);
2240                assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
2241             }
2242 
2243             /* Destination operands. */
2244             for (i = 0; i < opcode.num_dst; i++) {
2245                dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
2246                                               opcode.saturate);
2247             }
2248 
2249             /* Source operands. */
2250             for (i = 0; i < opcode.num_src; i++) {
2251                src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
2252             }
2253 
2254             /* Try to re-route output depth to Z channel. */
2255             if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
2256                LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
2257                dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
2258                src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
2259             }
2260 
2261             ureg_insn(ureg,
2262                       ox->tgsi_opcode,
2263                       dst,
2264                       opcode.num_dst,
2265                       src,
2266                       opcode.num_src, 0);
2267          }
2268       }
2269 
2270       Shader_opcode_free(&opcode);
2271    }
2272 
2273    if (inside_sub) {
2274       ureg_ENDSUB(ureg);
2275    }
2276 
2277    ureg_END(ureg);
2278 
2279    for (i = 0; i < sx.num_calls; ++i) {
2280       for (j = 0; j < sx.num_labels; ++j) {
2281          if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
2282             ureg_fixup_label(sx.ureg,
2283                              sx.calls[i].tgsi_label_token,
2284                              sx.labels[j].tgsi_insn_no);
2285             break;
2286          }
2287       }
2288       ASSERT(j < sx.num_labels);
2289    }
2290    FREE(sx.labels);
2291    FREE(sx.calls);
2292 
2293    tokens = ureg_get_tokens(ureg, &nr_tokens);
2294    assert(tokens);
2295    ureg_destroy(ureg);
2296 
2297    if (st_debug & ST_DEBUG_TGSI) {
2298       tgsi_dump(tokens, 0);
2299    }
2300 
2301    return tokens;
2302 }
2303