1 /*
2  * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3  * Copyright 2013 Christoph Bumiller
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "nine_shader.h"
25 
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30 
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "nir/tgsi_to_nir.h"
38 
39 #define DBG_CHANNEL DBG_SHADER
40 
41 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
42 
43 
44 struct shader_translator;
45 
46 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
47 
48 static inline const char *d3dsio_to_string(unsigned opcode);
49 
50 
/* Shader type tags. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Capacity of the conditional / loop label stacks in shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Value type of a parameter (see sm1_src_param::type). */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Pseudo register file, placed right after the last D3D register type,
 * used to mark immediate source parameters. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison selectors. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Instruction flags of D3DSIO_TEX (dumped as the "p" / "b" suffix). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Write mask bits, one per component (x, y, z, w). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, .xyzw. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to the four TGSI_SWIZZLE_* arguments for the given components. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component s of src across all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier bits, shifted down to bit 0. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
100 
/*
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
/* Source modifiers, shifted down to bit 0 so they can index sm1_mod_str. */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
130 
131 static const char *sm1_mod_str[] =
132 {
133     [NINED3DSPSM_NONE] = "",
134     [NINED3DSPSM_NEG] = "-",
135     [NINED3DSPSM_BIAS] = "bias",
136     [NINED3DSPSM_BIASNEG] = "biasneg",
137     [NINED3DSPSM_SIGN] = "sign",
138     [NINED3DSPSM_SIGNNEG] = "signneg",
139     [NINED3DSPSM_COMP] = "comp",
140     [NINED3DSPSM_X2] = "x2",
141     [NINED3DSPSM_X2NEG] = "x2neg",
142     [NINED3DSPSM_DZ] = "dz",
143     [NINED3DSPSM_DW] = "dw",
144     [NINED3DSPSM_ABS] = "abs",
145     [NINED3DSPSM_ABSNEG] = "-abs",
146     [NINED3DSPSM_NOT] = "not"
147 };
148 
149 static void
sm1_dump_writemask(BYTE mask)150 sm1_dump_writemask(BYTE mask)
151 {
152     if (mask & 1) DUMP("x"); else DUMP("_");
153     if (mask & 2) DUMP("y"); else DUMP("_");
154     if (mask & 4) DUMP("z"); else DUMP("_");
155     if (mask & 8) DUMP("w"); else DUMP("_");
156 }
157 
158 static void
sm1_dump_swizzle(BYTE s)159 sm1_dump_swizzle(BYTE s)
160 {
161     char c[4] = { 'x', 'y', 'z', 'w' };
162     DUMP("%c%c%c%c",
163          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
164 }
165 
166 static const char sm1_file_char[] =
167 {
168     [D3DSPR_TEMP] = 'r',
169     [D3DSPR_INPUT] = 'v',
170     [D3DSPR_CONST] = 'c',
171     [D3DSPR_ADDR] = 'A',
172     [D3DSPR_RASTOUT] = 'R',
173     [D3DSPR_ATTROUT] = 'D',
174     [D3DSPR_OUTPUT] = 'o',
175     [D3DSPR_CONSTINT] = 'I',
176     [D3DSPR_COLOROUT] = 'C',
177     [D3DSPR_DEPTHOUT] = 'D',
178     [D3DSPR_SAMPLER] = 's',
179     [D3DSPR_CONST2] = 'c',
180     [D3DSPR_CONST3] = 'c',
181     [D3DSPR_CONST4] = 'c',
182     [D3DSPR_CONSTBOOL] = 'B',
183     [D3DSPR_LOOP] = 'L',
184     [D3DSPR_TEMPFLOAT16] = 'h',
185     [D3DSPR_MISCTYPE] = 'M',
186     [D3DSPR_LABEL] = 'X',
187     [D3DSPR_PREDICATE] = 'p'
188 };
189 
190 static void
sm1_dump_reg(BYTE file,INT index)191 sm1_dump_reg(BYTE file, INT index)
192 {
193     switch (file) {
194     case D3DSPR_LOOP:
195         DUMP("aL");
196         break;
197     case D3DSPR_COLOROUT:
198         DUMP("oC%i", index);
199         break;
200     case D3DSPR_DEPTHOUT:
201         DUMP("oDepth");
202         break;
203     case D3DSPR_RASTOUT:
204         DUMP("oRast%i", index);
205         break;
206     case D3DSPR_CONSTINT:
207         DUMP("iconst[%i]", index);
208         break;
209     case D3DSPR_CONSTBOOL:
210         DUMP("bconst[%i]", index);
211         break;
212     default:
213         DUMP("%c%i", sm1_file_char[file], index);
214         break;
215     }
216 }
217 
218 struct sm1_src_param
219 {
220     INT idx;
221     struct sm1_src_param *rel;
222     BYTE file;
223     BYTE swizzle;
224     BYTE mod;
225     BYTE type;
226     union {
227         DWORD d[4];
228         float f[4];
229         int i[4];
230         BOOL b;
231     } imm;
232 };
233 static void
234 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
235 
236 struct sm1_dst_param
237 {
238     INT idx;
239     struct sm1_src_param *rel;
240     BYTE file;
241     BYTE mask;
242     BYTE mod;
243     int8_t shift; /* sint4 */
244     BYTE type;
245 };
246 
247 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)248 assert_replicate_swizzle(const struct ureg_src *reg)
249 {
250     assert(reg->SwizzleY == reg->SwizzleX &&
251            reg->SwizzleZ == reg->SwizzleX &&
252            reg->SwizzleW == reg->SwizzleX);
253 }
254 
255 static void
sm1_dump_immediate(const struct sm1_src_param * param)256 sm1_dump_immediate(const struct sm1_src_param *param)
257 {
258     switch (param->type) {
259     case NINED3DSPTYPE_FLOAT4:
260         DUMP("{ %f %f %f %f }",
261              param->imm.f[0], param->imm.f[1],
262              param->imm.f[2], param->imm.f[3]);
263         break;
264     case NINED3DSPTYPE_INT4:
265         DUMP("{ %i %i %i %i }",
266              param->imm.i[0], param->imm.i[1],
267              param->imm.i[2], param->imm.i[3]);
268         break;
269     case NINED3DSPTYPE_BOOL:
270         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
271         break;
272     default:
273         assert(0);
274         break;
275     }
276 }
277 
278 static void
sm1_dump_src_param(const struct sm1_src_param * param)279 sm1_dump_src_param(const struct sm1_src_param *param)
280 {
281     if (param->file == NINED3DSPR_IMMEDIATE) {
282         assert(!param->mod &&
283                !param->rel &&
284                param->swizzle == NINED3DSP_NOSWIZZLE);
285         sm1_dump_immediate(param);
286         return;
287     }
288 
289     if (param->mod)
290         DUMP("%s(", sm1_mod_str[param->mod]);
291     if (param->rel) {
292         DUMP("%c[", sm1_file_char[param->file]);
293         sm1_dump_src_param(param->rel);
294         DUMP("+%i]", param->idx);
295     } else {
296         sm1_dump_reg(param->file, param->idx);
297     }
298     if (param->mod)
299        DUMP(")");
300     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
301        DUMP(".");
302        sm1_dump_swizzle(param->swizzle);
303     }
304 }
305 
306 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)307 sm1_dump_dst_param(const struct sm1_dst_param *param)
308 {
309    if (param->mod & NINED3DSPDM_SATURATE)
310       DUMP("sat ");
311    if (param->mod & NINED3DSPDM_PARTIALP)
312       DUMP("pp ");
313    if (param->mod & NINED3DSPDM_CENTROID)
314       DUMP("centroid ");
315    if (param->shift < 0)
316       DUMP("/%u ", 1 << -param->shift);
317    if (param->shift > 0)
318       DUMP("*%u ", 1 << param->shift);
319 
320    if (param->rel) {
321       DUMP("%c[", sm1_file_char[param->file]);
322       sm1_dump_src_param(param->rel);
323       DUMP("+%i]", param->idx);
324    } else {
325       sm1_dump_reg(param->file, param->idx);
326    }
327    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
328       DUMP(".");
329       sm1_dump_writemask(param->mask);
330    }
331 }
332 
333 struct sm1_semantic
334 {
335    struct sm1_dst_param reg;
336    BYTE sampler_type;
337    D3DDECLUSAGE usage;
338    BYTE usage_idx;
339 };
340 
341 struct sm1_op_info
342 {
343     /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
344      * should be ignored completely */
345     unsigned sio;
346     unsigned opcode; /* TGSI_OPCODE_x */
347 
348     /* versions are still set even handler is set */
349     struct {
350         unsigned min;
351         unsigned max;
352     } vert_version, frag_version;
353 
354     /* number of regs parsed outside of special handler */
355     unsigned ndst;
356     unsigned nsrc;
357 
358     /* some instructions don't map perfectly, so use a special handler */
359     translate_instruction_func handler;
360 };
361 
362 struct sm1_instruction
363 {
364     D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
365     BYTE flags;
366     BOOL coissue;
367     BOOL predicated;
368     BYTE ndst;
369     BYTE nsrc;
370     struct sm1_src_param src[4];
371     struct sm1_src_param src_rel[4];
372     struct sm1_src_param pred;
373     struct sm1_src_param dst_rel[1];
374     struct sm1_dst_param dst[1];
375 
376     const struct sm1_op_info *info;
377 };
378 
379 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)380 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
381 {
382     unsigned i;
383 
384     /* no info stored for these: */
385     if (insn->opcode == D3DSIO_DCL)
386         return;
387     for (i = 0; i < indent; ++i)
388         DUMP("  ");
389 
390     if (insn->predicated) {
391         DUMP("@");
392         sm1_dump_src_param(&insn->pred);
393         DUMP(" ");
394     }
395     DUMP("%s", d3dsio_to_string(insn->opcode));
396     if (insn->flags) {
397         switch (insn->opcode) {
398         case D3DSIO_TEX:
399             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
400             break;
401         default:
402             DUMP("_%x", insn->flags);
403             break;
404         }
405     }
406     if (insn->coissue)
407         DUMP("_co");
408     DUMP(" ");
409 
410     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
411         sm1_dump_dst_param(&insn->dst[i]);
412         DUMP(" ");
413     }
414 
415     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
416         sm1_dump_src_param(&insn->src[i]);
417         DUMP(" ");
418     }
419     if (insn->opcode == D3DSIO_DEF ||
420         insn->opcode == D3DSIO_DEFI ||
421         insn->opcode == D3DSIO_DEFB)
422         sm1_dump_immediate(&insn->src[0]);
423 
424     DUMP("\n");
425 }
426 
427 struct sm1_local_const
428 {
429     INT idx;
430     struct ureg_src reg;
431     float f[4]; /* for indirect addressing of float constants */
432 };
433 
434 struct shader_translator
435 {
436     const DWORD *byte_code;
437     const DWORD *parse;
438     const DWORD *parse_next;
439 
440     struct ureg_program *ureg;
441 
442     /* shader version */
443     struct {
444         BYTE major;
445         BYTE minor;
446     } version;
447     unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
448     unsigned num_constf_allowed;
449     unsigned num_consti_allowed;
450     unsigned num_constb_allowed;
451 
452     boolean native_integers;
453     boolean inline_subroutines;
454     boolean want_texcoord;
455     boolean shift_wpos;
456     boolean wpos_is_sysval;
457     boolean face_is_sysval_integer;
458     boolean mul_zero_wins;
459     unsigned texcoord_sn;
460 
461     struct sm1_instruction insn; /* current instruction */
462 
463     struct {
464         struct ureg_dst *r;
465         struct ureg_dst oPos;
466         struct ureg_dst oPos_out; /* the real output when doing streamout */
467         struct ureg_dst oFog;
468         struct ureg_dst oPts;
469         struct ureg_dst oCol[4];
470         struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
471         struct ureg_dst oDepth;
472         struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
473         struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
474         struct ureg_src vPos;
475         struct ureg_src vFace;
476         struct ureg_src s;
477         struct ureg_dst p;
478         struct ureg_dst address;
479         struct ureg_dst a0;
480         struct ureg_dst predicate;
481         struct ureg_dst predicate_tmp;
482         struct ureg_dst predicate_dst;
483         struct ureg_dst tS[8]; /* texture stage registers */
484         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
485         struct ureg_dst t[8]; /* scratch TEMPs */
486         struct ureg_src vC[2]; /* PS color in */
487         struct ureg_src vT[8]; /* PS texcoord in */
488         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
489     } regs;
490     unsigned num_temp; /* ARRAY_SIZE(regs.r) */
491     unsigned num_scratch;
492     unsigned loop_depth;
493     unsigned loop_depth_max;
494     unsigned cond_depth;
495     unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
496     unsigned cond_labels[NINE_MAX_COND_DEPTH];
497     boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
498     boolean predicated_activated;
499 
500     unsigned *inst_labels; /* LABEL op */
501     unsigned num_inst_labels;
502 
503     unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
504 
505     struct sm1_local_const *lconstf;
506     unsigned num_lconstf;
507     struct sm1_local_const *lconsti;
508     unsigned num_lconsti;
509     struct sm1_local_const *lconstb;
510     unsigned num_lconstb;
511 
512     boolean slots_used[NINE_MAX_CONST_ALL];
513     unsigned *slot_map;
514     unsigned num_slots;
515 
516     boolean indirect_const_access;
517     boolean failure;
518 
519     struct nine_vs_output_info output_info[16];
520     int num_outputs;
521 
522     struct nine_shader_info *info;
523 
524     int16_t op_info_map[D3DSIO_BREAKP + 1];
525 };
526 
/* Shader stage tests; both expect a `tx` translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Marks the translation as failed and returns from the enclosing void
 * function when cond holds. Call sites omit the trailing semicolon, so
 * this has to remain a bare if statement. */
#define FAILURE_VOID(cond) if ((cond)) { tx->failure = 1; return; }

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
534 
535 static void
sm1_instruction_check(const struct sm1_instruction * insn)536 sm1_instruction_check(const struct sm1_instruction *insn)
537 {
538     if (insn->opcode == D3DSIO_CRS)
539     {
540         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
541         {
542             DBG("CRS.mask.w\n");
543         }
544     }
545 }
546 
547 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)548 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
549                     int mask, int output_index)
550 {
551     tx->output_info[tx->num_outputs].output_semantic = Usage;
552     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
553     tx->output_info[tx->num_outputs].mask = mask;
554     tx->output_info[tx->num_outputs].output_index = output_index;
555     tx->num_outputs++;
556 }
557 
nine_float_constant_src(struct shader_translator * tx,int idx)558 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
559 {
560     struct ureg_src src;
561 
562     if (tx->slot_map)
563         idx = tx->slot_map[idx];
564     /* vswp constant handling: we use two buffers
565      * to fit all the float constants. The special handling
566      * doesn't need to be elsewhere, because all the instructions
567      * accessing the constants directly are VS1, and swvp
568      * is VS >= 2 */
569     if (tx->info->swvp_on && idx >= 4096) {
570         /* TODO: swvp rel is broken if many constants are used */
571         src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
572         src = ureg_src_dimension(src, 1);
573     } else {
574         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
575         src = ureg_src_dimension(src, 0);
576     }
577 
578     if (!tx->info->swvp_on)
579         tx->slots_used[idx] = TRUE;
580     if (tx->info->const_float_slots < (idx + 1))
581         tx->info->const_float_slots = idx + 1;
582     if (tx->num_slots < (idx + 1))
583         tx->num_slots = idx + 1;
584 
585     return src;
586 }
587 
nine_integer_constant_src(struct shader_translator * tx,int idx)588 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
589 {
590     struct ureg_src src;
591 
592     if (tx->info->swvp_on) {
593         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
594         src = ureg_src_dimension(src, 2);
595     } else {
596         unsigned slot_idx = tx->info->const_i_base + idx;
597         if (tx->slot_map)
598             slot_idx = tx->slot_map[slot_idx];
599         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
600         src = ureg_src_dimension(src, 0);
601         tx->slots_used[slot_idx] = TRUE;
602         tx->info->int_slots_used[idx] = TRUE;
603         if (tx->num_slots < (slot_idx + 1))
604             tx->num_slots = slot_idx + 1;
605     }
606 
607     if (tx->info->const_int_slots < (idx + 1))
608         tx->info->const_int_slots = idx + 1;
609 
610     return src;
611 }
612 
nine_boolean_constant_src(struct shader_translator * tx,int idx)613 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
614 {
615     struct ureg_src src;
616 
617     char r = idx / 4;
618     char s = idx & 3;
619 
620     if (tx->info->swvp_on) {
621         src = ureg_src_register(TGSI_FILE_CONSTANT, r);
622         src = ureg_src_dimension(src, 3);
623     } else {
624         unsigned slot_idx = tx->info->const_b_base + r;
625         if (tx->slot_map)
626             slot_idx = tx->slot_map[slot_idx];
627         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
628         src = ureg_src_dimension(src, 0);
629         tx->slots_used[slot_idx] = TRUE;
630         tx->info->bool_slots_used[idx] = TRUE;
631         if (tx->num_slots < (slot_idx + 1))
632             tx->num_slots = slot_idx + 1;
633     }
634     src = ureg_swizzle(src, s, s, s, s);
635 
636     if (tx->info->const_bool_slots < (idx + 1))
637         tx->info->const_bool_slots = idx + 1;
638 
639     return src;
640 }
641 
642 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)643 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
644 {
645    INT i;
646 
647    if (index < 0 || index >= tx->num_constf_allowed) {
648        tx->failure = TRUE;
649        return FALSE;
650    }
651    for (i = 0; i < tx->num_lconstf; ++i) {
652       if (tx->lconstf[i].idx == index) {
653          *src = tx->lconstf[i].reg;
654          return TRUE;
655       }
656    }
657    return FALSE;
658 }
659 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)660 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
661 {
662    int i;
663 
664    if (index < 0 || index >= tx->num_consti_allowed) {
665        tx->failure = TRUE;
666        return FALSE;
667    }
668    for (i = 0; i < tx->num_lconsti; ++i) {
669       if (tx->lconsti[i].idx == index) {
670          *src = tx->lconsti[i].reg;
671          return TRUE;
672       }
673    }
674    return FALSE;
675 }
676 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)677 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
678 {
679    int i;
680 
681    if (index < 0 || index >= tx->num_constb_allowed) {
682        tx->failure = TRUE;
683        return FALSE;
684    }
685    for (i = 0; i < tx->num_lconstb; ++i) {
686       if (tx->lconstb[i].idx == index) {
687          *src = tx->lconstb[i].reg;
688          return TRUE;
689       }
690    }
691    return FALSE;
692 }
693 
694 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])695 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
696 {
697     unsigned n;
698 
699     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
700 
701     for (n = 0; n < tx->num_lconstf; ++n)
702         if (tx->lconstf[n].idx == index)
703             break;
704     if (n == tx->num_lconstf) {
705        if ((n % 8) == 0) {
706           tx->lconstf = REALLOC(tx->lconstf,
707                                 (n + 0) * sizeof(tx->lconstf[0]),
708                                 (n + 8) * sizeof(tx->lconstf[0]));
709           assert(tx->lconstf);
710        }
711        tx->num_lconstf++;
712     }
713     tx->lconstf[n].idx = index;
714     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
715 
716     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
717 }
718 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])719 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
720 {
721     unsigned n;
722 
723     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
724 
725     for (n = 0; n < tx->num_lconsti; ++n)
726         if (tx->lconsti[n].idx == index)
727             break;
728     if (n == tx->num_lconsti) {
729        if ((n % 8) == 0) {
730           tx->lconsti = REALLOC(tx->lconsti,
731                                 (n + 0) * sizeof(tx->lconsti[0]),
732                                 (n + 8) * sizeof(tx->lconsti[0]));
733           assert(tx->lconsti);
734        }
735        tx->num_lconsti++;
736     }
737 
738     tx->lconsti[n].idx = index;
739     tx->lconsti[n].reg = tx->native_integers ?
740        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
741        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
742 }
743 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)744 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
745 {
746     unsigned n;
747 
748     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
749 
750     for (n = 0; n < tx->num_lconstb; ++n)
751         if (tx->lconstb[n].idx == index)
752             break;
753     if (n == tx->num_lconstb) {
754        if ((n % 8) == 0) {
755           tx->lconstb = REALLOC(tx->lconstb,
756                                 (n + 0) * sizeof(tx->lconstb[0]),
757                                 (n + 8) * sizeof(tx->lconstb[0]));
758           assert(tx->lconstb);
759        }
760        tx->num_lconstb++;
761     }
762 
763     tx->lconstb[n].idx = index;
764     tx->lconstb[n].reg = tx->native_integers ?
765        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
766        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
767 }
768 
769 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)770 tx_scratch(struct shader_translator *tx)
771 {
772     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
773         tx->failure = TRUE;
774         return tx->regs.t[0];
775     }
776     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
777         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
778     return tx->regs.t[tx->num_scratch++];
779 }
780 
781 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)782 tx_scratch_scalar(struct shader_translator *tx)
783 {
784     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
785 }
786 
787 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)788 tx_src_scalar(struct ureg_dst dst)
789 {
790     struct ureg_src src = ureg_src(dst);
791     int c = ffs(dst.WriteMask) - 1;
792     if (dst.WriteMask == (1 << c))
793         src = ureg_scalar(src, c);
794     return src;
795 }
796 
797 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)798 tx_temp_alloc(struct shader_translator *tx, INT idx)
799 {
800     assert(idx >= 0);
801     if (idx >= tx->num_temp) {
802        unsigned k = tx->num_temp;
803        unsigned n = idx + 1;
804        tx->regs.r = REALLOC(tx->regs.r,
805                             k * sizeof(tx->regs.r[0]),
806                             n * sizeof(tx->regs.r[0]));
807        for (; k < n; ++k)
808           tx->regs.r[k] = ureg_dst_undef();
809        tx->num_temp = n;
810     }
811     if (ureg_dst_is_undef(tx->regs.r[idx]))
812         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
813 }
814 
815 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)816 tx_addr_alloc(struct shader_translator *tx, INT idx)
817 {
818     assert(idx == 0);
819     if (ureg_dst_is_undef(tx->regs.address))
820         tx->regs.address = ureg_DECL_address(tx->ureg);
821     if (ureg_dst_is_undef(tx->regs.a0))
822         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
823 }
824 
825 static inline bool
TEX_if_fetch4(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)826 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
827               unsigned target, struct ureg_src src0,
828               struct ureg_src src1, INT idx)
829 {
830     struct ureg_dst tmp;
831     struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
832 
833     if (!(tx->info->fetch4 & (1 << idx)))
834         return false;
835 
836     /* TODO: needs more tests, but this feature is not much used at all */
837 
838     tmp = tx_scratch(tx);
839     ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
840                   NULL, 0, src_tg4, 3);
841     ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
842     return true;
843 }
844 
845 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
846  * the projection should be applied on the texture. It doesn't
847  * apply on texkill.
848  * The doc is very imprecise here (it says the projection is done
849  * before rasterization, thus in vs, which seems wrong since ps instructions
850  * are affected differently)
851  * For now we only apply to the ps TEX instruction and TEXBEM.
852  * Perhaps some other instructions would need it */
853 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)854 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
855                       struct ureg_src src, INT idx)
856 {
857     struct ureg_dst tmp;
858     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
859 
860     /* no projection */
861     if (dim == 1) {
862         ureg_MOV(tx->ureg, dst, src);
863     } else {
864         tmp = tx_scratch_scalar(tx);
865         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
866         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
867     }
868 }
869 
870 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)871 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
872                          unsigned target, struct ureg_src src0,
873                          struct ureg_src src1, INT idx)
874 {
875     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
876     struct ureg_dst tmp;
877     boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
878 
879     /* dim == 1: no projection
880      * Looks like must be disabled when it makes no
881      * sense according the texture dimensions
882      */
883     if (dim == 1 || (dim <= target && !shadow)) {
884         ureg_TEX(tx->ureg, dst, target, src0, src1);
885     } else if (dim == 4) {
886         ureg_TXP(tx->ureg, dst, target, src0, src1);
887     } else {
888         tmp = tx_scratch(tx);
889         apply_ps1x_projection(tx, tmp, src0, idx);
890         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
891     }
892 }
893 
894 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)895 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
896 {
897     assert(IS_PS);
898     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
899     if (ureg_src_is_undef(tx->regs.vT[idx]))
900        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
901                                              TGSI_INTERPOLATE_PERSPECTIVE);
902 }
903 
904 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)905 tx_bgnloop(struct shader_translator *tx)
906 {
907     tx->loop_depth++;
908     if (tx->loop_depth_max < tx->loop_depth)
909         tx->loop_depth_max = tx->loop_depth;
910     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
911     return &tx->loop_labels[tx->loop_depth - 1];
912 }
913 
914 static inline unsigned *
tx_endloop(struct shader_translator * tx)915 tx_endloop(struct shader_translator *tx)
916 {
917     assert(tx->loop_depth);
918     tx->loop_depth--;
919     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
920                      ureg_get_instruction_number(tx->ureg));
921     return &tx->loop_labels[tx->loop_depth];
922 }
923 
924 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)925 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
926 {
927     const unsigned l = tx->loop_depth - 1;
928 
929     if (!tx->loop_depth)
930     {
931         DBG("loop counter requested outside of loop\n");
932         return ureg_dst_undef();
933     }
934 
935     if (ureg_dst_is_undef(tx->regs.rL[l])) {
936         /* loop or rep ctr creation */
937         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
938         tx->loop_or_rep[l] = loop_or_rep;
939     }
940     /* loop - rep - endloop - endrep not allowed */
941     assert(tx->loop_or_rep[l] == loop_or_rep);
942 
943     return tx->regs.rL[l];
944 }
945 
946 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)947 tx_get_loopal(struct shader_translator *tx)
948 {
949     int loop_level = tx->loop_depth - 1;
950 
951     while (loop_level >= 0) {
952         /* handle loop - rep - endrep - endloop case */
953         if (tx->loop_or_rep[loop_level])
954             /* the value is in the loop counter y component (nine implementation) */
955             return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
956         loop_level--;
957     }
958 
959     DBG("aL counter requested outside of loop\n");
960     return ureg_src_undef();
961 }
962 
963 static inline unsigned *
tx_cond(struct shader_translator * tx)964 tx_cond(struct shader_translator *tx)
965 {
966    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
967    tx->cond_depth++;
968    return &tx->cond_labels[tx->cond_depth - 1];
969 }
970 
971 static inline unsigned *
tx_elsecond(struct shader_translator * tx)972 tx_elsecond(struct shader_translator *tx)
973 {
974    assert(tx->cond_depth);
975    return &tx->cond_labels[tx->cond_depth - 1];
976 }
977 
978 static inline void
tx_endcond(struct shader_translator * tx)979 tx_endcond(struct shader_translator *tx)
980 {
981    assert(tx->cond_depth);
982    tx->cond_depth--;
983    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
984                     ureg_get_instruction_number(tx->ureg));
985 }
986 
987 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)988 nine_ureg_dst_register(unsigned file, int index)
989 {
990     return ureg_dst(ureg_src_register(file, index));
991 }
992 
993 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)994 nine_get_position_input(struct shader_translator *tx)
995 {
996     struct ureg_program *ureg = tx->ureg;
997 
998     if (tx->wpos_is_sysval)
999         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1000     else
1001         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1002                                   0, TGSI_INTERPOLATE_LINEAR);
1003 }
1004 
1005 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)1006 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1007 {
1008     struct ureg_program *ureg = tx->ureg;
1009     struct ureg_src src;
1010     struct ureg_dst tmp;
1011 
1012     assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1013         (param->file == D3DSPR_INPUT && tx->version.major == 3));
1014 
1015     switch (param->file)
1016     {
1017     case D3DSPR_TEMP:
1018         tx_temp_alloc(tx, param->idx);
1019         src = ureg_src(tx->regs.r[param->idx]);
1020         break;
1021  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1022     case D3DSPR_ADDR:
1023         if (IS_VS) {
1024             assert(param->idx == 0);
1025             /* the address register (vs only) must be
1026              * assigned before use */
1027             assert(!ureg_dst_is_undef(tx->regs.a0));
1028             /* Round to lowest for vs1.1 (contrary to the doc), else
1029              * round to nearest */
1030             if (tx->version.major < 2 && tx->version.minor < 2)
1031                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1032             else
1033                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1034             src = ureg_src(tx->regs.address);
1035         } else {
1036             if (tx->version.major < 2 && tx->version.minor < 4) {
1037                 /* no subroutines, so should be defined */
1038                 src = ureg_src(tx->regs.tS[param->idx]);
1039             } else {
1040                 tx_texcoord_alloc(tx, param->idx);
1041                 src = tx->regs.vT[param->idx];
1042             }
1043         }
1044         break;
1045     case D3DSPR_INPUT:
1046         if (IS_VS) {
1047             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1048         } else {
1049             if (tx->version.major < 3) {
1050                 src = ureg_DECL_fs_input_centroid(
1051                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
1052                     TGSI_INTERPOLATE_COLOR,
1053                     tx->info->force_color_in_centroid ?
1054                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
1055                     0, 1);
1056             } else {
1057                 if(param->rel) {
1058                     /* Copy all inputs (non consecutive)
1059                      * to temp array (consecutive).
1060                      * This is not good for performance.
1061                      * A better way would be to have inputs
1062                      * consecutive (would need implement alternative
1063                      * way to match vs outputs and ps inputs).
1064                      * However even with the better way, the temp array
1065                      * copy would need to be used if some inputs
1066                      * are not GENERIC or if they have different
1067                      * interpolation flag. */
1068                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1069                         int i;
1070                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1071                         for (i = 0; i < 10; i++) {
1072                             if (!ureg_src_is_undef(tx->regs.v[i]))
1073                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1074                             else
1075                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1076                         }
1077                     }
1078                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1079                 } else {
1080                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
1081                     src = tx->regs.v[param->idx];
1082                 }
1083             }
1084         }
1085         if (param->rel)
1086             src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1087         break;
1088     case D3DSPR_PREDICATE:
1089         if (ureg_dst_is_undef(tx->regs.predicate)) {
1090             /* Forbidden to use the predicate register before being set */
1091             tx->failure = TRUE;
1092             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1093         }
1094         src = ureg_src(tx->regs.predicate);
1095         break;
1096     case D3DSPR_SAMPLER:
1097         assert(param->mod == NINED3DSPSM_NONE);
1098         /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1099         src = ureg_DECL_sampler(ureg, param->idx);
1100         break;
1101     case D3DSPR_CONST:
1102         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1103             src = nine_float_constant_src(tx, param->idx);
1104             if (param->rel) {
1105                 tx->indirect_const_access = TRUE;
1106                 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1107             }
1108         }
1109         if (!IS_VS && tx->version.major < 2) {
1110             /* ps 1.X clamps constants */
1111             tmp = tx_scratch(tx);
1112             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1113             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1114             src = ureg_src(tmp);
1115         }
1116         break;
1117     case D3DSPR_CONST2:
1118     case D3DSPR_CONST3:
1119     case D3DSPR_CONST4:
1120         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1121         assert(!"CONST2/3/4");
1122         src = ureg_imm1f(ureg, 0.0f);
1123         break;
1124     case D3DSPR_CONSTINT:
1125         /* relative adressing only possible for float constants in vs */
1126         if (!tx_lconsti(tx, &src, param->idx))
1127             src = nine_integer_constant_src(tx, param->idx);
1128         break;
1129     case D3DSPR_CONSTBOOL:
1130         if (!tx_lconstb(tx, &src, param->idx))
1131             src = nine_boolean_constant_src(tx, param->idx);
1132         break;
1133     case D3DSPR_LOOP:
1134         if (ureg_dst_is_undef(tx->regs.address))
1135             tx->regs.address = ureg_DECL_address(ureg);
1136         if (!tx->native_integers)
1137             ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1138         else
1139             ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1140         src = ureg_src(tx->regs.address);
1141         break;
1142     case D3DSPR_MISCTYPE:
1143         switch (param->idx) {
1144         case D3DSMO_POSITION:
1145            if (ureg_src_is_undef(tx->regs.vPos))
1146               tx->regs.vPos = nine_get_position_input(tx);
1147            if (tx->shift_wpos) {
1148                /* TODO: do this only once */
1149                struct ureg_dst wpos = tx_scratch(tx);
1150                ureg_ADD(ureg, wpos, tx->regs.vPos,
1151                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1152                src = ureg_src(wpos);
1153            } else {
1154                src = tx->regs.vPos;
1155            }
1156            break;
1157         case D3DSMO_FACE:
1158            if (ureg_src_is_undef(tx->regs.vFace)) {
1159                if (tx->face_is_sysval_integer) {
1160                    tmp = ureg_DECL_temporary(ureg);
1161                    tx->regs.vFace =
1162                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1163 
1164                    /* convert bool to float */
1165                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1166                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1167                    tx->regs.vFace = ureg_src(tmp);
1168                } else {
1169                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
1170                                                        TGSI_SEMANTIC_FACE, 0,
1171                                                        TGSI_INTERPOLATE_CONSTANT);
1172                }
1173                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1174            }
1175            src = tx->regs.vFace;
1176            break;
1177         default:
1178             assert(!"invalid src D3DSMO");
1179             break;
1180         }
1181         break;
1182     case D3DSPR_TEMPFLOAT16:
1183         break;
1184     default:
1185         assert(!"invalid src D3DSPR");
1186     }
1187 
1188     switch (param->mod) {
1189     case NINED3DSPSM_DW:
1190         tmp = tx_scratch(tx);
1191         /* NOTE: app is not allowed to read w with this modifier */
1192         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1193         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1194         src = ureg_src(tmp);
1195         break;
1196     case NINED3DSPSM_DZ:
1197         tmp = tx_scratch(tx);
1198         /* NOTE: app is not allowed to read z with this modifier */
1199         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1200         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1201         src = ureg_src(tmp);
1202         break;
1203     default:
1204         break;
1205     }
1206 
1207     if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1208         src = ureg_swizzle(src,
1209                            (param->swizzle >> 0) & 0x3,
1210                            (param->swizzle >> 2) & 0x3,
1211                            (param->swizzle >> 4) & 0x3,
1212                            (param->swizzle >> 6) & 0x3);
1213 
1214     switch (param->mod) {
1215     case NINED3DSPSM_ABS:
1216         src = ureg_abs(src);
1217         break;
1218     case NINED3DSPSM_ABSNEG:
1219         src = ureg_negate(ureg_abs(src));
1220         break;
1221     case NINED3DSPSM_NEG:
1222         src = ureg_negate(src);
1223         break;
1224     case NINED3DSPSM_BIAS:
1225         tmp = tx_scratch(tx);
1226         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1227         src = ureg_src(tmp);
1228         break;
1229     case NINED3DSPSM_BIASNEG:
1230         tmp = tx_scratch(tx);
1231         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1232         src = ureg_src(tmp);
1233         break;
1234     case NINED3DSPSM_NOT:
1235         if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1236             tmp = tx_scratch(tx);
1237             ureg_NOT(ureg, tmp, src);
1238             src = ureg_src(tmp);
1239             break;
1240         } else { /* predicate */
1241             tmp = tx_scratch(tx);
1242             ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1243             src = ureg_src(tmp);
1244         }
1245         FALLTHROUGH;
1246     case NINED3DSPSM_COMP:
1247         tmp = tx_scratch(tx);
1248         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1249         src = ureg_src(tmp);
1250         break;
1251     case NINED3DSPSM_DZ:
1252     case NINED3DSPSM_DW:
1253         /* Already handled*/
1254         break;
1255     case NINED3DSPSM_SIGN:
1256         tmp = tx_scratch(tx);
1257         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1258         src = ureg_src(tmp);
1259         break;
1260     case NINED3DSPSM_SIGNNEG:
1261         tmp = tx_scratch(tx);
1262         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1263         src = ureg_src(tmp);
1264         break;
1265     case NINED3DSPSM_X2:
1266         tmp = tx_scratch(tx);
1267         ureg_ADD(ureg, tmp, src, src);
1268         src = ureg_src(tmp);
1269         break;
1270     case NINED3DSPSM_X2NEG:
1271         tmp = tx_scratch(tx);
1272         ureg_ADD(ureg, tmp, src, src);
1273         src = ureg_negate(ureg_src(tmp));
1274         break;
1275     default:
1276         assert(param->mod == NINED3DSPSM_NONE);
1277         break;
1278     }
1279 
1280     return src;
1281 }
1282 
1283 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1284 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1285 {
1286     struct ureg_dst dst;
1287 
1288     switch (param->file)
1289     {
1290     case D3DSPR_TEMP:
1291         assert(!param->rel);
1292         tx_temp_alloc(tx, param->idx);
1293         dst = tx->regs.r[param->idx];
1294         break;
1295  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1296     case D3DSPR_ADDR:
1297         assert(!param->rel);
1298         if (tx->version.major < 2 && !IS_VS) {
1299             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1300                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1301             dst = tx->regs.tS[param->idx];
1302         } else
1303         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1304             tx_texcoord_alloc(tx, param->idx);
1305             dst = ureg_dst(tx->regs.vT[param->idx]);
1306         } else {
1307             tx_addr_alloc(tx, param->idx);
1308             dst = tx->regs.a0;
1309         }
1310         break;
1311     case D3DSPR_RASTOUT:
1312         assert(!param->rel);
1313         switch (param->idx) {
1314         case 0:
1315             if (ureg_dst_is_undef(tx->regs.oPos))
1316                 tx->regs.oPos =
1317                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1318             dst = tx->regs.oPos;
1319             break;
1320         case 1:
1321             if (ureg_dst_is_undef(tx->regs.oFog))
1322                 tx->regs.oFog =
1323                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1324             dst = tx->regs.oFog;
1325             break;
1326         case 2:
1327             if (ureg_dst_is_undef(tx->regs.oPts))
1328                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1329             dst = tx->regs.oPts;
1330             break;
1331         default:
1332             assert(0);
1333             break;
1334         }
1335         break;
1336  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1337     case D3DSPR_OUTPUT:
1338         if (tx->version.major < 3) {
1339             assert(!param->rel);
1340             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1341         } else {
1342             assert(!param->rel); /* TODO */
1343             assert(param->idx < ARRAY_SIZE(tx->regs.o));
1344             dst = tx->regs.o[param->idx];
1345         }
1346         break;
1347     case D3DSPR_ATTROUT: /* VS */
1348     case D3DSPR_COLOROUT: /* PS */
1349         assert(param->idx >= 0 && param->idx < 4);
1350         assert(!param->rel);
1351         tx->info->rt_mask |= 1 << param->idx;
1352         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1353             /* ps < 3: oCol[0] will have fog blending afterward */
1354             if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1355                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1356             } else {
1357                 tx->regs.oCol[param->idx] =
1358                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1359             }
1360         }
1361         dst = tx->regs.oCol[param->idx];
1362         if (IS_VS && tx->version.major < 3)
1363             dst = ureg_saturate(dst);
1364         break;
1365     case D3DSPR_DEPTHOUT:
1366         assert(!param->rel);
1367         if (ureg_dst_is_undef(tx->regs.oDepth))
1368            tx->regs.oDepth =
1369               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1370                                       TGSI_WRITEMASK_Z, 0, 1);
1371         dst = tx->regs.oDepth; /* XXX: must write .z component */
1372         break;
1373     case D3DSPR_PREDICATE:
1374         if (ureg_dst_is_undef(tx->regs.predicate))
1375             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1376         dst = tx->regs.predicate;
1377         break;
1378     case D3DSPR_TEMPFLOAT16:
1379         DBG("unhandled D3DSPR: %u\n", param->file);
1380         break;
1381     default:
1382         assert(!"invalid dst D3DSPR");
1383         break;
1384     }
1385     if (param->rel)
1386         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1387 
1388     if (param->mask != NINED3DSP_WRITEMASK_ALL)
1389         dst = ureg_writemask(dst, param->mask);
1390     if (param->mod & NINED3DSPDM_SATURATE)
1391         dst = ureg_saturate(dst);
1392 
1393     if (tx->predicated_activated) {
1394         tx->regs.predicate_dst = dst;
1395         dst = tx->regs.predicate_tmp;
1396     }
1397 
1398     return dst;
1399 }
1400 
1401 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1402 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1403 {
1404     if (param->shift) {
1405         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1406         return tx->regs.tdst;
1407     }
1408     return _tx_dst_param(tx, param);
1409 }
1410 
1411 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1412 tx_apply_dst0_modifiers(struct shader_translator *tx)
1413 {
1414     struct ureg_dst rdst;
1415     float f;
1416 
1417     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1418         return;
1419     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1420 
1421     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1422 
1423     if (tx->insn.dst[0].shift < 0)
1424         f = 1.0f / (1 << -tx->insn.dst[0].shift);
1425     else
1426         f = 1 << tx->insn.dst[0].shift;
1427 
1428     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1429 }
1430 
1431 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1432 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1433 {
1434     struct ureg_src src;
1435 
1436     assert(!param->shift);
1437     assert(!(param->mod & NINED3DSPDM_SATURATE));
1438 
1439     switch (param->file) {
1440     case D3DSPR_INPUT:
1441         if (IS_VS) {
1442             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1443         } else {
1444             assert(!param->rel);
1445             assert(param->idx < ARRAY_SIZE(tx->regs.v));
1446             src = tx->regs.v[param->idx];
1447         }
1448         break;
1449     default:
1450         src = ureg_src(tx_dst_param(tx, param));
1451         break;
1452     }
1453     if (param->rel)
1454         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1455 
1456     if (!param->mask)
1457         WARN("mask is 0, using identity swizzle\n");
1458 
1459     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1460         char s[4];
1461         int n;
1462         int c;
1463         for (n = 0, c = 0; c < 4; ++c)
1464             if (param->mask & (1 << c))
1465                 s[n++] = c;
1466         assert(n);
1467         for (c = n; c < 4; ++c)
1468             s[c] = s[n - 1];
1469         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1470     }
1471     return src;
1472 }
1473 
1474 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1475 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1476 {
1477     struct ureg_program *ureg = tx->ureg;
1478     struct ureg_dst dst;
1479     struct ureg_src src[2];
1480     struct sm1_src_param *src_mat = &tx->insn.src[1];
1481     unsigned i;
1482 
1483     dst = tx_dst_param(tx, &tx->insn.dst[0]);
1484     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1485 
1486     for (i = 0; i < n; i++)
1487     {
1488         const unsigned m = (1 << i);
1489 
1490         src[1] = tx_src_param(tx, src_mat);
1491         src_mat->idx++;
1492 
1493         if (!(dst.WriteMask & m))
1494             continue;
1495 
1496         /* XXX: src == dst case ? */
1497 
1498         switch (k) {
1499         case 3:
1500             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1501             break;
1502         case 4:
1503             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1504             break;
1505         default:
1506             DBG("invalid operation: M%ux%u\n", m, n);
1507             break;
1508         }
1509     }
1510 
1511     return D3D_OK;
1512 }
1513 
/* Expands to the two values "0, 0"; presumably fills a {min, max} version
 * range for a shader stage that does not support an opcode. */
#define VNOTSUPPORTED   0, 0
/* Pack a (major, minor) version pair into one integer: major shifted up by
 * 8 bits, minor in the low bits. */
#define V(maj, min)     ((min) | ((maj) << 8))
1516 
1517 static inline const char *
d3dsio_to_string(unsigned opcode)1518 d3dsio_to_string( unsigned opcode )
1519 {
1520     static const char *names[] = {
1521         "NOP",
1522         "MOV",
1523         "ADD",
1524         "SUB",
1525         "MAD",
1526         "MUL",
1527         "RCP",
1528         "RSQ",
1529         "DP3",
1530         "DP4",
1531         "MIN",
1532         "MAX",
1533         "SLT",
1534         "SGE",
1535         "EXP",
1536         "LOG",
1537         "LIT",
1538         "DST",
1539         "LRP",
1540         "FRC",
1541         "M4x4",
1542         "M4x3",
1543         "M3x4",
1544         "M3x3",
1545         "M3x2",
1546         "CALL",
1547         "CALLNZ",
1548         "LOOP",
1549         "RET",
1550         "ENDLOOP",
1551         "LABEL",
1552         "DCL",
1553         "POW",
1554         "CRS",
1555         "SGN",
1556         "ABS",
1557         "NRM",
1558         "SINCOS",
1559         "REP",
1560         "ENDREP",
1561         "IF",
1562         "IFC",
1563         "ELSE",
1564         "ENDIF",
1565         "BREAK",
1566         "BREAKC",
1567         "MOVA",
1568         "DEFB",
1569         "DEFI",
1570         NULL,
1571         NULL,
1572         NULL,
1573         NULL,
1574         NULL,
1575         NULL,
1576         NULL,
1577         NULL,
1578         NULL,
1579         NULL,
1580         NULL,
1581         NULL,
1582         NULL,
1583         NULL,
1584         NULL,
1585         "TEXCOORD",
1586         "TEXKILL",
1587         "TEX",
1588         "TEXBEM",
1589         "TEXBEML",
1590         "TEXREG2AR",
1591         "TEXREG2GB",
1592         "TEXM3x2PAD",
1593         "TEXM3x2TEX",
1594         "TEXM3x3PAD",
1595         "TEXM3x3TEX",
1596         NULL,
1597         "TEXM3x3SPEC",
1598         "TEXM3x3VSPEC",
1599         "EXPP",
1600         "LOGP",
1601         "CND",
1602         "DEF",
1603         "TEXREG2RGB",
1604         "TEXDP3TEX",
1605         "TEXM3x2DEPTH",
1606         "TEXDP3",
1607         "TEXM3x3",
1608         "TEXDEPTH",
1609         "CMP",
1610         "BEM",
1611         "DP2ADD",
1612         "DSX",
1613         "DSY",
1614         "TEXLDD",
1615         "SETP",
1616         "TEXLDL",
1617         "BREAKP"
1618     };
1619 
1620     if (opcode < ARRAY_SIZE(names)) return names[opcode];
1621 
1622     switch (opcode) {
1623     case D3DSIO_PHASE: return "PHASE";
1624     case D3DSIO_COMMENT: return "COMMENT";
1625     case D3DSIO_END: return "END";
1626     default:
1627         return NULL;
1628     }
1629 }
1630 
1631 #define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1632 #define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
1633                                      (inst).vert_version.max | \
1634                                      (inst).frag_version.min | \
1635                                      (inst).frag_version.max)
1636 
1637 #define SPECIAL(name) \
1638     NineTranslateInstruction_##name
1639 
1640 #define DECL_SPECIAL(name) \
1641     static HRESULT \
1642     NineTranslateInstruction_##name( struct shader_translator *tx )
1643 
1644 static HRESULT
1645 NineTranslateInstruction_Generic(struct shader_translator *);
1646 
DECL_SPECIAL(NOP)1647 DECL_SPECIAL(NOP)
1648 {
1649     /* Nothing to do. NOP was used to avoid hangs
1650      * with very old d3d drivers. */
1651     return D3D_OK;
1652 }
1653 
DECL_SPECIAL(SUB)1654 DECL_SPECIAL(SUB)
1655 {
1656     struct ureg_program *ureg = tx->ureg;
1657     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1658     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1659     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1660 
1661     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1662     return D3D_OK;
1663 }
1664 
DECL_SPECIAL(ABS)1665 DECL_SPECIAL(ABS)
1666 {
1667     struct ureg_program *ureg = tx->ureg;
1668     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1669     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1670 
1671     ureg_MOV(ureg, dst, ureg_abs(src));
1672     return D3D_OK;
1673 }
1674 
DECL_SPECIAL(XPD)1675 DECL_SPECIAL(XPD)
1676 {
1677     struct ureg_program *ureg = tx->ureg;
1678     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1679     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1680     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1681 
1682     ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1683              ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1684                           TGSI_SWIZZLE_X, 0),
1685              ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1686                           TGSI_SWIZZLE_Y, 0));
1687     ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1688              ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1689                           TGSI_SWIZZLE_Y, 0),
1690              ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1691                                       TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1692              ureg_src(dst));
1693     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1694              ureg_imm1f(ureg, 1));
1695     return D3D_OK;
1696 }
1697 
DECL_SPECIAL(M4x4)1698 DECL_SPECIAL(M4x4)
1699 {
1700     return NineTranslateInstruction_Mkxn(tx, 4, 4);
1701 }
1702 
DECL_SPECIAL(M4x3)1703 DECL_SPECIAL(M4x3)
1704 {
1705     return NineTranslateInstruction_Mkxn(tx, 4, 3);
1706 }
1707 
DECL_SPECIAL(M3x4)1708 DECL_SPECIAL(M3x4)
1709 {
1710     return NineTranslateInstruction_Mkxn(tx, 3, 4);
1711 }
1712 
DECL_SPECIAL(M3x3)1713 DECL_SPECIAL(M3x3)
1714 {
1715     return NineTranslateInstruction_Mkxn(tx, 3, 3);
1716 }
1717 
DECL_SPECIAL(M3x2)1718 DECL_SPECIAL(M3x2)
1719 {
1720     return NineTranslateInstruction_Mkxn(tx, 3, 2);
1721 }
1722 
DECL_SPECIAL(CMP)1723 DECL_SPECIAL(CMP)
1724 {
1725     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1726              tx_src_param(tx, &tx->insn.src[0]),
1727              tx_src_param(tx, &tx->insn.src[2]),
1728              tx_src_param(tx, &tx->insn.src[1]));
1729     return D3D_OK;
1730 }
1731 
DECL_SPECIAL(CND)1732 DECL_SPECIAL(CND)
1733 {
1734     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1735     struct ureg_dst cgt;
1736     struct ureg_src cnd;
1737 
1738     /* the coissue flag was a tip for compilers to advise to
1739      * execute two operations at the same time, in cases
1740      * the two executions had same dst with different channels.
1741      * It has no effect on current hw. However it seems CND
1742      * is affected. The handling of this very specific case
1743      * handled below mimick wine behaviour */
1744     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1745         ureg_MOV(tx->ureg,
1746                  dst, tx_src_param(tx, &tx->insn.src[1]));
1747         return D3D_OK;
1748     }
1749 
1750     cnd = tx_src_param(tx, &tx->insn.src[0]);
1751     cgt = tx_scratch(tx);
1752 
1753     if (tx->version.major == 1 && tx->version.minor < 4)
1754         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1755 
1756     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1757 
1758     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1759              tx_src_param(tx, &tx->insn.src[1]),
1760              tx_src_param(tx, &tx->insn.src[2]));
1761     return D3D_OK;
1762 }
1763 
DECL_SPECIAL(CALL)1764 DECL_SPECIAL(CALL)
1765 {
1766     assert(tx->insn.src[0].idx < tx->num_inst_labels);
1767     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1768     return D3D_OK;
1769 }
1770 
DECL_SPECIAL(CALLNZ)1771 DECL_SPECIAL(CALLNZ)
1772 {
1773     struct ureg_program *ureg = tx->ureg;
1774     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1775 
1776     if (!tx->native_integers)
1777         ureg_IF(ureg, src, tx_cond(tx));
1778     else
1779         ureg_UIF(ureg, src, tx_cond(tx));
1780     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1781     tx_endcond(tx);
1782     ureg_ENDIF(ureg);
1783     return D3D_OK;
1784 }
1785 
DECL_SPECIAL(LOOP)1786 DECL_SPECIAL(LOOP)
1787 {
1788     struct ureg_program *ureg = tx->ureg;
1789     unsigned *label;
1790     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1791     struct ureg_dst ctr;
1792     struct ureg_dst tmp;
1793     struct ureg_src ctrx;
1794 
1795     label = tx_bgnloop(tx);
1796     ctr = tx_get_loopctr(tx, TRUE);
1797     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1798 
1799     /* src: num_iterations - start_value of al - step for al - 0 */
1800     ureg_MOV(ureg, ctr, src);
1801     ureg_BGNLOOP(tx->ureg, label);
1802     tmp = tx_scratch_scalar(tx);
1803     /* Initially ctr.x contains the number of iterations.
1804      * ctr.y will contain the updated value of al.
1805      * We decrease ctr.x at the end of every iteration,
1806      * and stop when it reaches 0. */
1807 
1808     if (!tx->native_integers) {
1809         /* case src and ctr contain floats */
1810         /* to avoid precision issue, we stop when ctr <= 0.5 */
1811         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1812         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1813     } else {
1814         /* case src and ctr contain integers */
1815         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1816         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1817     }
1818     ureg_BRK(ureg);
1819     tx_endcond(tx);
1820     ureg_ENDIF(ureg);
1821     return D3D_OK;
1822 }
1823 
DECL_SPECIAL(RET)1824 DECL_SPECIAL(RET)
1825 {
1826     /* RET as a last instruction could be safely ignored.
1827      * Remove it to prevent crashes/warnings in case underlying
1828      * driver doesn't implement arbitrary returns.
1829      */
1830     if (*(tx->parse_next) != NINED3DSP_END) {
1831         ureg_RET(tx->ureg);
1832     }
1833     return D3D_OK;
1834 }
1835 
DECL_SPECIAL(ENDLOOP)1836 DECL_SPECIAL(ENDLOOP)
1837 {
1838     struct ureg_program *ureg = tx->ureg;
1839     struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1840     struct ureg_dst dst_ctrx, dst_al;
1841     struct ureg_src src_ctr, al_counter;
1842 
1843     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1844     dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1845     src_ctr = ureg_src(ctr);
1846     al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1847 
1848     /* ctr.x -= 1
1849      * ctr.y (aL) += step */
1850     if (!tx->native_integers) {
1851         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1852         ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1853     } else {
1854         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1855         ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1856     }
1857     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1858     return D3D_OK;
1859 }
1860 
DECL_SPECIAL(LABEL)1861 DECL_SPECIAL(LABEL)
1862 {
1863     unsigned k = tx->num_inst_labels;
1864     unsigned n = tx->insn.src[0].idx;
1865     assert(n < 2048);
1866     if (n >= k)
1867        tx->inst_labels = REALLOC(tx->inst_labels,
1868                                  k * sizeof(tx->inst_labels[0]),
1869                                  n * sizeof(tx->inst_labels[0]));
1870 
1871     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1872     return D3D_OK;
1873 }
1874 
DECL_SPECIAL(SINCOS)1875 DECL_SPECIAL(SINCOS)
1876 {
1877     struct ureg_program *ureg = tx->ureg;
1878     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1879     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1880     struct ureg_dst tmp = tx_scratch_scalar(tx);
1881 
1882     assert(!(dst.WriteMask & 0xc));
1883 
1884     /* Copying to a temporary register avoids src/dst aliasing.
1885      * src is supposed to have replicated swizzle. */
1886     ureg_MOV(ureg, tmp, src);
1887 
1888     /* z undefined, w untouched */
1889     ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1890              tx_src_scalar(tmp));
1891     ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1892              tx_src_scalar(tmp));
1893     return D3D_OK;
1894 }
1895 
DECL_SPECIAL(SGN)1896 DECL_SPECIAL(SGN)
1897 {
1898     ureg_SSG(tx->ureg,
1899              tx_dst_param(tx, &tx->insn.dst[0]),
1900              tx_src_param(tx, &tx->insn.src[0]));
1901     return D3D_OK;
1902 }
1903 
DECL_SPECIAL(REP)1904 DECL_SPECIAL(REP)
1905 {
1906     struct ureg_program *ureg = tx->ureg;
1907     unsigned *label;
1908     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1909     struct ureg_dst ctr;
1910     struct ureg_dst tmp;
1911     struct ureg_src ctrx;
1912 
1913     label = tx_bgnloop(tx);
1914     ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1915     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1916 
1917     /* NOTE: rep must be constant, so we don't have to save the count */
1918     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1919 
1920     /* rep: num_iterations - 0 - 0 - 0 */
1921     ureg_MOV(ureg, ctr, rep);
1922     ureg_BGNLOOP(ureg, label);
1923     tmp = tx_scratch_scalar(tx);
1924     /* Initially ctr.x contains the number of iterations.
1925      * We decrease ctr.x at the end of every iteration,
1926      * and stop when it reaches 0. */
1927 
1928     if (!tx->native_integers) {
1929         /* case src and ctr contain floats */
1930         /* to avoid precision issue, we stop when ctr <= 0.5 */
1931         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1932         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1933     } else {
1934         /* case src and ctr contain integers */
1935         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1936         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1937     }
1938     ureg_BRK(ureg);
1939     tx_endcond(tx);
1940     ureg_ENDIF(ureg);
1941 
1942     return D3D_OK;
1943 }
1944 
DECL_SPECIAL(ENDREP)1945 DECL_SPECIAL(ENDREP)
1946 {
1947     struct ureg_program *ureg = tx->ureg;
1948     struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1949     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1950     struct ureg_src src_ctr = ureg_src(ctr);
1951 
1952     /* ctr.x -= 1 */
1953     if (!tx->native_integers)
1954         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1955     else
1956         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1957 
1958     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1959     return D3D_OK;
1960 }
1961 
DECL_SPECIAL(ENDIF)1962 DECL_SPECIAL(ENDIF)
1963 {
1964     tx_endcond(tx);
1965     ureg_ENDIF(tx->ureg);
1966     return D3D_OK;
1967 }
1968 
DECL_SPECIAL(IF)1969 DECL_SPECIAL(IF)
1970 {
1971     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1972 
1973     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1974         ureg_UIF(tx->ureg, src, tx_cond(tx));
1975     else
1976         ureg_IF(tx->ureg, src, tx_cond(tx));
1977 
1978     return D3D_OK;
1979 }
1980 
1981 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1982 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1983 {
1984     switch (flags) {
1985     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1986     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1987     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1988     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1989     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1990     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1991     default:
1992         assert(!"invalid comparison flags");
1993         return TGSI_OPCODE_SGT;
1994     }
1995 }
1996 
DECL_SPECIAL(IFC)1997 DECL_SPECIAL(IFC)
1998 {
1999     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2000     struct ureg_src src[2];
2001     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2002     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2003     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2004     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2005     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2006     return D3D_OK;
2007 }
2008 
DECL_SPECIAL(ELSE)2009 DECL_SPECIAL(ELSE)
2010 {
2011     ureg_ELSE(tx->ureg, tx_elsecond(tx));
2012     return D3D_OK;
2013 }
2014 
DECL_SPECIAL(BREAKC)2015 DECL_SPECIAL(BREAKC)
2016 {
2017     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2018     struct ureg_src src[2];
2019     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2020     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2021     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2022     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2023     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2024     ureg_BRK(tx->ureg);
2025     tx_endcond(tx);
2026     ureg_ENDIF(tx->ureg);
2027     return D3D_OK;
2028 }
2029 
/* Printable names of the D3DDECLUSAGE_* values, indexed by the usage value
 * itself. Used when dumping DCL instructions. The table only covers the
 * usages listed here, so callers must not index it with a value beyond the
 * last entry. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
2047 
2048 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)2049 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2050 {
2051     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2052 }
2053 
2054 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)2055 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2056                       boolean tc,
2057                       struct sm1_semantic *dcl)
2058 {
2059     BYTE index = dcl->usage_idx;
2060 
2061     /* For everything that is not matching to a TGSI_SEMANTIC_****,
2062      * we match to a TGSI_SEMANTIC_GENERIC with index.
2063      *
2064      * The index can be anything UINT16 and usage_idx is BYTE,
2065      * so we can fit everything. It doesn't matter if indices
2066      * are close together or low.
2067      *
2068      *
2069      * POSITION >= 1: 10 * index + 7
2070      * COLOR >= 2: 10 * (index-1) + 8
2071      * FOG: 16
2072      * TEXCOORD[0..15]: index
2073      * BLENDWEIGHT: 10 * index + 19
2074      * BLENDINDICES: 10 * index + 20
2075      * NORMAL: 10 * index + 21
2076      * TANGENT: 10 * index + 22
2077      * BINORMAL: 10 * index + 23
2078      * TESSFACTOR: 10 * index + 24
2079      */
2080 
2081     switch (dcl->usage) {
2082     case D3DDECLUSAGE_POSITION:
2083     case D3DDECLUSAGE_POSITIONT:
2084     case D3DDECLUSAGE_DEPTH:
2085         if (index == 0) {
2086             sem->Name = TGSI_SEMANTIC_POSITION;
2087             sem->Index = 0;
2088         } else {
2089             sem->Name = TGSI_SEMANTIC_GENERIC;
2090             sem->Index = 10 * index + 7;
2091         }
2092         break;
2093     case D3DDECLUSAGE_COLOR:
2094         if (index < 2) {
2095             sem->Name = TGSI_SEMANTIC_COLOR;
2096             sem->Index = index;
2097         } else {
2098             sem->Name = TGSI_SEMANTIC_GENERIC;
2099             sem->Index = 10 * (index-1) + 8;
2100         }
2101         break;
2102     case D3DDECLUSAGE_FOG:
2103         assert(index == 0);
2104         sem->Name = TGSI_SEMANTIC_GENERIC;
2105         sem->Index = 16;
2106         break;
2107     case D3DDECLUSAGE_PSIZE:
2108         assert(index == 0);
2109         sem->Name = TGSI_SEMANTIC_PSIZE;
2110         sem->Index = 0;
2111         break;
2112     case D3DDECLUSAGE_TEXCOORD:
2113         assert(index < 16);
2114         if (index < 8 && tc)
2115             sem->Name = TGSI_SEMANTIC_TEXCOORD;
2116         else
2117             sem->Name = TGSI_SEMANTIC_GENERIC;
2118         sem->Index = index;
2119         break;
2120     case D3DDECLUSAGE_BLENDWEIGHT:
2121         sem->Name = TGSI_SEMANTIC_GENERIC;
2122         sem->Index = 10 * index + 19;
2123         break;
2124     case D3DDECLUSAGE_BLENDINDICES:
2125         sem->Name = TGSI_SEMANTIC_GENERIC;
2126         sem->Index = 10 * index + 20;
2127         break;
2128     case D3DDECLUSAGE_NORMAL:
2129         sem->Name = TGSI_SEMANTIC_GENERIC;
2130         sem->Index = 10 * index + 21;
2131         break;
2132     case D3DDECLUSAGE_TANGENT:
2133         sem->Name = TGSI_SEMANTIC_GENERIC;
2134         sem->Index = 10 * index + 22;
2135         break;
2136     case D3DDECLUSAGE_BINORMAL:
2137         sem->Name = TGSI_SEMANTIC_GENERIC;
2138         sem->Index = 10 * index + 23;
2139         break;
2140     case D3DDECLUSAGE_TESSFACTOR:
2141         sem->Name = TGSI_SEMANTIC_GENERIC;
2142         sem->Index = 10 * index + 24;
2143         break;
2144     case D3DDECLUSAGE_SAMPLE:
2145         sem->Name = TGSI_SEMANTIC_COUNT;
2146         sem->Index = 0;
2147         break;
2148     default:
2149         unreachable("Invalid DECLUSAGE.");
2150         break;
2151     }
2152 }
2153 
2154 #define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2155 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2156 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2157 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2158 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2159 d3dstt_to_tgsi_tex(BYTE sampler_type)
2160 {
2161     switch (sampler_type) {
2162     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2163     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2164     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2165     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2166     default:
2167         assert(0);
2168         return TGSI_TEXTURE_UNKNOWN;
2169     }
2170 }
2171 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2172 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2173 {
2174     switch (sampler_type) {
2175     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2176     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2177     case NINED3DSTT_VOLUME:
2178     case NINED3DSTT_CUBE:
2179     default:
2180         assert(0);
2181         return TGSI_TEXTURE_UNKNOWN;
2182     }
2183 }
2184 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2185 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2186 {
2187     boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2188     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2189     case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2190     case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2191     case 3: return TGSI_TEXTURE_3D;
2192     default:
2193         return TGSI_TEXTURE_CUBE;
2194     }
2195 }
2196 
2197 static const char *
sm1_sampler_type_name(BYTE sampler_type)2198 sm1_sampler_type_name(BYTE sampler_type)
2199 {
2200     switch (sampler_type) {
2201     case NINED3DSTT_1D:     return "1D";
2202     case NINED3DSTT_2D:     return "2D";
2203     case NINED3DSTT_VOLUME: return "VOLUME";
2204     case NINED3DSTT_CUBE:   return "CUBE";
2205     default:
2206         return "(D3DSTT_?)";
2207     }
2208 }
2209 
2210 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2211 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2212 {
2213     switch (sem->Name) {
2214     case TGSI_SEMANTIC_POSITION:
2215     case TGSI_SEMANTIC_NORMAL:
2216         return TGSI_INTERPOLATE_LINEAR;
2217     case TGSI_SEMANTIC_BCOLOR:
2218     case TGSI_SEMANTIC_COLOR:
2219         return TGSI_INTERPOLATE_COLOR;
2220     case TGSI_SEMANTIC_FOG:
2221     case TGSI_SEMANTIC_GENERIC:
2222     case TGSI_SEMANTIC_TEXCOORD:
2223     case TGSI_SEMANTIC_CLIPDIST:
2224     case TGSI_SEMANTIC_CLIPVERTEX:
2225         return TGSI_INTERPOLATE_PERSPECTIVE;
2226     case TGSI_SEMANTIC_EDGEFLAG:
2227     case TGSI_SEMANTIC_FACE:
2228     case TGSI_SEMANTIC_INSTANCEID:
2229     case TGSI_SEMANTIC_PCOORD:
2230     case TGSI_SEMANTIC_PRIMID:
2231     case TGSI_SEMANTIC_PSIZE:
2232     case TGSI_SEMANTIC_VERTEXID:
2233         return TGSI_INTERPOLATE_CONSTANT;
2234     default:
2235         assert(0);
2236         return TGSI_INTERPOLATE_CONSTANT;
2237     }
2238 }
2239 
DECL_SPECIAL(DCL)2240 DECL_SPECIAL(DCL)
2241 {
2242     struct ureg_program *ureg = tx->ureg;
2243     boolean is_input;
2244     boolean is_sampler;
2245     struct tgsi_declaration_semantic tgsi;
2246     struct sm1_semantic sem;
2247     sm1_read_semantic(tx, &sem);
2248 
2249     is_input = sem.reg.file == D3DSPR_INPUT;
2250     is_sampler =
2251         sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2252 
2253     DUMP("DCL ");
2254     sm1_dump_dst_param(&sem.reg);
2255     if (is_sampler)
2256         DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2257     else
2258     if (tx->version.major >= 3)
2259         DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2260     else
2261     if (sem.usage | sem.usage_idx)
2262         DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2263     else
2264         DUMP("\n");
2265 
2266     if (is_sampler) {
2267         const unsigned m = 1 << sem.reg.idx;
2268         ureg_DECL_sampler(ureg, sem.reg.idx);
2269         tx->info->sampler_mask |= m;
2270         tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2271             d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2272             d3dstt_to_tgsi_tex(sem.sampler_type);
2273         return D3D_OK;
2274     }
2275 
2276     sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2277     if (IS_VS) {
2278         if (is_input) {
2279             /* linkage outside of shader with vertex declaration */
2280             ureg_DECL_vs_input(ureg, sem.reg.idx);
2281             assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2282             tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2283             tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2284             /* NOTE: preserving order in case of indirect access */
2285         } else
2286         if (tx->version.major >= 3) {
2287             /* SM2 output semantic determined by file */
2288             assert(sem.reg.mask != 0);
2289             if (sem.usage == D3DDECLUSAGE_POSITIONT)
2290                 tx->info->position_t = TRUE;
2291             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2292             assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2293             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2294                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2295             nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2296             if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2297                 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2298                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2299                 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2300             }
2301 
2302             if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2303                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2304                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2305             }
2306         }
2307     } else {
2308         if (is_input && tx->version.major >= 3) {
2309             unsigned interp_location = 0;
2310             /* SM3 only, SM2 input semantic determined by file */
2311             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2312             assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2313             /* PositionT and tessfactor forbidden */
2314             if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2315                 return D3DERR_INVALIDCALL;
2316 
2317             if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2318                 /* Position0 is forbidden (likely because vPos already does that) */
2319                 if (sem.usage == D3DDECLUSAGE_POSITION)
2320                     return D3DERR_INVALIDCALL;
2321                 /* Following code is for depth */
2322                 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2323                 return D3D_OK;
2324             }
2325 
2326             if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2327                 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2328                 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2329 
2330             tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2331                 ureg, tgsi.Name, tgsi.Index,
2332                 nine_tgsi_to_interp_mode(&tgsi),
2333                 interp_location, 0, 1);
2334         } else
2335         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2336             /* FragColor or FragDepth */
2337             assert(sem.reg.mask != 0);
2338             ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2339                                     0, 1);
2340         }
2341     }
2342     return D3D_OK;
2343 }
2344 
DECL_SPECIAL(DEF)2345 DECL_SPECIAL(DEF)
2346 {
2347     tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2348     return D3D_OK;
2349 }
2350 
DECL_SPECIAL(DEFB)2351 DECL_SPECIAL(DEFB)
2352 {
2353     tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2354     return D3D_OK;
2355 }
2356 
DECL_SPECIAL(DEFI)2357 DECL_SPECIAL(DEFI)
2358 {
2359     tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2360     return D3D_OK;
2361 }
2362 
DECL_SPECIAL(POW)2363 DECL_SPECIAL(POW)
2364 {
2365     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2366     struct ureg_src src[2] = {
2367         tx_src_param(tx, &tx->insn.src[0]),
2368         tx_src_param(tx, &tx->insn.src[1])
2369     };
2370     ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2371     return D3D_OK;
2372 }
2373 
2374 /* Tests results on Win 10:
2375  * NV (NVIDIA GeForce GT 635M)
2376  * AMD (AMD Radeon HD 7730M)
2377  * INTEL (Intel(R) HD Graphics 4000)
2378  * PS2 and PS3:
2379  * RCP and RSQ can generate inf on NV and AMD.
2380  * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2381  * NV: log not clamped
2382  * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2383  * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2384  * All devices have 0*anything = 0
2385  *
2386  * INTEL VS2 and VS3: same behaviour.
2387  * Some differences VS2 and VS3 for constants defined with inf/NaN.
2388  * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2389  * VS2 seems to clamp to zero (may be test failure).
2390  * AMD VS2: unknown, VS3: very likely behaviour of PS3
2391  * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
2393  * "Very likely" because the test was less extensive.
2394  *
2395  * Thus all clamping can be removed for shaders 2 and 3,
2396  * as long as 0*anything = 0.
2397  * Else clamps to enforce 0*anything = 0 (anything being then
2398  * neither inf or NaN, the user being unlikely to pass them
2399  * as constant).
2400  * The status for VS1 and PS1 is unknown.
2401  */
2402 
DECL_SPECIAL(RCP)2403 DECL_SPECIAL(RCP)
2404 {
2405     struct ureg_program *ureg = tx->ureg;
2406     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2407     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2408     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2409     ureg_RCP(ureg, tmp, src);
2410     if (!tx->mul_zero_wins) {
2411         /* FLT_MAX has issues with Rayman */
2412         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2413         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2414     }
2415     return D3D_OK;
2416 }
2417 
DECL_SPECIAL(RSQ)2418 DECL_SPECIAL(RSQ)
2419 {
2420     struct ureg_program *ureg = tx->ureg;
2421     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2422     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2423     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2424     ureg_RSQ(ureg, tmp, ureg_abs(src));
2425     if (!tx->mul_zero_wins)
2426         ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2427     return D3D_OK;
2428 }
2429 
DECL_SPECIAL(LOG)2430 DECL_SPECIAL(LOG)
2431 {
2432     struct ureg_program *ureg = tx->ureg;
2433     struct ureg_dst tmp = tx_scratch_scalar(tx);
2434     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2435     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2436     ureg_LG2(ureg, tmp, ureg_abs(src));
2437     if (tx->mul_zero_wins) {
2438         ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2439     } else {
2440         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2441     }
2442     return D3D_OK;
2443 }
2444 
DECL_SPECIAL(LIT)2445 DECL_SPECIAL(LIT)
2446 {
2447     struct ureg_program *ureg = tx->ureg;
2448     struct ureg_dst tmp = tx_scratch(tx);
2449     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2450     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2451     ureg_LIT(ureg, tmp, src);
2452     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2453      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2454      * it 0^0 if src.w=0, which value is driver dependent. */
2455     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2456              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2457              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2458     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2459     return D3D_OK;
2460 }
2461 
DECL_SPECIAL(NRM)2462 DECL_SPECIAL(NRM)
2463 {
2464     struct ureg_program *ureg = tx->ureg;
2465     struct ureg_dst tmp = tx_scratch_scalar(tx);
2466     struct ureg_src nrm = tx_src_scalar(tmp);
2467     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2468     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2469     ureg_DP3(ureg, tmp, src, src);
2470     ureg_RSQ(ureg, tmp, nrm);
2471     if (!tx->mul_zero_wins)
2472         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2473     ureg_MUL(ureg, dst, src, nrm);
2474     return D3D_OK;
2475 }
2476 
DECL_SPECIAL(DP2ADD)2477 DECL_SPECIAL(DP2ADD)
2478 {
2479     struct ureg_dst tmp = tx_scratch_scalar(tx);
2480     struct ureg_src dp2 = tx_src_scalar(tmp);
2481     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2482     struct ureg_src src[3];
2483     int i;
2484     for (i = 0; i < 3; ++i)
2485         src[i] = tx_src_param(tx, &tx->insn.src[i]);
2486     assert_replicate_swizzle(&src[2]);
2487 
2488     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2489     ureg_ADD(tx->ureg, dst, src[2], dp2);
2490 
2491     return D3D_OK;
2492 }
2493 
DECL_SPECIAL(TEXCOORD)2494 DECL_SPECIAL(TEXCOORD)
2495 {
2496     struct ureg_program *ureg = tx->ureg;
2497     const unsigned s = tx->insn.dst[0].idx;
2498     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2499 
2500     tx_texcoord_alloc(tx, s);
2501     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2502     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2503 
2504     return D3D_OK;
2505 }
2506 
DECL_SPECIAL(TEXCOORD_ps14)2507 DECL_SPECIAL(TEXCOORD_ps14)
2508 {
2509     struct ureg_program *ureg = tx->ureg;
2510     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2511     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2512 
2513     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2514 
2515     ureg_MOV(ureg, dst, src);
2516 
2517     return D3D_OK;
2518 }
2519 
DECL_SPECIAL(TEXKILL)2520 DECL_SPECIAL(TEXKILL)
2521 {
2522     struct ureg_src reg;
2523 
2524     if (tx->version.major > 1 || tx->version.minor > 3) {
2525         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2526     } else {
2527         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2528         reg = tx->regs.vT[tx->insn.dst[0].idx];
2529     }
2530     if (tx->version.major < 2)
2531         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2532     ureg_KILL_IF(tx->ureg, reg);
2533 
2534     return D3D_OK;
2535 }
2536 
DECL_SPECIAL(TEXBEM)2537 DECL_SPECIAL(TEXBEM)
2538 {
2539     struct ureg_program *ureg = tx->ureg;
2540     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2541     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2542     struct ureg_dst tmp, tmp2, texcoord;
2543     struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2544     struct ureg_src bumpenvlscale, bumpenvloffset;
2545     const int m = tx->insn.dst[0].idx;
2546 
2547     assert(tx->version.major == 1);
2548 
2549     sample = ureg_DECL_sampler(ureg, m);
2550     tx->info->sampler_mask |= 1 << m;
2551 
2552     tx_texcoord_alloc(tx, m);
2553 
2554     tmp = tx_scratch(tx);
2555     tmp2 = tx_scratch(tx);
2556     texcoord = tx_scratch(tx);
2557     /*
2558      * Bump-env-matrix:
2559      * 00 is X
2560      * 01 is Y
2561      * 10 is Z
2562      * 11 is W
2563      */
2564     c8m = nine_float_constant_src(tx, 8+m);
2565     c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2566 
2567     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2568     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2569     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2570     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2571 
2572     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2573     if (m % 2 == 0) {
2574         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2575         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2576     } else {
2577         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2578         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2579     }
2580 
2581     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2582 
2583     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2584     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2585              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2586     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2587     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2588              NINE_APPLY_SWIZZLE(src, Y),
2589              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2590 
2591     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2592     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2593              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2594     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2595     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2596              NINE_APPLY_SWIZZLE(src, Y),
2597              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2598 
2599     /* Now the texture coordinates are in tmp.xy */
2600 
2601     if (tx->insn.opcode == D3DSIO_TEXBEM) {
2602         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2603     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2604         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2605         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2606         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2607                  bumpenvlscale, bumpenvloffset);
2608         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2609     }
2610 
2611     tx->info->bumpenvmat_needed = 1;
2612 
2613     return D3D_OK;
2614 }
2615 
DECL_SPECIAL(TEXREG2AR)2616 DECL_SPECIAL(TEXREG2AR)
2617 {
2618     struct ureg_program *ureg = tx->ureg;
2619     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2620     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2621     struct ureg_src sample;
2622     const int m = tx->insn.dst[0].idx;
2623     ASSERTED const int n = tx->insn.src[0].idx;
2624     assert(m >= 0 && m > n);
2625 
2626     sample = ureg_DECL_sampler(ureg, m);
2627     tx->info->sampler_mask |= 1 << m;
2628     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2629 
2630     return D3D_OK;
2631 }
2632 
DECL_SPECIAL(TEXREG2GB)2633 DECL_SPECIAL(TEXREG2GB)
2634 {
2635     struct ureg_program *ureg = tx->ureg;
2636     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2637     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2638     struct ureg_src sample;
2639     const int m = tx->insn.dst[0].idx;
2640     ASSERTED const int n = tx->insn.src[0].idx;
2641     assert(m >= 0 && m > n);
2642 
2643     sample = ureg_DECL_sampler(ureg, m);
2644     tx->info->sampler_mask |= 1 << m;
2645     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2646 
2647     return D3D_OK;
2648 }
2649 
DECL_SPECIAL(TEXM3x2PAD)2650 DECL_SPECIAL(TEXM3x2PAD)
2651 {
2652     return D3D_OK; /* this is just padding */
2653 }
2654 
DECL_SPECIAL(TEXM3x2TEX)2655 DECL_SPECIAL(TEXM3x2TEX)
2656 {
2657     struct ureg_program *ureg = tx->ureg;
2658     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2659     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2660     struct ureg_src sample;
2661     const int m = tx->insn.dst[0].idx - 1;
2662     ASSERTED const int n = tx->insn.src[0].idx;
2663     assert(m >= 0 && m > n);
2664 
2665     tx_texcoord_alloc(tx, m);
2666     tx_texcoord_alloc(tx, m+1);
2667 
2668     /* performs the matrix multiplication */
2669     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2670     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2671 
2672     sample = ureg_DECL_sampler(ureg, m + 1);
2673     tx->info->sampler_mask |= 1 << (m + 1);
2674     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2675 
2676     return D3D_OK;
2677 }
2678 
DECL_SPECIAL(TEXM3x3PAD)2679 DECL_SPECIAL(TEXM3x3PAD)
2680 {
2681     return D3D_OK; /* this is just padding */
2682 }
2683 
DECL_SPECIAL(TEXM3x3SPEC)2684 DECL_SPECIAL(TEXM3x3SPEC)
2685 {
2686     struct ureg_program *ureg = tx->ureg;
2687     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2688     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2689     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2690     struct ureg_src sample;
2691     struct ureg_dst tmp;
2692     const int m = tx->insn.dst[0].idx - 2;
2693     ASSERTED const int n = tx->insn.src[0].idx;
2694     assert(m >= 0 && m > n);
2695 
2696     tx_texcoord_alloc(tx, m);
2697     tx_texcoord_alloc(tx, m+1);
2698     tx_texcoord_alloc(tx, m+2);
2699 
2700     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2701     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2702     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2703 
2704     sample = ureg_DECL_sampler(ureg, m + 2);
2705     tx->info->sampler_mask |= 1 << (m + 2);
2706     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2707 
2708     /* At this step, dst = N = (u', w', z').
2709      * We want dst to be the texture sampled at (u'', w'', z''), with
2710      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2711     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2712     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2713     /* at this step tmp.x = 1/N.N */
2714     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2715     /* at this step tmp.y = N.E */
2716     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2717     /* at this step tmp.x = N.E/N.N */
2718     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2719     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2720     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2721     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2722     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2723 
2724     return D3D_OK;
2725 }
2726 
DECL_SPECIAL(TEXREG2RGB)2727 DECL_SPECIAL(TEXREG2RGB)
2728 {
2729     struct ureg_program *ureg = tx->ureg;
2730     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2731     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2732     struct ureg_src sample;
2733     const int m = tx->insn.dst[0].idx;
2734     ASSERTED const int n = tx->insn.src[0].idx;
2735     assert(m >= 0 && m > n);
2736 
2737     sample = ureg_DECL_sampler(ureg, m);
2738     tx->info->sampler_mask |= 1 << m;
2739     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2740 
2741     return D3D_OK;
2742 }
2743 
DECL_SPECIAL(TEXDP3TEX)2744 DECL_SPECIAL(TEXDP3TEX)
2745 {
2746     struct ureg_program *ureg = tx->ureg;
2747     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2748     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2749     struct ureg_dst tmp;
2750     struct ureg_src sample;
2751     const int m = tx->insn.dst[0].idx;
2752     ASSERTED const int n = tx->insn.src[0].idx;
2753     assert(m >= 0 && m > n);
2754 
2755     tx_texcoord_alloc(tx, m);
2756 
2757     tmp = tx_scratch(tx);
2758     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2759     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2760 
2761     sample = ureg_DECL_sampler(ureg, m);
2762     tx->info->sampler_mask |= 1 << m;
2763     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2764 
2765     return D3D_OK;
2766 }
2767 
DECL_SPECIAL(TEXM3x2DEPTH)2768 DECL_SPECIAL(TEXM3x2DEPTH)
2769 {
2770     struct ureg_program *ureg = tx->ureg;
2771     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2772     struct ureg_dst tmp;
2773     const int m = tx->insn.dst[0].idx - 1;
2774     ASSERTED const int n = tx->insn.src[0].idx;
2775     assert(m >= 0 && m > n);
2776 
2777     tx_texcoord_alloc(tx, m);
2778     tx_texcoord_alloc(tx, m+1);
2779 
2780     tmp = tx_scratch(tx);
2781 
2782     /* performs the matrix multiplication */
2783     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2784     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2785 
2786     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2787     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2788     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2789     /* res = 'w' == 0 ? 1.0 : z/w */
2790     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2791              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2792     /* replace the depth for depth testing with the result */
2793     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2794                                               TGSI_WRITEMASK_Z, 0, 1);
2795     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2796     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2797     return D3D_OK;
2798 }
2799 
DECL_SPECIAL(TEXDP3)2800 DECL_SPECIAL(TEXDP3)
2801 {
2802     struct ureg_program *ureg = tx->ureg;
2803     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2804     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2805     const int m = tx->insn.dst[0].idx;
2806     ASSERTED const int n = tx->insn.src[0].idx;
2807     assert(m >= 0 && m > n);
2808 
2809     tx_texcoord_alloc(tx, m);
2810 
2811     ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2812 
2813     return D3D_OK;
2814 }
2815 
DECL_SPECIAL(TEXM3x3)2816 DECL_SPECIAL(TEXM3x3)
2817 {
2818     struct ureg_program *ureg = tx->ureg;
2819     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2820     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2821     struct ureg_src sample;
2822     struct ureg_dst E, tmp;
2823     const int m = tx->insn.dst[0].idx - 2;
2824     ASSERTED const int n = tx->insn.src[0].idx;
2825     assert(m >= 0 && m > n);
2826 
2827     tx_texcoord_alloc(tx, m);
2828     tx_texcoord_alloc(tx, m+1);
2829     tx_texcoord_alloc(tx, m+2);
2830 
2831     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2832     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2833     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2834 
2835     switch (tx->insn.opcode) {
2836     case D3DSIO_TEXM3x3:
2837         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2838         break;
2839     case D3DSIO_TEXM3x3TEX:
2840         sample = ureg_DECL_sampler(ureg, m + 2);
2841         tx->info->sampler_mask |= 1 << (m + 2);
2842         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2843         break;
2844     case D3DSIO_TEXM3x3VSPEC:
2845         sample = ureg_DECL_sampler(ureg, m + 2);
2846         tx->info->sampler_mask |= 1 << (m + 2);
2847         E = tx_scratch(tx);
2848         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2849         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2850         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2851         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2852         /* At this step, dst = N = (u', w', z').
2853          * We want dst to be the texture sampled at (u'', w'', z''), with
2854          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2855         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2856         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2857         /* at this step tmp.x = 1/N.N */
2858         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2859         /* at this step tmp.y = N.E */
2860         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2861         /* at this step tmp.x = N.E/N.N */
2862         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2863         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2864         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2865         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2866         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2867         break;
2868     default:
2869         return D3DERR_INVALIDCALL;
2870     }
2871     return D3D_OK;
2872 }
2873 
DECL_SPECIAL(TEXDEPTH)2874 DECL_SPECIAL(TEXDEPTH)
2875 {
2876     struct ureg_program *ureg = tx->ureg;
2877     struct ureg_dst r5;
2878     struct ureg_src r5r, r5g;
2879 
2880     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2881 
2882     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2883      * r5 won't be used afterward, thus we can use r5.ba */
2884     r5 = tx->regs.r[5];
2885     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2886     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2887 
2888     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2889     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2890     /* r5.r = r/g */
2891     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2892              r5r, ureg_imm1f(ureg, 1.0f));
2893     /* replace the depth for depth testing with the result */
2894     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2895                                               TGSI_WRITEMASK_Z, 0, 1);
2896     ureg_MOV(ureg, tx->regs.oDepth, r5r);
2897 
2898     return D3D_OK;
2899 }
2900 
DECL_SPECIAL(BEM)2901 DECL_SPECIAL(BEM)
2902 {
2903     struct ureg_program *ureg = tx->ureg;
2904     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2905     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2906     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2907     struct ureg_src m00, m01, m10, m11, c8m;
2908     const int m = tx->insn.dst[0].idx;
2909     struct ureg_dst tmp = tx_scratch(tx);
2910     /*
2911      * Bump-env-matrix:
2912      * 00 is X
2913      * 01 is Y
2914      * 10 is Z
2915      * 11 is W
2916      */
2917     c8m = nine_float_constant_src(tx, 8+m);
2918     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2919     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2920     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2921     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2922     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2923     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2924              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2925     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2926     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2927              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2928 
2929     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2930     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2931              NINE_APPLY_SWIZZLE(src1, X), src0);
2932     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2933     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2934              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2935     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2936 
2937     tx->info->bumpenvmat_needed = 1;
2938 
2939     return D3D_OK;
2940 }
2941 
DECL_SPECIAL(TEXLD)2942 DECL_SPECIAL(TEXLD)
2943 {
2944     struct ureg_program *ureg = tx->ureg;
2945     unsigned target;
2946     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2947     struct ureg_src src[2] = {
2948         tx_src_param(tx, &tx->insn.src[0]),
2949         tx_src_param(tx, &tx->insn.src[1])
2950     };
2951     assert(tx->insn.src[1].idx >= 0 &&
2952            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2953     target = tx->sampler_targets[tx->insn.src[1].idx];
2954 
2955     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
2956         return D3D_OK;
2957 
2958     switch (tx->insn.flags) {
2959     case 0:
2960         ureg_TEX(ureg, dst, target, src[0], src[1]);
2961         break;
2962     case NINED3DSI_TEXLD_PROJECT:
2963         ureg_TXP(ureg, dst, target, src[0], src[1]);
2964         break;
2965     case NINED3DSI_TEXLD_BIAS:
2966         ureg_TXB(ureg, dst, target, src[0], src[1]);
2967         break;
2968     default:
2969         assert(0);
2970         return D3DERR_INVALIDCALL;
2971     }
2972     return D3D_OK;
2973 }
2974 
DECL_SPECIAL(TEXLD_14)2975 DECL_SPECIAL(TEXLD_14)
2976 {
2977     struct ureg_program *ureg = tx->ureg;
2978     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2979     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2980     const unsigned s = tx->insn.dst[0].idx;
2981     const unsigned t = ps1x_sampler_type(tx->info, s);
2982 
2983     tx->info->sampler_mask |= 1 << s;
2984     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2985 
2986     return D3D_OK;
2987 }
2988 
DECL_SPECIAL(TEX)2989 DECL_SPECIAL(TEX)
2990 {
2991     struct ureg_program *ureg = tx->ureg;
2992     const unsigned s = tx->insn.dst[0].idx;
2993     const unsigned t = ps1x_sampler_type(tx->info, s);
2994     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2995     struct ureg_src src[2];
2996 
2997     tx_texcoord_alloc(tx, s);
2998 
2999     src[0] = tx->regs.vT[s];
3000     src[1] = ureg_DECL_sampler(ureg, s);
3001     tx->info->sampler_mask |= 1 << s;
3002 
3003     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3004 
3005     return D3D_OK;
3006 }
3007 
DECL_SPECIAL(TEXLDD)3008 DECL_SPECIAL(TEXLDD)
3009 {
3010     unsigned target;
3011     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3012     struct ureg_src src[4] = {
3013         tx_src_param(tx, &tx->insn.src[0]),
3014         tx_src_param(tx, &tx->insn.src[1]),
3015         tx_src_param(tx, &tx->insn.src[2]),
3016         tx_src_param(tx, &tx->insn.src[3])
3017     };
3018     assert(tx->insn.src[1].idx >= 0 &&
3019            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3020     target = tx->sampler_targets[tx->insn.src[1].idx];
3021 
3022     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3023         return D3D_OK;
3024 
3025     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3026     return D3D_OK;
3027 }
3028 
DECL_SPECIAL(TEXLDL)3029 DECL_SPECIAL(TEXLDL)
3030 {
3031     unsigned target;
3032     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3033     struct ureg_src src[2] = {
3034        tx_src_param(tx, &tx->insn.src[0]),
3035        tx_src_param(tx, &tx->insn.src[1])
3036     };
3037     assert(tx->insn.src[1].idx >= 0 &&
3038            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3039     target = tx->sampler_targets[tx->insn.src[1].idx];
3040 
3041     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3042         return D3D_OK;
3043 
3044     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3045     return D3D_OK;
3046 }
3047 
DECL_SPECIAL(SETP)3048 DECL_SPECIAL(SETP)
3049 {
3050     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3051     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052     struct ureg_src src[2] = {
3053        tx_src_param(tx, &tx->insn.src[0]),
3054        tx_src_param(tx, &tx->insn.src[1])
3055     };
3056     ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3057     return D3D_OK;
3058 }
3059 
DECL_SPECIAL(BREAKP)3060 DECL_SPECIAL(BREAKP)
3061 {
3062     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3063     ureg_IF(tx->ureg, src, tx_cond(tx));
3064     ureg_BRK(tx->ureg);
3065     tx_endcond(tx);
3066     ureg_ENDIF(tx->ureg);
3067     return D3D_OK;
3068 }
3069 
DECL_SPECIAL(PHASE)3070 DECL_SPECIAL(PHASE)
3071 {
3072     return D3D_OK; /* we don't care about phase */
3073 }
3074 
DECL_SPECIAL(COMMENT)3075 DECL_SPECIAL(COMMENT)
3076 {
3077     return D3D_OK; /* nothing to do */
3078 }
3079 
3080 
/* Builds one struct sm1_op_info initializer:
 *   d3d_op, tgsi_op  - opcode suffixes, pasted onto D3DSIO_ / TGSI_OPCODE_
 *   vs_min, vs_max   - vertex shader version range
 *   ps_min, ps_max   - pixel shader version range
 *   n_dst, n_src, fn - stored positionally after the version ranges */
#define _OPI(d3d_op, tgsi_op, vs_min, vs_max, ps_min, ps_max, n_dst, n_src, fn) \
    { D3DSIO_##d3d_op, TGSI_OPCODE_##tgsi_op, { vs_min, vs_max }, { ps_min, ps_max, }, n_dst, n_src, fn }
3083 
3084 static const struct sm1_op_info inst_table[] =
3085 {
3086     _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3087     _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3088     _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3089     _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3090     _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3091     _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3092     _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3093     _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3094     _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3095     _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3096     _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3097     _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3098     _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3099     _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3100     _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3101     _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3102     _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3103     _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3104     _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3105     _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3106 
3107     _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3108     _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3109     _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3110     _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3111     _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3112 
3113     _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3114     _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3115     _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3116     _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3117     _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3118     _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3119 
3120     _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3121 
3122     _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3123     _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3124     _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3125     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3126     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3127 
3128     _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3129     _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3130 
3131     /* More flow control */
3132     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3133     _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3134     _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3135     _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3136     _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3137     _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3138     _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3139     _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3140     /* we don't write to the address register, but a normal register (copied
3141      * when needed to the address register), thus we don't use ARR */
3142     _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3143 
3144     _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3145     _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3146 
3147     _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3148     _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3149     _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3150     _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3151     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3152     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3153     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3154     _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3155     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3156     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3157     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3158     _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3159     _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3160     _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3161     _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3162     _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3163 
3164     _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3165     _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3166     _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3167     _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3168 
3169     _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3170 
3171     /* More tex stuff */
3172     _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3173     _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3174     _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3175     _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3176     _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3177     _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3178 
3179     /* Misc */
3180     _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3181     _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3182     _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3183     _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3184     _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3185     _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3186     _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3187     _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3188     _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3189 };
3190 
3191 static const struct sm1_op_info inst_phase =
3192     _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3193 
3194 static const struct sm1_op_info inst_comment =
3195     _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3196 
3197 static void
create_op_info_map(struct shader_translator * tx)3198 create_op_info_map(struct shader_translator *tx)
3199 {
3200     const unsigned version = (tx->version.major << 8) | tx->version.minor;
3201     unsigned i;
3202 
3203     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3204         tx->op_info_map[i] = -1;
3205 
3206     if (tx->processor == PIPE_SHADER_VERTEX) {
3207         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3208             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3209             if (inst_table[i].vert_version.min <= version &&
3210                 inst_table[i].vert_version.max >= version)
3211                 tx->op_info_map[inst_table[i].sio] = i;
3212         }
3213     } else {
3214         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3215             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3216             if (inst_table[i].frag_version.min <= version &&
3217                 inst_table[i].frag_version.max >= version)
3218                 tx->op_info_map[inst_table[i].sio] = i;
3219         }
3220     }
3221 }
3222 
3223 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3224 NineTranslateInstruction_Generic(struct shader_translator *tx)
3225 {
3226     struct ureg_dst dst[1];
3227     struct ureg_src src[4];
3228     unsigned i;
3229 
3230     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3231         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3232     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3233         src[i] = tx_src_param(tx, &tx->insn.src[i]);
3234 
3235     ureg_insn(tx->ureg, tx->insn.info->opcode,
3236               dst, tx->insn.ndst,
3237               src, tx->insn.nsrc, 0);
3238     return D3D_OK;
3239 }
3240 
3241 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3242 TOKEN_PEEK(struct shader_translator *tx)
3243 {
3244     return *(tx->parse);
3245 }
3246 
3247 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3248 TOKEN_NEXT(struct shader_translator *tx)
3249 {
3250     return *(tx->parse)++;
3251 }
3252 
3253 static inline void
TOKEN_JUMP(struct shader_translator * tx)3254 TOKEN_JUMP(struct shader_translator *tx)
3255 {
3256     if (tx->parse_next && tx->parse != tx->parse_next) {
3257         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3258         tx->parse = tx->parse_next;
3259     }
3260 }
3261 
3262 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3263 sm1_parse_eof(struct shader_translator *tx)
3264 {
3265     return TOKEN_PEEK(tx) == NINED3DSP_END;
3266 }
3267 
3268 static void
sm1_read_version(struct shader_translator * tx)3269 sm1_read_version(struct shader_translator *tx)
3270 {
3271     const DWORD tok = TOKEN_NEXT(tx);
3272 
3273     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3274     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3275 
3276     switch (tok >> 16) {
3277     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3278     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3279     default:
3280        DBG("Invalid shader type: %x\n", tok);
3281        tx->processor = ~0;
3282        break;
3283     }
3284 }
3285 
3286 /* This is just to check if we parsed the instruction properly. */
3287 static void
sm1_parse_get_skip(struct shader_translator * tx)3288 sm1_parse_get_skip(struct shader_translator *tx)
3289 {
3290     const DWORD tok = TOKEN_PEEK(tx);
3291 
3292     if (tx->version.major >= 2) {
3293         tx->parse_next = tx->parse + 1 /* this */ +
3294             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3295     } else {
3296         tx->parse_next = NULL; /* TODO: determine from param count */
3297     }
3298 }
3299 
3300 static void
sm1_print_comment(const char * comment,UINT size)3301 sm1_print_comment(const char *comment, UINT size)
3302 {
3303     if (!size)
3304         return;
3305     /* TODO */
3306 }
3307 
3308 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3309 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3310 {
3311     DWORD tok = TOKEN_PEEK(tx);
3312 
3313     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3314     {
3315         const char *comment = "";
3316         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3317         tx->parse += size + 1;
3318 
3319         if (print)
3320             sm1_print_comment(comment, size);
3321 
3322         tok = TOKEN_PEEK(tx);
3323     }
3324 }
3325 
3326 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3327 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3328 {
3329     *reg = TOKEN_NEXT(tx);
3330 
3331     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3332     {
3333         if (tx->version.major < 2)
3334             *rel = (1 << 31) |
3335                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3336                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3337                 D3DSP_NOSWIZZLE;
3338         else
3339             *rel = TOKEN_NEXT(tx);
3340     }
3341 }
3342 
3343 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3344 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3345 {
3346     int8_t shift;
3347     dst->file =
3348         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3349         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3350     dst->type = TGSI_RETURN_TYPE_FLOAT;
3351     dst->idx = tok & D3DSP_REGNUM_MASK;
3352     dst->rel = NULL;
3353     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3354     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3355     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3356     dst->shift = (shift & 0x7) - (shift & 0x8);
3357 }
3358 
3359 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3360 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3361 {
3362     src->file =
3363         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3364         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3365     src->type = TGSI_RETURN_TYPE_FLOAT;
3366     src->idx = tok & D3DSP_REGNUM_MASK;
3367     src->rel = NULL;
3368     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3369     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3370 
3371     switch (src->file) {
3372     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3373     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3374     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3375     default:
3376         break;
3377     }
3378 }
3379 
3380 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3381 sm1_parse_immediate(struct shader_translator *tx,
3382                     struct sm1_src_param *imm)
3383 {
3384     imm->file = NINED3DSPR_IMMEDIATE;
3385     imm->idx = INT_MIN;
3386     imm->rel = NULL;
3387     imm->swizzle = NINED3DSP_NOSWIZZLE;
3388     imm->mod = 0;
3389     switch (tx->insn.opcode) {
3390     case D3DSIO_DEF:
3391         imm->type = NINED3DSPTYPE_FLOAT4;
3392         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3393         tx->parse += 4;
3394         break;
3395     case D3DSIO_DEFI:
3396         imm->type = NINED3DSPTYPE_INT4;
3397         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3398         tx->parse += 4;
3399         break;
3400     case D3DSIO_DEFB:
3401         imm->type = NINED3DSPTYPE_BOOL;
3402         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3403         tx->parse += 1;
3404         break;
3405     default:
3406        assert(0);
3407        break;
3408     }
3409 }
3410 
3411 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3412 sm1_read_dst_param(struct shader_translator *tx,
3413                    struct sm1_dst_param *dst,
3414                    struct sm1_src_param *rel)
3415 {
3416     DWORD tok_dst, tok_rel = 0;
3417 
3418     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3419     sm1_parse_dst_param(dst, tok_dst);
3420     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3421         sm1_parse_src_param(rel, tok_rel);
3422         dst->rel = rel;
3423     }
3424 }
3425 
3426 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3427 sm1_read_src_param(struct shader_translator *tx,
3428                    struct sm1_src_param *src,
3429                    struct sm1_src_param *rel)
3430 {
3431     DWORD tok_src, tok_rel = 0;
3432 
3433     sm1_parse_get_param(tx, &tok_src, &tok_rel);
3434     sm1_parse_src_param(src, tok_src);
3435     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3436         assert(rel);
3437         sm1_parse_src_param(rel, tok_rel);
3438         src->rel = rel;
3439     }
3440 }
3441 
3442 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3443 sm1_read_semantic(struct shader_translator *tx,
3444                   struct sm1_semantic *sem)
3445 {
3446     const DWORD tok_usg = TOKEN_NEXT(tx);
3447     const DWORD tok_dst = TOKEN_NEXT(tx);
3448 
3449     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3450     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3451     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3452 
3453     sm1_parse_dst_param(&sem->reg, tok_dst);
3454 }
3455 
3456 static void
sm1_parse_instruction(struct shader_translator * tx)3457 sm1_parse_instruction(struct shader_translator *tx)
3458 {
3459     struct sm1_instruction *insn = &tx->insn;
3460     HRESULT hr;
3461     DWORD tok;
3462     const struct sm1_op_info *info = NULL;
3463     unsigned i;
3464 
3465     sm1_parse_comments(tx, TRUE);
3466     sm1_parse_get_skip(tx);
3467 
3468     tok = TOKEN_NEXT(tx);
3469 
3470     insn->opcode = tok & D3DSI_OPCODE_MASK;
3471     insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3472     insn->coissue = !!(tok & D3DSI_COISSUE);
3473     insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3474 
3475     if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3476         int k = tx->op_info_map[insn->opcode];
3477         if (k >= 0) {
3478             assert(k < ARRAY_SIZE(inst_table));
3479             info = &inst_table[k];
3480         }
3481     } else {
3482        if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
3483        if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3484     }
3485     if (!info) {
3486        DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3487        TOKEN_JUMP(tx);
3488        return;
3489     }
3490     insn->info = info;
3491     insn->ndst = info->ndst;
3492     insn->nsrc = info->nsrc;
3493 
3494     /* check version */
3495     {
3496         unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3497         unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3498         unsigned ver = (tx->version.major << 8) | tx->version.minor;
3499         if (ver < min || ver > max) {
3500             DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3501                 min, ver, max);
3502             return;
3503         }
3504     }
3505 
3506     for (i = 0; i < insn->ndst; ++i)
3507         sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3508     if (insn->predicated)
3509         sm1_read_src_param(tx, &insn->pred, NULL);
3510     for (i = 0; i < insn->nsrc; ++i)
3511         sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3512 
3513     /* parse here so we can dump them before processing */
3514     if (insn->opcode == D3DSIO_DEF ||
3515         insn->opcode == D3DSIO_DEFI ||
3516         insn->opcode == D3DSIO_DEFB)
3517         sm1_parse_immediate(tx, &tx->insn.src[0]);
3518 
3519     sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3520     sm1_instruction_check(insn);
3521 
3522     if (insn->predicated) {
3523         tx->predicated_activated = true;
3524         if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3525             tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3526             tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3527         }
3528     }
3529 
3530     if (info->handler)
3531         hr = info->handler(tx);
3532     else
3533         hr = NineTranslateInstruction_Generic(tx);
3534     tx_apply_dst0_modifiers(tx);
3535 
3536     if (insn->predicated) {
3537         tx->predicated_activated = false;
3538         /* TODO: predicate might be allowed on outputs,
3539          * which cannot be src. Workaround it. */
3540         ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3541                  ureg_negate(tx_src_param(tx, &insn->pred)),
3542                  ureg_src(tx->regs.predicate_tmp),
3543                  ureg_src(tx->regs.predicate_dst));
3544     }
3545 
3546     if (hr != D3D_OK)
3547         tx->failure = TRUE;
3548     tx->num_scratch = 0; /* reset */
3549 
3550     TOKEN_JUMP(tx);
3551 }
3552 
/* Shorthands for querying driver capabilities.
 * Both macros expand to a call through a variable named `screen` that must
 * exist in the scope where they are used; GET_SHADER_CAP additionally reads
 * `info->type` from that scope to select the shader stage. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
3557 
3558 static HRESULT
tx_ctor(struct shader_translator * tx,struct pipe_screen * screen,struct nine_shader_info * info)3559 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3560 {
3561     unsigned i;
3562 
3563     memset(tx, 0, sizeof(*tx));
3564 
3565     tx->info = info;
3566 
3567     tx->byte_code = info->byte_code;
3568     tx->parse = info->byte_code;
3569 
3570     for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3571         info->input_map[i] = NINE_DECLUSAGE_NONE;
3572     info->num_inputs = 0;
3573 
3574     info->position_t = FALSE;
3575     info->point_size = FALSE;
3576 
3577     memset(tx->slots_used, 0, sizeof(tx->slots_used));
3578     memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3579     memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3580 
3581     tx->info->const_float_slots = 0;
3582     tx->info->const_int_slots = 0;
3583     tx->info->const_bool_slots = 0;
3584 
3585     info->sampler_mask = 0x0;
3586     info->rt_mask = 0x0;
3587 
3588     info->lconstf.data = NULL;
3589     info->lconstf.ranges = NULL;
3590 
3591     info->bumpenvmat_needed = 0;
3592 
3593     for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3594         tx->regs.rL[i] = ureg_dst_undef();
3595     }
3596     tx->regs.address = ureg_dst_undef();
3597     tx->regs.a0 = ureg_dst_undef();
3598     tx->regs.p = ureg_dst_undef();
3599     tx->regs.oDepth = ureg_dst_undef();
3600     tx->regs.vPos = ureg_src_undef();
3601     tx->regs.vFace = ureg_src_undef();
3602     for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3603         tx->regs.o[i] = ureg_dst_undef();
3604     for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3605         tx->regs.oCol[i] = ureg_dst_undef();
3606     for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3607         tx->regs.vC[i] = ureg_src_undef();
3608     for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3609         tx->regs.vT[i] = ureg_src_undef();
3610 
3611     sm1_read_version(tx);
3612 
3613     info->version = (tx->version.major << 4) | tx->version.minor;
3614 
3615     tx->num_outputs = 0;
3616 
3617     create_op_info_map(tx);
3618 
3619     tx->ureg = ureg_create(info->type);
3620     if (!tx->ureg) {
3621         return E_OUTOFMEMORY;
3622     }
3623 
3624     tx->native_integers = GET_SHADER_CAP(INTEGERS);
3625     tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3626     tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3627     tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3628     tx->texcoord_sn = tx->want_texcoord ?
3629         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3630     tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3631     tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3632 
3633     if (IS_VS) {
3634         tx->num_constf_allowed = NINE_MAX_CONST_F;
3635     } else if (tx->version.major < 2) {/* IS_PS v1 */
3636         tx->num_constf_allowed = 8;
3637     } else if (tx->version.major == 2) {/* IS_PS v2 */
3638         tx->num_constf_allowed = 32;
3639     } else {/* IS_PS v3 */
3640         tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3641     }
3642 
3643     if (tx->version.major < 2) {
3644         tx->num_consti_allowed = 0;
3645         tx->num_constb_allowed = 0;
3646     } else {
3647         tx->num_consti_allowed = NINE_MAX_CONST_I;
3648         tx->num_constb_allowed = NINE_MAX_CONST_B;
3649     }
3650 
3651     if (info->swvp_on) {
3652         /* TODO: The values tx->version.major == 1 */
3653         tx->num_constf_allowed = 8192;
3654         tx->num_consti_allowed = 2048;
3655         tx->num_constb_allowed = 2048;
3656     }
3657 
3658     /* VS must always write position. Declare it here to make it the 1st output.
3659      * (Some drivers like nv50 are buggy and rely on that.)
3660      */
3661     if (IS_VS) {
3662         tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3663     } else {
3664         ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3665         if (!tx->shift_wpos)
3666             ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3667     }
3668 
3669     tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
3670     if (tx->mul_zero_wins)
3671        ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3672 
3673     /* Add additional definition of constants */
3674     if (info->add_constants_defs.c_combination) {
3675         unsigned i;
3676 
3677         assert(info->add_constants_defs.int_const_added);
3678         assert(info->add_constants_defs.bool_const_added);
3679         /* We only add constants that are used by the shader
3680          * and that are not defined in the shader */
3681         for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3682             if ((*info->add_constants_defs.int_const_added)[i]) {
3683                 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3684                     info->add_constants_defs.c_combination->const_i[i][0],
3685                     info->add_constants_defs.c_combination->const_i[i][1],
3686                     info->add_constants_defs.c_combination->const_i[i][2],
3687                     info->add_constants_defs.c_combination->const_i[i][3]);
3688                 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3689             }
3690         }
3691         for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3692             if ((*info->add_constants_defs.bool_const_added)[i]) {
3693                 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3694                 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3695             }
3696         }
3697     }
3698     return D3D_OK;
3699 }
3700 
3701 static void
tx_dtor(struct shader_translator * tx)3702 tx_dtor(struct shader_translator *tx)
3703 {
3704     if (tx->slot_map)
3705         FREE(tx->slot_map);
3706     if (tx->num_inst_labels)
3707         FREE(tx->inst_labels);
3708     FREE(tx->lconstf);
3709     FREE(tx->regs.r);
3710     FREE(tx);
3711 }
3712 
3713 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3714  * CONST[1].xyz = x+width/2, y+height/2, zmin */
3715 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3716 shader_add_vs_viewport_transform(struct shader_translator *tx)
3717 {
3718     struct ureg_program *ureg = tx->ureg;
3719     struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3720     struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3721     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3722 
3723     c0 = ureg_src_dimension(c0, 4);
3724     c1 = ureg_src_dimension(c1, 4);
3725     /* TODO: find out when we need to apply the viewport transformation or not.
3726      * Likely will be XYZ vs XYZRHW in vdecl_out
3727      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3728      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3729      */
3730     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3731 }
3732 
3733 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3734 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3735 {
3736     struct ureg_program *ureg = tx->ureg;
3737     struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3738     struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3739     struct ureg_src fog_vs, fog_color;
3740     struct ureg_dst fog_factor, depth;
3741 
3742     if (!tx->info->fog_enable) {
3743         ureg_MOV(ureg, oCol0, src_col);
3744         return;
3745     }
3746 
3747     if (tx->info->fog_mode != D3DFOG_NONE) {
3748         depth = tx_scratch_scalar(tx);
3749         /* Depth used for fog is perspective interpolated */
3750         ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3751         ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3752     }
3753 
3754     fog_color = nine_float_constant_src(tx, 32);
3755     fog_params = nine_float_constant_src(tx, 33);
3756     fog_factor = tx_scratch_scalar(tx);
3757 
3758     if (tx->info->fog_mode == D3DFOG_LINEAR) {
3759         fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3760         fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3761         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3762         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3763     } else if (tx->info->fog_mode == D3DFOG_EXP) {
3764         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3765         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3766         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3767         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3768     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3769         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3770         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3771         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3772         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3773         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3774     } else {
3775         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3776                                             TGSI_INTERPOLATE_PERSPECTIVE),
3777                                             TGSI_SWIZZLE_X);
3778         ureg_MOV(ureg, fog_factor, fog_vs);
3779     }
3780 
3781     ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3782              tx_src_scalar(fog_factor), src_col, fog_color);
3783     ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3784 }
3785 
parse_shader(struct shader_translator * tx)3786 static void parse_shader(struct shader_translator *tx)
3787 {
3788     struct nine_shader_info *info = tx->info;
3789 
3790     while (!sm1_parse_eof(tx) && !tx->failure)
3791         sm1_parse_instruction(tx);
3792     tx->parse++; /* for byte_size */
3793 
3794     if (tx->failure)
3795         return;
3796 
3797     if (IS_PS && tx->version.major < 3) {
3798         if (tx->version.major < 2) {
3799             assert(tx->num_temp); /* there must be color output */
3800             info->rt_mask |= 0x1;
3801             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3802         } else {
3803             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3804         }
3805     }
3806 
3807     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3808         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3809         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3810     }
3811 
3812     if (info->position_t)
3813         ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3814 
3815     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3816         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3817         ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3818         ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3819         info->point_size = TRUE;
3820     }
3821 
3822     if (info->process_vertices)
3823         shader_add_vs_viewport_transform(tx);
3824 
3825     ureg_END(tx->ureg);
3826 }
3827 
/* Bit flags selectable through the NINE_SHADER debug option.
 * Each flag corresponds to one named entry in the table below. */
#define NINE_SHADER_DEBUG_OPTION_NIR_VS           (1 << 0)
#define NINE_SHADER_DEBUG_OPTION_NIR_PS           (1 << 1)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)

/* Name / flag / description table used to parse the NINE_SHADER option. */
static const struct debug_named_value nine_shader_debug_options[] = {
    { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
    { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
    { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
    { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
    { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
    { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
    DEBUG_NAMED_VALUE_END /* must be last */
};
3844 
3845 static inline boolean
nine_shader_get_debug_flag(uint64_t flag)3846 nine_shader_get_debug_flag(uint64_t flag)
3847 {
3848     static uint64_t flags = 0;
3849     static boolean first_run = TRUE;
3850 
3851     if (unlikely(first_run)) {
3852         first_run = FALSE;
3853         flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3854 
3855         // Check old TGSI dump envvar too
3856         if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3857             flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3858         }
3859     }
3860 
3861     return !!(flags & flag);
3862 }
3863 
3864 static void
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state * state,const struct tgsi_token * tgsi_tokens,struct pipe_screen * screen)3865 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3866                                      struct pipe_screen *screen)
3867 {
3868     struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3869 
3870     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3871         nir_print_shader(nir, stdout);
3872     }
3873 
3874     state->type = PIPE_SHADER_IR_NIR;
3875     state->tokens = NULL;
3876     state->ir.nir = nir;
3877     memset(&state->stream_output, 0, sizeof(state->stream_output));
3878 }
3879 
3880 static void *
nine_ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)3881 nine_ureg_create_shader(struct ureg_program                  *ureg,
3882                         struct pipe_context                  *pipe,
3883                         const struct pipe_stream_output_info   *so)
3884 {
3885     struct pipe_shader_state state;
3886     const struct tgsi_token *tgsi_tokens;
3887     struct pipe_screen *screen = pipe->screen;
3888 
3889     tgsi_tokens = ureg_finalize(ureg);
3890     if (!tgsi_tokens)
3891         return NULL;
3892 
3893     assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3894     enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3895 
3896     int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3897     bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3898     bool use_nir = prefer_nir ||
3899         ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3900         ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
3901 
3902     /* Allow user to override preferred IR, this is very useful for debugging */
3903     if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3904         use_nir = false;
3905     if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3906         use_nir = false;
3907 
3908     DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3909          shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3910          prefer_nir ? "NIR" : "TGSI",
3911          use_nir ? "NIR" : "TGSI");
3912 
3913     if (use_nir) {
3914         nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
3915     } else {
3916         pipe_shader_state_from_tgsi(&state, tgsi_tokens);
3917     }
3918 
3919     assert(state.tokens || state.ir.nir);
3920 
3921     if (so)
3922         state.stream_output = *so;
3923 
3924     switch (shader_type) {
3925     case PIPE_SHADER_VERTEX:
3926         return pipe->create_vs_state(pipe, &state);
3927     case PIPE_SHADER_FRAGMENT:
3928         return pipe->create_fs_state(pipe, &state);
3929     default:
3930         unreachable("unsupported shader type");
3931     }
3932 }
3933 
3934 
/* Create the driver shader for ureg program `p` (with optional stream
 * output info `so`) and destroy the program afterwards, whether or not
 * shader creation succeeded. Returns the driver shader object or NULL. */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                   *p,
                                       struct pipe_context                *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso;

    cso = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);

    return cso;
}
3944 
3945 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3946 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3947 {
3948     struct shader_translator *tx;
3949     HRESULT hr = D3D_OK;
3950     const unsigned processor = info->type;
3951     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3952     unsigned *const_ranges = NULL;
3953 
3954     user_assert(processor != ~0, D3DERR_INVALIDCALL);
3955 
3956     tx = MALLOC_STRUCT(shader_translator);
3957     if (!tx)
3958         return E_OUTOFMEMORY;
3959 
3960     if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3961         hr = E_OUTOFMEMORY;
3962         goto out;
3963     }
3964 
3965     assert(IS_VS || !info->swvp_on);
3966 
3967     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3968         hr = D3DERR_INVALIDCALL;
3969         DBG("Unsupported shader version: %u.%u !\n",
3970             tx->version.major, tx->version.minor);
3971         goto out;
3972     }
3973     if (tx->processor != processor) {
3974         hr = D3DERR_INVALIDCALL;
3975         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3976         goto out;
3977     }
3978     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3979          tx->version.major, tx->version.minor);
3980 
3981     parse_shader(tx);
3982 
3983     if (tx->failure) {
3984         /* For VS shaders, we print the warning later,
3985          * we first try with swvp. */
3986         if (IS_PS)
3987             ERR("Encountered buggy shader\n");
3988         ureg_destroy(tx->ureg);
3989         hr = D3DERR_INVALIDCALL;
3990         goto out;
3991     }
3992 
3993     /* Recompile after compacting constant slots if possible */
3994     if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
3995         unsigned *slot_map;
3996         unsigned c;
3997         int i, j, num_ranges, prev;
3998 
3999         DBG("Recompiling shader for constant compaction\n");
4000         ureg_destroy(tx->ureg);
4001 
4002         if (tx->num_inst_labels)
4003             FREE(tx->inst_labels);
4004         FREE(tx->lconstf);
4005         FREE(tx->regs.r);
4006 
4007         num_ranges = 0;
4008         prev = -2;
4009         for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4010             if (tx->slots_used[i]) {
4011                 if (prev != i - 1)
4012                     num_ranges++;
4013                 prev = i;
4014             }
4015         }
4016         slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
4017         const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4018         if (!slot_map || !const_ranges) {
4019             hr = E_OUTOFMEMORY;
4020             goto out;
4021         }
4022         c = 0;
4023         j = -1;
4024         prev = -2;
4025         for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4026             if (tx->slots_used[i]) {
4027                 if (prev != i - 1)
4028                     j++;
4029                 /* Initialize first slot of the range */
4030                 if (!const_ranges[2*j+1])
4031                     const_ranges[2*j] = i;
4032                 const_ranges[2*j+1]++;
4033                 prev = i;
4034                 slot_map[i] = c++;
4035             }
4036         }
4037 
4038         if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4039             hr = E_OUTOFMEMORY;
4040             goto out;
4041         }
4042         tx->slot_map = slot_map;
4043         parse_shader(tx);
4044         assert(!tx->failure);
4045 #if !defined(NDEBUG)
4046         i = 0;
4047         j = 0;
4048         while (const_ranges[i*2+1] != 0) {
4049             j += const_ranges[i*2+1];
4050             i++;
4051         }
4052         assert(j == tx->num_slots);
4053 #endif
4054     }
4055 
4056     /* record local constants */
4057     if (tx->num_lconstf && tx->indirect_const_access) {
4058         struct nine_range *ranges;
4059         float *data;
4060         int *indices;
4061         unsigned i, k, n;
4062 
4063         hr = E_OUTOFMEMORY;
4064 
4065         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4066         if (!data)
4067             goto out;
4068         info->lconstf.data = data;
4069 
4070         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4071         if (!indices)
4072             goto out;
4073 
4074         /* lazy sort, num_lconstf should be small */
4075         for (n = 0; n < tx->num_lconstf; ++n) {
4076             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4077                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4078                     k = i;
4079             }
4080             indices[n] = tx->lconstf[k].idx;
4081             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4082             tx->lconstf[k].idx = INT_MAX;
4083         }
4084 
4085         /* count ranges */
4086         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4087             if (indices[i] != indices[i - 1] + 1)
4088                 ++n;
4089         ranges = MALLOC(n * sizeof(ranges[0]));
4090         if (!ranges) {
4091             FREE(indices);
4092             goto out;
4093         }
4094         info->lconstf.ranges = ranges;
4095 
4096         k = 0;
4097         ranges[k].bgn = indices[0];
4098         for (i = 1; i < tx->num_lconstf; ++i) {
4099             if (indices[i] != indices[i - 1] + 1) {
4100                 ranges[k].next = &ranges[k + 1];
4101                 ranges[k].end = indices[i - 1] + 1;
4102                 ++k;
4103                 ranges[k].bgn = indices[i];
4104             }
4105         }
4106         ranges[k].end = indices[i - 1] + 1;
4107         ranges[k].next = NULL;
4108         assert(n == (k + 1));
4109 
4110         FREE(indices);
4111         hr = D3D_OK;
4112     }
4113 
4114     /* r500 */
4115     if (info->const_float_slots > device->max_vs_const_f &&
4116         (info->const_int_slots || info->const_bool_slots) &&
4117         !info->swvp_on)
4118         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4119 
4120 
4121     if (tx->indirect_const_access) { /* vs only */
4122         info->const_float_slots = device->max_vs_const_f;
4123         tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4124     }
4125 
4126     if (!info->swvp_on) {
4127         info->const_used_size = sizeof(float[4]) * tx->num_slots;
4128         if (tx->num_slots)
4129             ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4130     } else {
4131          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4132          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4133          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4134          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4135     }
4136 
4137     if (info->process_vertices)
4138         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4139 
4140     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4141         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4142         tgsi_dump(toks, 0);
4143         ureg_free_tokens(toks);
4144     }
4145 
4146     if (info->process_vertices) {
4147         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4148                                                     tx->output_info,
4149                                                     tx->num_outputs,
4150                                                     &(info->so));
4151         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4152     } else
4153         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4154     if (!info->cso) {
4155         hr = D3DERR_DRIVERINTERNALERROR;
4156         FREE(info->lconstf.data);
4157         FREE(info->lconstf.ranges);
4158         goto out;
4159     }
4160 
4161     info->const_ranges = const_ranges;
4162     const_ranges = NULL;
4163     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4164 out:
4165     if (const_ranges)
4166         FREE(const_ranges);
4167     tx_dtor(tx);
4168     return hr;
4169 }
4170