1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "nir/tgsi_to_nir.h"
38
/* Debug channel selected for the debug output emitted from this file. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style output on DBG_CHANNEL; forwards to _nine_debug_printf with
 * NULL as its second argument. Used by the sm1_dump_* helpers below. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
42
43
/* Forward declaration; the full definition appears further down in this file. */
struct shader_translator;

/* Callback type stored in sm1_op_info::handler for instructions that need a
 * special translation routine. */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Returns a printable name for an opcode; used by sm1_dump_instruction.
 * The definition is not part of this section of the file. */
static inline const char *d3dsio_to_string(unsigned opcode);
49
50
/* Shader-type identifiers for vertex and pixel shaders
 * (presumably the high word of the version token -- TODO confirm). */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the cond_labels[] and loop_labels[]/rL[] arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

/* NOTE(review): looks like the end-of-shader token value -- confirm. */
#define NINED3DSP_END 0x0000ffff

/* Values of the 'type' field of sm1_src_param; they select which member of
 * the immediate union sm1_dump_immediate prints. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Pseudo register file, one past D3DSPR_PREDICATE, marking a source parameter
 * that carries an immediate value (see sm1_dump_src_param). */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

/* Bit 28 flag (presumably of the instruction token -- TODO confirm). */
#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position and mask of an 8-bit opcode-specific flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Flag values for D3DSIO_TEX; sm1_dump_instruction prints PROJECT as "p"
 * and any other non-zero flag value as "b". */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-channel write mask bits (0 = x ... 3 = w), already shifted down to bit 0. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per channel selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* constants. */
#define NINE_SWIZZLE4(x,y,z,w) \
TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component 's' of 'src' into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier bits, shifted down by D3DSP_DSTMOD_SHIFT; tested
 * against sm1_dst_param::mod in sm1_dump_dst_param. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
100
/*
 * Source modifiers and the shader versions they are listed for:
 *
 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP <= PS 1.4 (1-x)
 * X2 = PS 1.4 (2x)
 * X2NEG = PS 1.4 (-2x)
 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS >= SM 3.0 (abs(x))
 * ABSNEG >= SM 3.0 (-abs(x))
 * NOT >= SM 2.0 predication only
 *
 * Each value below is the D3D modifier shifted down by D3DSP_SRCMOD_SHIFT,
 * which makes it usable as an index into sm1_mod_str[].
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
130
/* Printable prefix for each source modifier, indexed by NINED3DSPSM_*.
 * sm1_dump_src_param prints the entry followed by "(" when the modifier is
 * non-zero. The table ends at NINED3DSPSM_NOT. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
148
149 static void
sm1_dump_writemask(BYTE mask)150 sm1_dump_writemask(BYTE mask)
151 {
152 if (mask & 1) DUMP("x"); else DUMP("_");
153 if (mask & 2) DUMP("y"); else DUMP("_");
154 if (mask & 4) DUMP("z"); else DUMP("_");
155 if (mask & 8) DUMP("w"); else DUMP("_");
156 }
157
158 static void
sm1_dump_swizzle(BYTE s)159 sm1_dump_swizzle(BYTE s)
160 {
161 char c[4] = { 'x', 'y', 'z', 'w' };
162 DUMP("%c%c%c%c",
163 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
164 }
165
/* One-character tag for each D3DSPR_* register file, used by the dump
 * helpers. The last designated entry is D3DSPR_PREDICATE, so the pseudo file
 * NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) has no entry here. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
189
190 static void
sm1_dump_reg(BYTE file,INT index)191 sm1_dump_reg(BYTE file, INT index)
192 {
193 switch (file) {
194 case D3DSPR_LOOP:
195 DUMP("aL");
196 break;
197 case D3DSPR_COLOROUT:
198 DUMP("oC%i", index);
199 break;
200 case D3DSPR_DEPTHOUT:
201 DUMP("oDepth");
202 break;
203 case D3DSPR_RASTOUT:
204 DUMP("oRast%i", index);
205 break;
206 case D3DSPR_CONSTINT:
207 DUMP("iconst[%i]", index);
208 break;
209 case D3DSPR_CONSTBOOL:
210 DUMP("bconst[%i]", index);
211 break;
212 default:
213 DUMP("%c%i", sm1_file_char[file], index);
214 break;
215 }
216 }
217
/* Decoded form of one source parameter. */
struct sm1_src_param
{
    INT idx;                   /* register index inside 'file' */
    struct sm1_src_param *rel; /* relative-addressing parameter, or NULL */
    BYTE file;                 /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle;              /* packed swizzle, NINED3DSP_NOSWIZZLE if none */
    BYTE mod;                  /* NINED3DSPSM_* source modifier */
    BYTE type;                 /* NINED3DSPTYPE_*; selects the 'imm' member */
    /* Immediate payload; the dump code reads it when file is
     * NINED3DSPR_IMMEDIATE. */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm;
};
/* Defined elsewhere in this file (not visible in this section). */
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
235
/* Decoded form of one destination parameter. */
struct sm1_dst_param
{
    INT idx;                   /* register index inside 'file' */
    struct sm1_src_param *rel; /* relative-addressing parameter, or NULL */
    BYTE file;                 /* D3DSPR_* register file */
    BYTE mask;                 /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod;                  /* NINED3DSPDM_* destination modifier bits */
    int8_t shift; /* sint4 */  /* dumped as "*2^n" (> 0) or "/2^n" (< 0) */
    BYTE type;
};
246
247 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)248 assert_replicate_swizzle(const struct ureg_src *reg)
249 {
250 assert(reg->SwizzleY == reg->SwizzleX &&
251 reg->SwizzleZ == reg->SwizzleX &&
252 reg->SwizzleW == reg->SwizzleX);
253 }
254
255 static void
sm1_dump_immediate(const struct sm1_src_param * param)256 sm1_dump_immediate(const struct sm1_src_param *param)
257 {
258 switch (param->type) {
259 case NINED3DSPTYPE_FLOAT4:
260 DUMP("{ %f %f %f %f }",
261 param->imm.f[0], param->imm.f[1],
262 param->imm.f[2], param->imm.f[3]);
263 break;
264 case NINED3DSPTYPE_INT4:
265 DUMP("{ %i %i %i %i }",
266 param->imm.i[0], param->imm.i[1],
267 param->imm.i[2], param->imm.i[3]);
268 break;
269 case NINED3DSPTYPE_BOOL:
270 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
271 break;
272 default:
273 assert(0);
274 break;
275 }
276 }
277
278 static void
sm1_dump_src_param(const struct sm1_src_param * param)279 sm1_dump_src_param(const struct sm1_src_param *param)
280 {
281 if (param->file == NINED3DSPR_IMMEDIATE) {
282 assert(!param->mod &&
283 !param->rel &&
284 param->swizzle == NINED3DSP_NOSWIZZLE);
285 sm1_dump_immediate(param);
286 return;
287 }
288
289 if (param->mod)
290 DUMP("%s(", sm1_mod_str[param->mod]);
291 if (param->rel) {
292 DUMP("%c[", sm1_file_char[param->file]);
293 sm1_dump_src_param(param->rel);
294 DUMP("+%i]", param->idx);
295 } else {
296 sm1_dump_reg(param->file, param->idx);
297 }
298 if (param->mod)
299 DUMP(")");
300 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
301 DUMP(".");
302 sm1_dump_swizzle(param->swizzle);
303 }
304 }
305
306 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)307 sm1_dump_dst_param(const struct sm1_dst_param *param)
308 {
309 if (param->mod & NINED3DSPDM_SATURATE)
310 DUMP("sat ");
311 if (param->mod & NINED3DSPDM_PARTIALP)
312 DUMP("pp ");
313 if (param->mod & NINED3DSPDM_CENTROID)
314 DUMP("centroid ");
315 if (param->shift < 0)
316 DUMP("/%u ", 1 << -param->shift);
317 if (param->shift > 0)
318 DUMP("*%u ", 1 << param->shift);
319
320 if (param->rel) {
321 DUMP("%c[", sm1_file_char[param->file]);
322 sm1_dump_src_param(param->rel);
323 DUMP("+%i]", param->idx);
324 } else {
325 sm1_dump_reg(param->file, param->idx);
326 }
327 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
328 DUMP(".");
329 sm1_dump_writemask(param->mask);
330 }
331 }
332
/* A declared register together with its semantic; filled in by
 * sm1_read_semantic (defined elsewhere in this file). */
struct sm1_semantic
{
    struct sm1_dst_param reg; /* the declared register */
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
340
/* Static description of one shader opcode: its TGSI equivalent, the shader
 * versions that accept it, its operand counts and an optional handler. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even if handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
361
/* Decoded form of the instruction currently being processed. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags;      /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* */
    BOOL coissue;    /* dumped as the "_co" suffix */
    BOOL predicated; /* when set, 'pred' holds the predicate parameter */
    BYTE ndst;       /* number of valid entries in dst[] */
    BYTE nsrc;       /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    const struct sm1_op_info *info;
};
378
379 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)380 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
381 {
382 unsigned i;
383
384 /* no info stored for these: */
385 if (insn->opcode == D3DSIO_DCL)
386 return;
387 for (i = 0; i < indent; ++i)
388 DUMP(" ");
389
390 if (insn->predicated) {
391 DUMP("@");
392 sm1_dump_src_param(&insn->pred);
393 DUMP(" ");
394 }
395 DUMP("%s", d3dsio_to_string(insn->opcode));
396 if (insn->flags) {
397 switch (insn->opcode) {
398 case D3DSIO_TEX:
399 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
400 break;
401 default:
402 DUMP("_%x", insn->flags);
403 break;
404 }
405 }
406 if (insn->coissue)
407 DUMP("_co");
408 DUMP(" ");
409
410 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
411 sm1_dump_dst_param(&insn->dst[i]);
412 DUMP(" ");
413 }
414
415 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
416 sm1_dump_src_param(&insn->src[i]);
417 DUMP(" ");
418 }
419 if (insn->opcode == D3DSIO_DEF ||
420 insn->opcode == D3DSIO_DEFI ||
421 insn->opcode == D3DSIO_DEFB)
422 sm1_dump_immediate(&insn->src[0]);
423
424 DUMP("\n");
425 }
426
/* A constant defined inside the shader (see the tx_set_lconst* helpers). */
struct sm1_local_const
{
    INT idx;             /* constant register index the value is bound to */
    struct ureg_src reg; /* immediate that holds the value */
    float f[4]; /* for indirect addressing of float constants */
};
433
/* Complete state of one shader translation: parse cursor, target ureg
 * program, lazily declared registers, control-flow bookkeeping, local
 * constants and constant-slot usage tracking. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* Upper bounds checked by tx_lconst{f,i,b} and tx_set_lconst{f,i,b}. */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval;
    boolean face_is_sysval_integer;
    boolean mul_zero_wins;
    unsigned texcoord_sn;

    struct sm1_instruction insn; /* current instruction */

    /* Registers; most are declared on first use by the tx_*_alloc helpers. */
    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst predicate;
        struct ureg_dst predicate_tmp;
        struct ureg_dst predicate_dst;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[8]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* number of entries allocated in regs.r */
    unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
    boolean predicated_activated;

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* Local constants; each array grows in steps of 8 entries
     * (see tx_set_lconst{f,i,b}). */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    /* Constant-buffer slot tracking, updated by the nine_*_constant_src
     * helpers; slot_map, when set, remaps slot indices. */
    boolean slots_used[NINE_MAX_CONST_ALL];
    unsigned *slot_map;
    unsigned num_slots;

    boolean indirect_const_access;
    boolean failure; /* set when translation hits an error */

    /* Filled by nine_record_outputs; num_outputs counts the used entries. */
    struct nine_vs_output_info output_info[16];
    int num_outputs;

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
526
/* Shader-stage tests; both expect a variable named 'tx' in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when 'cond'
 * holds. Expects 'tx' in scope. NOTE(review): expands to a bare 'if'
 * statement and is invoked without a trailing semicolon in this file, so
 * take care when using it next to an 'else'. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

/* Defined elsewhere in this file (not visible in this section). */
static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
534
535 static void
sm1_instruction_check(const struct sm1_instruction * insn)536 sm1_instruction_check(const struct sm1_instruction *insn)
537 {
538 if (insn->opcode == D3DSIO_CRS)
539 {
540 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
541 {
542 DBG("CRS.mask.w\n");
543 }
544 }
545 }
546
547 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)548 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
549 int mask, int output_index)
550 {
551 tx->output_info[tx->num_outputs].output_semantic = Usage;
552 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
553 tx->output_info[tx->num_outputs].mask = mask;
554 tx->output_info[tx->num_outputs].output_index = output_index;
555 tx->num_outputs++;
556 }
557
nine_float_constant_src(struct shader_translator * tx,int idx)558 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
559 {
560 struct ureg_src src;
561
562 if (tx->slot_map)
563 idx = tx->slot_map[idx];
564 /* vswp constant handling: we use two buffers
565 * to fit all the float constants. The special handling
566 * doesn't need to be elsewhere, because all the instructions
567 * accessing the constants directly are VS1, and swvp
568 * is VS >= 2 */
569 if (tx->info->swvp_on && idx >= 4096) {
570 /* TODO: swvp rel is broken if many constants are used */
571 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
572 src = ureg_src_dimension(src, 1);
573 } else {
574 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
575 src = ureg_src_dimension(src, 0);
576 }
577
578 if (!tx->info->swvp_on)
579 tx->slots_used[idx] = TRUE;
580 if (tx->info->const_float_slots < (idx + 1))
581 tx->info->const_float_slots = idx + 1;
582 if (tx->num_slots < (idx + 1))
583 tx->num_slots = idx + 1;
584
585 return src;
586 }
587
nine_integer_constant_src(struct shader_translator * tx,int idx)588 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
589 {
590 struct ureg_src src;
591
592 if (tx->info->swvp_on) {
593 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
594 src = ureg_src_dimension(src, 2);
595 } else {
596 unsigned slot_idx = tx->info->const_i_base + idx;
597 if (tx->slot_map)
598 slot_idx = tx->slot_map[slot_idx];
599 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
600 src = ureg_src_dimension(src, 0);
601 tx->slots_used[slot_idx] = TRUE;
602 tx->info->int_slots_used[idx] = TRUE;
603 if (tx->num_slots < (slot_idx + 1))
604 tx->num_slots = slot_idx + 1;
605 }
606
607 if (tx->info->const_int_slots < (idx + 1))
608 tx->info->const_int_slots = idx + 1;
609
610 return src;
611 }
612
nine_boolean_constant_src(struct shader_translator * tx,int idx)613 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
614 {
615 struct ureg_src src;
616
617 char r = idx / 4;
618 char s = idx & 3;
619
620 if (tx->info->swvp_on) {
621 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
622 src = ureg_src_dimension(src, 3);
623 } else {
624 unsigned slot_idx = tx->info->const_b_base + r;
625 if (tx->slot_map)
626 slot_idx = tx->slot_map[slot_idx];
627 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
628 src = ureg_src_dimension(src, 0);
629 tx->slots_used[slot_idx] = TRUE;
630 tx->info->bool_slots_used[idx] = TRUE;
631 if (tx->num_slots < (slot_idx + 1))
632 tx->num_slots = slot_idx + 1;
633 }
634 src = ureg_swizzle(src, s, s, s, s);
635
636 if (tx->info->const_bool_slots < (idx + 1))
637 tx->info->const_bool_slots = idx + 1;
638
639 return src;
640 }
641
642 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)643 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
644 {
645 INT i;
646
647 if (index < 0 || index >= tx->num_constf_allowed) {
648 tx->failure = TRUE;
649 return FALSE;
650 }
651 for (i = 0; i < tx->num_lconstf; ++i) {
652 if (tx->lconstf[i].idx == index) {
653 *src = tx->lconstf[i].reg;
654 return TRUE;
655 }
656 }
657 return FALSE;
658 }
659 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)660 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
661 {
662 int i;
663
664 if (index < 0 || index >= tx->num_consti_allowed) {
665 tx->failure = TRUE;
666 return FALSE;
667 }
668 for (i = 0; i < tx->num_lconsti; ++i) {
669 if (tx->lconsti[i].idx == index) {
670 *src = tx->lconsti[i].reg;
671 return TRUE;
672 }
673 }
674 return FALSE;
675 }
676 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)677 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
678 {
679 int i;
680
681 if (index < 0 || index >= tx->num_constb_allowed) {
682 tx->failure = TRUE;
683 return FALSE;
684 }
685 for (i = 0; i < tx->num_lconstb; ++i) {
686 if (tx->lconstb[i].idx == index) {
687 *src = tx->lconstb[i].reg;
688 return TRUE;
689 }
690 }
691 return FALSE;
692 }
693
694 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])695 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
696 {
697 unsigned n;
698
699 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
700
701 for (n = 0; n < tx->num_lconstf; ++n)
702 if (tx->lconstf[n].idx == index)
703 break;
704 if (n == tx->num_lconstf) {
705 if ((n % 8) == 0) {
706 tx->lconstf = REALLOC(tx->lconstf,
707 (n + 0) * sizeof(tx->lconstf[0]),
708 (n + 8) * sizeof(tx->lconstf[0]));
709 assert(tx->lconstf);
710 }
711 tx->num_lconstf++;
712 }
713 tx->lconstf[n].idx = index;
714 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
715
716 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
717 }
718 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])719 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
720 {
721 unsigned n;
722
723 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
724
725 for (n = 0; n < tx->num_lconsti; ++n)
726 if (tx->lconsti[n].idx == index)
727 break;
728 if (n == tx->num_lconsti) {
729 if ((n % 8) == 0) {
730 tx->lconsti = REALLOC(tx->lconsti,
731 (n + 0) * sizeof(tx->lconsti[0]),
732 (n + 8) * sizeof(tx->lconsti[0]));
733 assert(tx->lconsti);
734 }
735 tx->num_lconsti++;
736 }
737
738 tx->lconsti[n].idx = index;
739 tx->lconsti[n].reg = tx->native_integers ?
740 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
741 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
742 }
743 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)744 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
745 {
746 unsigned n;
747
748 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
749
750 for (n = 0; n < tx->num_lconstb; ++n)
751 if (tx->lconstb[n].idx == index)
752 break;
753 if (n == tx->num_lconstb) {
754 if ((n % 8) == 0) {
755 tx->lconstb = REALLOC(tx->lconstb,
756 (n + 0) * sizeof(tx->lconstb[0]),
757 (n + 8) * sizeof(tx->lconstb[0]));
758 assert(tx->lconstb);
759 }
760 tx->num_lconstb++;
761 }
762
763 tx->lconstb[n].idx = index;
764 tx->lconstb[n].reg = tx->native_integers ?
765 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
766 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
767 }
768
769 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)770 tx_scratch(struct shader_translator *tx)
771 {
772 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
773 tx->failure = TRUE;
774 return tx->regs.t[0];
775 }
776 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
777 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
778 return tx->regs.t[tx->num_scratch++];
779 }
780
781 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)782 tx_scratch_scalar(struct shader_translator *tx)
783 {
784 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
785 }
786
787 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)788 tx_src_scalar(struct ureg_dst dst)
789 {
790 struct ureg_src src = ureg_src(dst);
791 int c = ffs(dst.WriteMask) - 1;
792 if (dst.WriteMask == (1 << c))
793 src = ureg_scalar(src, c);
794 return src;
795 }
796
797 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)798 tx_temp_alloc(struct shader_translator *tx, INT idx)
799 {
800 assert(idx >= 0);
801 if (idx >= tx->num_temp) {
802 unsigned k = tx->num_temp;
803 unsigned n = idx + 1;
804 tx->regs.r = REALLOC(tx->regs.r,
805 k * sizeof(tx->regs.r[0]),
806 n * sizeof(tx->regs.r[0]));
807 for (; k < n; ++k)
808 tx->regs.r[k] = ureg_dst_undef();
809 tx->num_temp = n;
810 }
811 if (ureg_dst_is_undef(tx->regs.r[idx]))
812 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
813 }
814
815 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)816 tx_addr_alloc(struct shader_translator *tx, INT idx)
817 {
818 assert(idx == 0);
819 if (ureg_dst_is_undef(tx->regs.address))
820 tx->regs.address = ureg_DECL_address(tx->ureg);
821 if (ureg_dst_is_undef(tx->regs.a0))
822 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
823 }
824
825 static inline bool
TEX_if_fetch4(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)826 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
827 unsigned target, struct ureg_src src0,
828 struct ureg_src src1, INT idx)
829 {
830 struct ureg_dst tmp;
831 struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
832
833 if (!(tx->info->fetch4 & (1 << idx)))
834 return false;
835
836 /* TODO: needs more tests, but this feature is not much used at all */
837
838 tmp = tx_scratch(tx);
839 ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
840 NULL, 0, src_tg4, 3);
841 ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
842 return true;
843 }
844
845 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
846 * the projection should be applied on the texture. It doesn't
847 * apply on texkill.
848 * The doc is very imprecise here (it says the projection is done
849 * before rasterization, thus in vs, which seems wrong since ps instructions
850 * are affected differently)
851 * For now we only apply to the ps TEX instruction and TEXBEM.
852 * Perhaps some other instructions would need it */
853 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)854 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
855 struct ureg_src src, INT idx)
856 {
857 struct ureg_dst tmp;
858 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
859
860 /* no projection */
861 if (dim == 1) {
862 ureg_MOV(tx->ureg, dst, src);
863 } else {
864 tmp = tx_scratch_scalar(tx);
865 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
866 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
867 }
868 }
869
870 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)871 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
872 unsigned target, struct ureg_src src0,
873 struct ureg_src src1, INT idx)
874 {
875 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
876 struct ureg_dst tmp;
877 boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
878
879 /* dim == 1: no projection
880 * Looks like must be disabled when it makes no
881 * sense according the texture dimensions
882 */
883 if (dim == 1 || (dim <= target && !shadow)) {
884 ureg_TEX(tx->ureg, dst, target, src0, src1);
885 } else if (dim == 4) {
886 ureg_TXP(tx->ureg, dst, target, src0, src1);
887 } else {
888 tmp = tx_scratch(tx);
889 apply_ps1x_projection(tx, tmp, src0, idx);
890 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
891 }
892 }
893
894 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)895 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
896 {
897 assert(IS_PS);
898 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
899 if (ureg_src_is_undef(tx->regs.vT[idx]))
900 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
901 TGSI_INTERPOLATE_PERSPECTIVE);
902 }
903
904 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)905 tx_bgnloop(struct shader_translator *tx)
906 {
907 tx->loop_depth++;
908 if (tx->loop_depth_max < tx->loop_depth)
909 tx->loop_depth_max = tx->loop_depth;
910 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
911 return &tx->loop_labels[tx->loop_depth - 1];
912 }
913
914 static inline unsigned *
tx_endloop(struct shader_translator * tx)915 tx_endloop(struct shader_translator *tx)
916 {
917 assert(tx->loop_depth);
918 tx->loop_depth--;
919 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
920 ureg_get_instruction_number(tx->ureg));
921 return &tx->loop_labels[tx->loop_depth];
922 }
923
924 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)925 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
926 {
927 const unsigned l = tx->loop_depth - 1;
928
929 if (!tx->loop_depth)
930 {
931 DBG("loop counter requested outside of loop\n");
932 return ureg_dst_undef();
933 }
934
935 if (ureg_dst_is_undef(tx->regs.rL[l])) {
936 /* loop or rep ctr creation */
937 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
938 tx->loop_or_rep[l] = loop_or_rep;
939 }
940 /* loop - rep - endloop - endrep not allowed */
941 assert(tx->loop_or_rep[l] == loop_or_rep);
942
943 return tx->regs.rL[l];
944 }
945
946 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)947 tx_get_loopal(struct shader_translator *tx)
948 {
949 int loop_level = tx->loop_depth - 1;
950
951 while (loop_level >= 0) {
952 /* handle loop - rep - endrep - endloop case */
953 if (tx->loop_or_rep[loop_level])
954 /* the value is in the loop counter y component (nine implementation) */
955 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
956 loop_level--;
957 }
958
959 DBG("aL counter requested outside of loop\n");
960 return ureg_src_undef();
961 }
962
963 static inline unsigned *
tx_cond(struct shader_translator * tx)964 tx_cond(struct shader_translator *tx)
965 {
966 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
967 tx->cond_depth++;
968 return &tx->cond_labels[tx->cond_depth - 1];
969 }
970
971 static inline unsigned *
tx_elsecond(struct shader_translator * tx)972 tx_elsecond(struct shader_translator *tx)
973 {
974 assert(tx->cond_depth);
975 return &tx->cond_labels[tx->cond_depth - 1];
976 }
977
978 static inline void
tx_endcond(struct shader_translator * tx)979 tx_endcond(struct shader_translator *tx)
980 {
981 assert(tx->cond_depth);
982 tx->cond_depth--;
983 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
984 ureg_get_instruction_number(tx->ureg));
985 }
986
987 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)988 nine_ureg_dst_register(unsigned file, int index)
989 {
990 return ureg_dst(ureg_src_register(file, index));
991 }
992
993 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)994 nine_get_position_input(struct shader_translator *tx)
995 {
996 struct ureg_program *ureg = tx->ureg;
997
998 if (tx->wpos_is_sysval)
999 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1000 else
1001 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1002 0, TGSI_INTERPOLATE_LINEAR);
1003 }
1004
1005 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)1006 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1007 {
1008 struct ureg_program *ureg = tx->ureg;
1009 struct ureg_src src;
1010 struct ureg_dst tmp;
1011
1012 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1013 (param->file == D3DSPR_INPUT && tx->version.major == 3));
1014
1015 switch (param->file)
1016 {
1017 case D3DSPR_TEMP:
1018 tx_temp_alloc(tx, param->idx);
1019 src = ureg_src(tx->regs.r[param->idx]);
1020 break;
1021 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1022 case D3DSPR_ADDR:
1023 if (IS_VS) {
1024 assert(param->idx == 0);
1025 /* the address register (vs only) must be
1026 * assigned before use */
1027 assert(!ureg_dst_is_undef(tx->regs.a0));
1028 /* Round to lowest for vs1.1 (contrary to the doc), else
1029 * round to nearest */
1030 if (tx->version.major < 2 && tx->version.minor < 2)
1031 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1032 else
1033 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1034 src = ureg_src(tx->regs.address);
1035 } else {
1036 if (tx->version.major < 2 && tx->version.minor < 4) {
1037 /* no subroutines, so should be defined */
1038 src = ureg_src(tx->regs.tS[param->idx]);
1039 } else {
1040 tx_texcoord_alloc(tx, param->idx);
1041 src = tx->regs.vT[param->idx];
1042 }
1043 }
1044 break;
1045 case D3DSPR_INPUT:
1046 if (IS_VS) {
1047 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1048 } else {
1049 if (tx->version.major < 3) {
1050 src = ureg_DECL_fs_input_centroid(
1051 ureg, TGSI_SEMANTIC_COLOR, param->idx,
1052 TGSI_INTERPOLATE_COLOR,
1053 tx->info->force_color_in_centroid ?
1054 TGSI_INTERPOLATE_LOC_CENTROID : 0,
1055 0, 1);
1056 } else {
1057 if(param->rel) {
1058 /* Copy all inputs (non consecutive)
1059 * to temp array (consecutive).
1060 * This is not good for performance.
1061 * A better way would be to have inputs
1062 * consecutive (would need implement alternative
1063 * way to match vs outputs and ps inputs).
1064 * However even with the better way, the temp array
1065 * copy would need to be used if some inputs
1066 * are not GENERIC or if they have different
1067 * interpolation flag. */
1068 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1069 int i;
1070 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1071 for (i = 0; i < 10; i++) {
1072 if (!ureg_src_is_undef(tx->regs.v[i]))
1073 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1074 else
1075 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1076 }
1077 }
1078 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1079 } else {
1080 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1081 src = tx->regs.v[param->idx];
1082 }
1083 }
1084 }
1085 if (param->rel)
1086 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1087 break;
1088 case D3DSPR_PREDICATE:
1089 if (ureg_dst_is_undef(tx->regs.predicate)) {
1090 /* Forbidden to use the predicate register before being set */
1091 tx->failure = TRUE;
1092 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1093 }
1094 src = ureg_src(tx->regs.predicate);
1095 break;
1096 case D3DSPR_SAMPLER:
1097 assert(param->mod == NINED3DSPSM_NONE);
1098 /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1099 src = ureg_DECL_sampler(ureg, param->idx);
1100 break;
1101 case D3DSPR_CONST:
1102 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1103 src = nine_float_constant_src(tx, param->idx);
1104 if (param->rel) {
1105 tx->indirect_const_access = TRUE;
1106 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1107 }
1108 }
1109 if (!IS_VS && tx->version.major < 2) {
1110 /* ps 1.X clamps constants */
1111 tmp = tx_scratch(tx);
1112 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1113 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1114 src = ureg_src(tmp);
1115 }
1116 break;
1117 case D3DSPR_CONST2:
1118 case D3DSPR_CONST3:
1119 case D3DSPR_CONST4:
1120 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1121 assert(!"CONST2/3/4");
1122 src = ureg_imm1f(ureg, 0.0f);
1123 break;
1124 case D3DSPR_CONSTINT:
1125 /* relative adressing only possible for float constants in vs */
1126 if (!tx_lconsti(tx, &src, param->idx))
1127 src = nine_integer_constant_src(tx, param->idx);
1128 break;
1129 case D3DSPR_CONSTBOOL:
1130 if (!tx_lconstb(tx, &src, param->idx))
1131 src = nine_boolean_constant_src(tx, param->idx);
1132 break;
1133 case D3DSPR_LOOP:
1134 if (ureg_dst_is_undef(tx->regs.address))
1135 tx->regs.address = ureg_DECL_address(ureg);
1136 if (!tx->native_integers)
1137 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1138 else
1139 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1140 src = ureg_src(tx->regs.address);
1141 break;
1142 case D3DSPR_MISCTYPE:
1143 switch (param->idx) {
1144 case D3DSMO_POSITION:
1145 if (ureg_src_is_undef(tx->regs.vPos))
1146 tx->regs.vPos = nine_get_position_input(tx);
1147 if (tx->shift_wpos) {
1148 /* TODO: do this only once */
1149 struct ureg_dst wpos = tx_scratch(tx);
1150 ureg_ADD(ureg, wpos, tx->regs.vPos,
1151 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1152 src = ureg_src(wpos);
1153 } else {
1154 src = tx->regs.vPos;
1155 }
1156 break;
1157 case D3DSMO_FACE:
1158 if (ureg_src_is_undef(tx->regs.vFace)) {
1159 if (tx->face_is_sysval_integer) {
1160 tmp = ureg_DECL_temporary(ureg);
1161 tx->regs.vFace =
1162 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1163
1164 /* convert bool to float */
1165 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1166 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1167 tx->regs.vFace = ureg_src(tmp);
1168 } else {
1169 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1170 TGSI_SEMANTIC_FACE, 0,
1171 TGSI_INTERPOLATE_CONSTANT);
1172 }
1173 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1174 }
1175 src = tx->regs.vFace;
1176 break;
1177 default:
1178 assert(!"invalid src D3DSMO");
1179 break;
1180 }
1181 break;
1182 case D3DSPR_TEMPFLOAT16:
1183 break;
1184 default:
1185 assert(!"invalid src D3DSPR");
1186 }
1187
1188 switch (param->mod) {
1189 case NINED3DSPSM_DW:
1190 tmp = tx_scratch(tx);
1191 /* NOTE: app is not allowed to read w with this modifier */
1192 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1193 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1194 src = ureg_src(tmp);
1195 break;
1196 case NINED3DSPSM_DZ:
1197 tmp = tx_scratch(tx);
1198 /* NOTE: app is not allowed to read z with this modifier */
1199 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1200 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1201 src = ureg_src(tmp);
1202 break;
1203 default:
1204 break;
1205 }
1206
1207 if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1208 src = ureg_swizzle(src,
1209 (param->swizzle >> 0) & 0x3,
1210 (param->swizzle >> 2) & 0x3,
1211 (param->swizzle >> 4) & 0x3,
1212 (param->swizzle >> 6) & 0x3);
1213
1214 switch (param->mod) {
1215 case NINED3DSPSM_ABS:
1216 src = ureg_abs(src);
1217 break;
1218 case NINED3DSPSM_ABSNEG:
1219 src = ureg_negate(ureg_abs(src));
1220 break;
1221 case NINED3DSPSM_NEG:
1222 src = ureg_negate(src);
1223 break;
1224 case NINED3DSPSM_BIAS:
1225 tmp = tx_scratch(tx);
1226 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1227 src = ureg_src(tmp);
1228 break;
1229 case NINED3DSPSM_BIASNEG:
1230 tmp = tx_scratch(tx);
1231 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1232 src = ureg_src(tmp);
1233 break;
1234 case NINED3DSPSM_NOT:
1235 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1236 tmp = tx_scratch(tx);
1237 ureg_NOT(ureg, tmp, src);
1238 src = ureg_src(tmp);
1239 break;
1240 } else { /* predicate */
1241 tmp = tx_scratch(tx);
1242 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1243 src = ureg_src(tmp);
1244 }
1245 FALLTHROUGH;
1246 case NINED3DSPSM_COMP:
1247 tmp = tx_scratch(tx);
1248 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1249 src = ureg_src(tmp);
1250 break;
1251 case NINED3DSPSM_DZ:
1252 case NINED3DSPSM_DW:
1253 /* Already handled*/
1254 break;
1255 case NINED3DSPSM_SIGN:
1256 tmp = tx_scratch(tx);
1257 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1258 src = ureg_src(tmp);
1259 break;
1260 case NINED3DSPSM_SIGNNEG:
1261 tmp = tx_scratch(tx);
1262 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1263 src = ureg_src(tmp);
1264 break;
1265 case NINED3DSPSM_X2:
1266 tmp = tx_scratch(tx);
1267 ureg_ADD(ureg, tmp, src, src);
1268 src = ureg_src(tmp);
1269 break;
1270 case NINED3DSPSM_X2NEG:
1271 tmp = tx_scratch(tx);
1272 ureg_ADD(ureg, tmp, src, src);
1273 src = ureg_negate(ureg_src(tmp));
1274 break;
1275 default:
1276 assert(param->mod == NINED3DSPSM_NONE);
1277 break;
1278 }
1279
1280 return src;
1281 }
1282
/* Translate a D3D9 SM1 destination parameter into a TGSI destination
 * operand, ignoring param->shift.
 *
 * The backing TGSI register is declared/allocated on first use and cached
 * in tx->regs. After the register is resolved, relative addressing, the
 * write mask and the saturate modifier are applied in that order.
 *
 * When tx->predicated_activated is set, the resolved destination is stored
 * in tx->regs.predicate_dst and tx->regs.predicate_tmp is returned instead.
 *
 * NOTE(review): for D3DSPR_TEMPFLOAT16 and for an invalid register file,
 * `dst` is never assigned before it is used below.
 */
static struct ureg_dst
_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
{
    struct ureg_dst dst;

    switch (param->file)
    {
    case D3DSPR_TEMP:
        assert(!param->rel);
        tx_temp_alloc(tx, param->idx);
        dst = tx->regs.r[param->idx];
        break;
 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
    case D3DSPR_ADDR:
        assert(!param->rel);
        if (tx->version.major < 2 && !IS_VS) {
            /* ps 1.x: texture registers are backed by temporaries */
            if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
                tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
            dst = tx->regs.tS[param->idx];
        } else
        if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
            tx_texcoord_alloc(tx, param->idx);
            dst = ureg_dst(tx->regs.vT[param->idx]);
        } else {
            tx_addr_alloc(tx, param->idx);
            dst = tx->regs.a0;
        }
        break;
    case D3DSPR_RASTOUT:
        assert(!param->rel);
        /* idx selects the output: 0 -> oPos, 1 -> oFog, 2 -> oPts */
        switch (param->idx) {
        case 0:
            if (ureg_dst_is_undef(tx->regs.oPos))
                tx->regs.oPos =
                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
            dst = tx->regs.oPos;
            break;
        case 1:
            /* fog is a saturated GENERIC[16] output */
            if (ureg_dst_is_undef(tx->regs.oFog))
                tx->regs.oFog =
                    ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
            dst = tx->regs.oFog;
            break;
        case 2:
            /* point size is written to a temporary here */
            if (ureg_dst_is_undef(tx->regs.oPts))
                tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
            dst = tx->regs.oPts;
            break;
        default:
            assert(0);
            break;
        }
        break;
 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
    case D3DSPR_OUTPUT:
        if (tx->version.major < 3) {
            assert(!param->rel);
            dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
        } else {
            assert(!param->rel); /* TODO */
            assert(param->idx < ARRAY_SIZE(tx->regs.o));
            dst = tx->regs.o[param->idx];
        }
        break;
    case D3DSPR_ATTROUT: /* VS */
    case D3DSPR_COLOROUT: /* PS */
        assert(param->idx >= 0 && param->idx < 4);
        assert(!param->rel);
        /* record which colour outputs are written */
        tx->info->rt_mask |= 1 << param->idx;
        if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
            /* ps < 3: oCol[0] will have fog blending afterward */
            if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
                tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
            } else {
                tx->regs.oCol[param->idx] =
                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
            }
        }
        dst = tx->regs.oCol[param->idx];
        /* vs < 3 colour outputs are saturated */
        if (IS_VS && tx->version.major < 3)
            dst = ureg_saturate(dst);
        break;
    case D3DSPR_DEPTHOUT:
        assert(!param->rel);
        if (ureg_dst_is_undef(tx->regs.oDepth))
            tx->regs.oDepth =
               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
                                       TGSI_WRITEMASK_Z, 0, 1);
        dst = tx->regs.oDepth; /* XXX: must write .z component */
        break;
    case D3DSPR_PREDICATE:
        /* the predicate register is backed by a temporary */
        if (ureg_dst_is_undef(tx->regs.predicate))
            tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
        dst = tx->regs.predicate;
        break;
    case D3DSPR_TEMPFLOAT16:
        DBG("unhandled D3DSPR: %u\n", param->file);
        break;
    default:
        assert(!"invalid dst D3DSPR");
        break;
    }
    if (param->rel)
        dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));

    if (param->mask != NINED3DSP_WRITEMASK_ALL)
        dst = ureg_writemask(dst, param->mask);
    if (param->mod & NINED3DSPDM_SATURATE)
        dst = ureg_saturate(dst);

    /* Predicated instruction: remember the real destination and hand out
     * predicate_tmp in its place. */
    if (tx->predicated_activated) {
        tx->regs.predicate_dst = dst;
        dst = tx->regs.predicate_tmp;
    }

    return dst;
}
1400
1401 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1402 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1403 {
1404 if (param->shift) {
1405 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1406 return tx->regs.tdst;
1407 }
1408 return _tx_dst_param(tx, param);
1409 }
1410
1411 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1412 tx_apply_dst0_modifiers(struct shader_translator *tx)
1413 {
1414 struct ureg_dst rdst;
1415 float f;
1416
1417 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1418 return;
1419 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1420
1421 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1422
1423 if (tx->insn.dst[0].shift < 0)
1424 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1425 else
1426 f = 1 << tx->insn.dst[0].shift;
1427
1428 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1429 }
1430
1431 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1432 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1433 {
1434 struct ureg_src src;
1435
1436 assert(!param->shift);
1437 assert(!(param->mod & NINED3DSPDM_SATURATE));
1438
1439 switch (param->file) {
1440 case D3DSPR_INPUT:
1441 if (IS_VS) {
1442 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1443 } else {
1444 assert(!param->rel);
1445 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1446 src = tx->regs.v[param->idx];
1447 }
1448 break;
1449 default:
1450 src = ureg_src(tx_dst_param(tx, param));
1451 break;
1452 }
1453 if (param->rel)
1454 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1455
1456 if (!param->mask)
1457 WARN("mask is 0, using identity swizzle\n");
1458
1459 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1460 char s[4];
1461 int n;
1462 int c;
1463 for (n = 0, c = 0; c < 4; ++c)
1464 if (param->mask & (1 << c))
1465 s[n++] = c;
1466 assert(n);
1467 for (c = n; c < 4; ++c)
1468 s[c] = s[n - 1];
1469 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1470 }
1471 return src;
1472 }
1473
1474 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1475 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1476 {
1477 struct ureg_program *ureg = tx->ureg;
1478 struct ureg_dst dst;
1479 struct ureg_src src[2];
1480 struct sm1_src_param *src_mat = &tx->insn.src[1];
1481 unsigned i;
1482
1483 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1484 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1485
1486 for (i = 0; i < n; i++)
1487 {
1488 const unsigned m = (1 << i);
1489
1490 src[1] = tx_src_param(tx, src_mat);
1491 src_mat->idx++;
1492
1493 if (!(dst.WriteMask & m))
1494 continue;
1495
1496 /* XXX: src == dst case ? */
1497
1498 switch (k) {
1499 case 3:
1500 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1501 break;
1502 case 4:
1503 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1504 break;
1505 default:
1506 DBG("invalid operation: M%ux%u\n", m, n);
1507 break;
1508 }
1509 }
1510
1511 return D3D_OK;
1512 }
1513
/* Expands to two zero arguments; presumably used as a (min, max) version
 * pair marking an opcode as unsupported -- confirm against the opcode table. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version into one integer: major in bits 8 and up, minor in
 * the low bits. */
#define V(maj, min) (((maj) << 8) | (min))
1516
1517 static inline const char *
d3dsio_to_string(unsigned opcode)1518 d3dsio_to_string( unsigned opcode )
1519 {
1520 static const char *names[] = {
1521 "NOP",
1522 "MOV",
1523 "ADD",
1524 "SUB",
1525 "MAD",
1526 "MUL",
1527 "RCP",
1528 "RSQ",
1529 "DP3",
1530 "DP4",
1531 "MIN",
1532 "MAX",
1533 "SLT",
1534 "SGE",
1535 "EXP",
1536 "LOG",
1537 "LIT",
1538 "DST",
1539 "LRP",
1540 "FRC",
1541 "M4x4",
1542 "M4x3",
1543 "M3x4",
1544 "M3x3",
1545 "M3x2",
1546 "CALL",
1547 "CALLNZ",
1548 "LOOP",
1549 "RET",
1550 "ENDLOOP",
1551 "LABEL",
1552 "DCL",
1553 "POW",
1554 "CRS",
1555 "SGN",
1556 "ABS",
1557 "NRM",
1558 "SINCOS",
1559 "REP",
1560 "ENDREP",
1561 "IF",
1562 "IFC",
1563 "ELSE",
1564 "ENDIF",
1565 "BREAK",
1566 "BREAKC",
1567 "MOVA",
1568 "DEFB",
1569 "DEFI",
1570 NULL,
1571 NULL,
1572 NULL,
1573 NULL,
1574 NULL,
1575 NULL,
1576 NULL,
1577 NULL,
1578 NULL,
1579 NULL,
1580 NULL,
1581 NULL,
1582 NULL,
1583 NULL,
1584 NULL,
1585 "TEXCOORD",
1586 "TEXKILL",
1587 "TEX",
1588 "TEXBEM",
1589 "TEXBEML",
1590 "TEXREG2AR",
1591 "TEXREG2GB",
1592 "TEXM3x2PAD",
1593 "TEXM3x2TEX",
1594 "TEXM3x3PAD",
1595 "TEXM3x3TEX",
1596 NULL,
1597 "TEXM3x3SPEC",
1598 "TEXM3x3VSPEC",
1599 "EXPP",
1600 "LOGP",
1601 "CND",
1602 "DEF",
1603 "TEXREG2RGB",
1604 "TEXDP3TEX",
1605 "TEXM3x2DEPTH",
1606 "TEXDP3",
1607 "TEXM3x3",
1608 "TEXDEPTH",
1609 "CMP",
1610 "BEM",
1611 "DP2ADD",
1612 "DSX",
1613 "DSY",
1614 "TEXLDD",
1615 "SETP",
1616 "TEXLDL",
1617 "BREAKP"
1618 };
1619
1620 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1621
1622 switch (opcode) {
1623 case D3DSIO_PHASE: return "PHASE";
1624 case D3DSIO_COMMENT: return "COMMENT";
1625 case D3DSIO_END: return "END";
1626 default:
1627 return NULL;
1628 }
1629 }
1630
/* All-zero initializer for an instruction-table entry. */
#define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
/* Non-zero when any of the entry's vertex/fragment min/max version fields
 * is non-zero. */
#define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
                                     (inst).vert_version.max | \
                                     (inst).frag_version.min | \
                                     (inst).frag_version.max)

/* Name of the dedicated translation handler for opcode `name`. */
#define SPECIAL(name) \
    NineTranslateInstruction_##name

/* Opens the definition of the dedicated handler for opcode `name`; the body
 * that follows receives the translator as `tx` and returns an HRESULT. */
#define DECL_SPECIAL(name) \
    static HRESULT \
    NineTranslateInstruction_##name( struct shader_translator *tx )

/* Forward declaration; the definition is not in this part of the file. */
static HRESULT
NineTranslateInstruction_Generic(struct shader_translator *);
1646
DECL_SPECIAL(NOP)1647 DECL_SPECIAL(NOP)
1648 {
1649 /* Nothing to do. NOP was used to avoid hangs
1650 * with very old d3d drivers. */
1651 return D3D_OK;
1652 }
1653
DECL_SPECIAL(SUB)1654 DECL_SPECIAL(SUB)
1655 {
1656 struct ureg_program *ureg = tx->ureg;
1657 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1658 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1659 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1660
1661 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1662 return D3D_OK;
1663 }
1664
DECL_SPECIAL(ABS)1665 DECL_SPECIAL(ABS)
1666 {
1667 struct ureg_program *ureg = tx->ureg;
1668 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1669 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1670
1671 ureg_MOV(ureg, dst, ureg_abs(src));
1672 return D3D_OK;
1673 }
1674
DECL_SPECIAL(XPD)1675 DECL_SPECIAL(XPD)
1676 {
1677 struct ureg_program *ureg = tx->ureg;
1678 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1679 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1680 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1681
1682 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1683 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1684 TGSI_SWIZZLE_X, 0),
1685 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1686 TGSI_SWIZZLE_Y, 0));
1687 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1688 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1689 TGSI_SWIZZLE_Y, 0),
1690 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1691 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1692 ureg_src(dst));
1693 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1694 ureg_imm1f(ureg, 1));
1695 return D3D_OK;
1696 }
1697
DECL_SPECIAL(M4x4)1698 DECL_SPECIAL(M4x4)
1699 {
1700 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1701 }
1702
DECL_SPECIAL(M4x3)1703 DECL_SPECIAL(M4x3)
1704 {
1705 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1706 }
1707
DECL_SPECIAL(M3x4)1708 DECL_SPECIAL(M3x4)
1709 {
1710 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1711 }
1712
DECL_SPECIAL(M3x3)1713 DECL_SPECIAL(M3x3)
1714 {
1715 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1716 }
1717
DECL_SPECIAL(M3x2)1718 DECL_SPECIAL(M3x2)
1719 {
1720 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1721 }
1722
DECL_SPECIAL(CMP)1723 DECL_SPECIAL(CMP)
1724 {
1725 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1726 tx_src_param(tx, &tx->insn.src[0]),
1727 tx_src_param(tx, &tx->insn.src[2]),
1728 tx_src_param(tx, &tx->insn.src[1]));
1729 return D3D_OK;
1730 }
1731
DECL_SPECIAL(CND)1732 DECL_SPECIAL(CND)
1733 {
1734 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1735 struct ureg_dst cgt;
1736 struct ureg_src cnd;
1737
1738 /* the coissue flag was a tip for compilers to advise to
1739 * execute two operations at the same time, in cases
1740 * the two executions had same dst with different channels.
1741 * It has no effect on current hw. However it seems CND
1742 * is affected. The handling of this very specific case
1743 * handled below mimick wine behaviour */
1744 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1745 ureg_MOV(tx->ureg,
1746 dst, tx_src_param(tx, &tx->insn.src[1]));
1747 return D3D_OK;
1748 }
1749
1750 cnd = tx_src_param(tx, &tx->insn.src[0]);
1751 cgt = tx_scratch(tx);
1752
1753 if (tx->version.major == 1 && tx->version.minor < 4)
1754 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1755
1756 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1757
1758 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1759 tx_src_param(tx, &tx->insn.src[1]),
1760 tx_src_param(tx, &tx->insn.src[2]));
1761 return D3D_OK;
1762 }
1763
DECL_SPECIAL(CALL)1764 DECL_SPECIAL(CALL)
1765 {
1766 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1767 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1768 return D3D_OK;
1769 }
1770
DECL_SPECIAL(CALLNZ)1771 DECL_SPECIAL(CALLNZ)
1772 {
1773 struct ureg_program *ureg = tx->ureg;
1774 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1775
1776 if (!tx->native_integers)
1777 ureg_IF(ureg, src, tx_cond(tx));
1778 else
1779 ureg_UIF(ureg, src, tx_cond(tx));
1780 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1781 tx_endcond(tx);
1782 ureg_ENDIF(ureg);
1783 return D3D_OK;
1784 }
1785
DECL_SPECIAL(LOOP)1786 DECL_SPECIAL(LOOP)
1787 {
1788 struct ureg_program *ureg = tx->ureg;
1789 unsigned *label;
1790 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1791 struct ureg_dst ctr;
1792 struct ureg_dst tmp;
1793 struct ureg_src ctrx;
1794
1795 label = tx_bgnloop(tx);
1796 ctr = tx_get_loopctr(tx, TRUE);
1797 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1798
1799 /* src: num_iterations - start_value of al - step for al - 0 */
1800 ureg_MOV(ureg, ctr, src);
1801 ureg_BGNLOOP(tx->ureg, label);
1802 tmp = tx_scratch_scalar(tx);
1803 /* Initially ctr.x contains the number of iterations.
1804 * ctr.y will contain the updated value of al.
1805 * We decrease ctr.x at the end of every iteration,
1806 * and stop when it reaches 0. */
1807
1808 if (!tx->native_integers) {
1809 /* case src and ctr contain floats */
1810 /* to avoid precision issue, we stop when ctr <= 0.5 */
1811 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1812 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1813 } else {
1814 /* case src and ctr contain integers */
1815 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1816 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1817 }
1818 ureg_BRK(ureg);
1819 tx_endcond(tx);
1820 ureg_ENDIF(ureg);
1821 return D3D_OK;
1822 }
1823
DECL_SPECIAL(RET)1824 DECL_SPECIAL(RET)
1825 {
1826 /* RET as a last instruction could be safely ignored.
1827 * Remove it to prevent crashes/warnings in case underlying
1828 * driver doesn't implement arbitrary returns.
1829 */
1830 if (*(tx->parse_next) != NINED3DSP_END) {
1831 ureg_RET(tx->ureg);
1832 }
1833 return D3D_OK;
1834 }
1835
DECL_SPECIAL(ENDLOOP)1836 DECL_SPECIAL(ENDLOOP)
1837 {
1838 struct ureg_program *ureg = tx->ureg;
1839 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1840 struct ureg_dst dst_ctrx, dst_al;
1841 struct ureg_src src_ctr, al_counter;
1842
1843 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1844 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1845 src_ctr = ureg_src(ctr);
1846 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1847
1848 /* ctr.x -= 1
1849 * ctr.y (aL) += step */
1850 if (!tx->native_integers) {
1851 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1852 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1853 } else {
1854 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1855 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1856 }
1857 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1858 return D3D_OK;
1859 }
1860
DECL_SPECIAL(LABEL)1861 DECL_SPECIAL(LABEL)
1862 {
1863 unsigned k = tx->num_inst_labels;
1864 unsigned n = tx->insn.src[0].idx;
1865 assert(n < 2048);
1866 if (n >= k)
1867 tx->inst_labels = REALLOC(tx->inst_labels,
1868 k * sizeof(tx->inst_labels[0]),
1869 n * sizeof(tx->inst_labels[0]));
1870
1871 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1872 return D3D_OK;
1873 }
1874
DECL_SPECIAL(SINCOS)1875 DECL_SPECIAL(SINCOS)
1876 {
1877 struct ureg_program *ureg = tx->ureg;
1878 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1879 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1880 struct ureg_dst tmp = tx_scratch_scalar(tx);
1881
1882 assert(!(dst.WriteMask & 0xc));
1883
1884 /* Copying to a temporary register avoids src/dst aliasing.
1885 * src is supposed to have replicated swizzle. */
1886 ureg_MOV(ureg, tmp, src);
1887
1888 /* z undefined, w untouched */
1889 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1890 tx_src_scalar(tmp));
1891 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1892 tx_src_scalar(tmp));
1893 return D3D_OK;
1894 }
1895
DECL_SPECIAL(SGN)1896 DECL_SPECIAL(SGN)
1897 {
1898 ureg_SSG(tx->ureg,
1899 tx_dst_param(tx, &tx->insn.dst[0]),
1900 tx_src_param(tx, &tx->insn.src[0]));
1901 return D3D_OK;
1902 }
1903
DECL_SPECIAL(REP)1904 DECL_SPECIAL(REP)
1905 {
1906 struct ureg_program *ureg = tx->ureg;
1907 unsigned *label;
1908 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1909 struct ureg_dst ctr;
1910 struct ureg_dst tmp;
1911 struct ureg_src ctrx;
1912
1913 label = tx_bgnloop(tx);
1914 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1915 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1916
1917 /* NOTE: rep must be constant, so we don't have to save the count */
1918 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1919
1920 /* rep: num_iterations - 0 - 0 - 0 */
1921 ureg_MOV(ureg, ctr, rep);
1922 ureg_BGNLOOP(ureg, label);
1923 tmp = tx_scratch_scalar(tx);
1924 /* Initially ctr.x contains the number of iterations.
1925 * We decrease ctr.x at the end of every iteration,
1926 * and stop when it reaches 0. */
1927
1928 if (!tx->native_integers) {
1929 /* case src and ctr contain floats */
1930 /* to avoid precision issue, we stop when ctr <= 0.5 */
1931 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1932 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1933 } else {
1934 /* case src and ctr contain integers */
1935 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1936 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1937 }
1938 ureg_BRK(ureg);
1939 tx_endcond(tx);
1940 ureg_ENDIF(ureg);
1941
1942 return D3D_OK;
1943 }
1944
DECL_SPECIAL(ENDREP)1945 DECL_SPECIAL(ENDREP)
1946 {
1947 struct ureg_program *ureg = tx->ureg;
1948 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1949 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1950 struct ureg_src src_ctr = ureg_src(ctr);
1951
1952 /* ctr.x -= 1 */
1953 if (!tx->native_integers)
1954 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1955 else
1956 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1957
1958 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1959 return D3D_OK;
1960 }
1961
DECL_SPECIAL(ENDIF)1962 DECL_SPECIAL(ENDIF)
1963 {
1964 tx_endcond(tx);
1965 ureg_ENDIF(tx->ureg);
1966 return D3D_OK;
1967 }
1968
DECL_SPECIAL(IF)1969 DECL_SPECIAL(IF)
1970 {
1971 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1972
1973 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1974 ureg_UIF(tx->ureg, src, tx_cond(tx));
1975 else
1976 ureg_IF(tx->ureg, src, tx_cond(tx));
1977
1978 return D3D_OK;
1979 }
1980
1981 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1982 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1983 {
1984 switch (flags) {
1985 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1986 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1987 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1988 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1989 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1990 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1991 default:
1992 assert(!"invalid comparison flags");
1993 return TGSI_OPCODE_SGT;
1994 }
1995 }
1996
DECL_SPECIAL(IFC)1997 DECL_SPECIAL(IFC)
1998 {
1999 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2000 struct ureg_src src[2];
2001 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2002 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2003 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2004 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2005 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2006 return D3D_OK;
2007 }
2008
DECL_SPECIAL(ELSE)2009 DECL_SPECIAL(ELSE)
2010 {
2011 ureg_ELSE(tx->ureg, tx_elsecond(tx));
2012 return D3D_OK;
2013 }
2014
DECL_SPECIAL(BREAKC)2015 DECL_SPECIAL(BREAKC)
2016 {
2017 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2018 struct ureg_src src[2];
2019 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2020 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2021 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2022 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2023 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2024 ureg_BRK(tx->ureg);
2025 tx_endcond(tx);
2026 ureg_ENDIF(tx->ureg);
2027 return D3D_OK;
2028 }
2029
2030 static const char *sm1_declusage_names[] =
2031 {
2032 [D3DDECLUSAGE_POSITION] = "POSITION",
2033 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2034 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2035 [D3DDECLUSAGE_NORMAL] = "NORMAL",
2036 [D3DDECLUSAGE_PSIZE] = "PSIZE",
2037 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2038 [D3DDECLUSAGE_TANGENT] = "TANGENT",
2039 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2040 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2041 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2042 [D3DDECLUSAGE_COLOR] = "COLOR",
2043 [D3DDECLUSAGE_FOG] = "FOG",
2044 [D3DDECLUSAGE_DEPTH] = "DEPTH",
2045 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2046 };
2047
2048 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)2049 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2050 {
2051 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2052 }
2053
2054 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)2055 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2056 boolean tc,
2057 struct sm1_semantic *dcl)
2058 {
2059 BYTE index = dcl->usage_idx;
2060
2061 /* For everything that is not matching to a TGSI_SEMANTIC_****,
2062 * we match to a TGSI_SEMANTIC_GENERIC with index.
2063 *
2064 * The index can be anything UINT16 and usage_idx is BYTE,
2065 * so we can fit everything. It doesn't matter if indices
2066 * are close together or low.
2067 *
2068 *
2069 * POSITION >= 1: 10 * index + 7
2070 * COLOR >= 2: 10 * (index-1) + 8
2071 * FOG: 16
2072 * TEXCOORD[0..15]: index
2073 * BLENDWEIGHT: 10 * index + 19
2074 * BLENDINDICES: 10 * index + 20
2075 * NORMAL: 10 * index + 21
2076 * TANGENT: 10 * index + 22
2077 * BINORMAL: 10 * index + 23
2078 * TESSFACTOR: 10 * index + 24
2079 */
2080
2081 switch (dcl->usage) {
2082 case D3DDECLUSAGE_POSITION:
2083 case D3DDECLUSAGE_POSITIONT:
2084 case D3DDECLUSAGE_DEPTH:
2085 if (index == 0) {
2086 sem->Name = TGSI_SEMANTIC_POSITION;
2087 sem->Index = 0;
2088 } else {
2089 sem->Name = TGSI_SEMANTIC_GENERIC;
2090 sem->Index = 10 * index + 7;
2091 }
2092 break;
2093 case D3DDECLUSAGE_COLOR:
2094 if (index < 2) {
2095 sem->Name = TGSI_SEMANTIC_COLOR;
2096 sem->Index = index;
2097 } else {
2098 sem->Name = TGSI_SEMANTIC_GENERIC;
2099 sem->Index = 10 * (index-1) + 8;
2100 }
2101 break;
2102 case D3DDECLUSAGE_FOG:
2103 assert(index == 0);
2104 sem->Name = TGSI_SEMANTIC_GENERIC;
2105 sem->Index = 16;
2106 break;
2107 case D3DDECLUSAGE_PSIZE:
2108 assert(index == 0);
2109 sem->Name = TGSI_SEMANTIC_PSIZE;
2110 sem->Index = 0;
2111 break;
2112 case D3DDECLUSAGE_TEXCOORD:
2113 assert(index < 16);
2114 if (index < 8 && tc)
2115 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2116 else
2117 sem->Name = TGSI_SEMANTIC_GENERIC;
2118 sem->Index = index;
2119 break;
2120 case D3DDECLUSAGE_BLENDWEIGHT:
2121 sem->Name = TGSI_SEMANTIC_GENERIC;
2122 sem->Index = 10 * index + 19;
2123 break;
2124 case D3DDECLUSAGE_BLENDINDICES:
2125 sem->Name = TGSI_SEMANTIC_GENERIC;
2126 sem->Index = 10 * index + 20;
2127 break;
2128 case D3DDECLUSAGE_NORMAL:
2129 sem->Name = TGSI_SEMANTIC_GENERIC;
2130 sem->Index = 10 * index + 21;
2131 break;
2132 case D3DDECLUSAGE_TANGENT:
2133 sem->Name = TGSI_SEMANTIC_GENERIC;
2134 sem->Index = 10 * index + 22;
2135 break;
2136 case D3DDECLUSAGE_BINORMAL:
2137 sem->Name = TGSI_SEMANTIC_GENERIC;
2138 sem->Index = 10 * index + 23;
2139 break;
2140 case D3DDECLUSAGE_TESSFACTOR:
2141 sem->Name = TGSI_SEMANTIC_GENERIC;
2142 sem->Index = 10 * index + 24;
2143 break;
2144 case D3DDECLUSAGE_SAMPLE:
2145 sem->Name = TGSI_SEMANTIC_COUNT;
2146 sem->Index = 0;
2147 break;
2148 default:
2149 unreachable("Invalid DECLUSAGE.");
2150 break;
2151 }
2152 }
2153
/* D3DSTT_* sampler texture types shifted right by D3DSP_TEXTURETYPE_SHIFT.
 * These are the values the BYTE sampler_type switches in this file use as
 * case labels. */
#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2158 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2159 d3dstt_to_tgsi_tex(BYTE sampler_type)
2160 {
2161 switch (sampler_type) {
2162 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2163 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2164 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2165 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2166 default:
2167 assert(0);
2168 return TGSI_TEXTURE_UNKNOWN;
2169 }
2170 }
2171 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2172 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2173 {
2174 switch (sampler_type) {
2175 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2176 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2177 case NINED3DSTT_VOLUME:
2178 case NINED3DSTT_CUBE:
2179 default:
2180 assert(0);
2181 return TGSI_TEXTURE_UNKNOWN;
2182 }
2183 }
2184 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2185 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2186 {
2187 boolean shadow = !!(info->sampler_mask_shadow & (1 << stage));
2188 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2189 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2190 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2191 case 3: return TGSI_TEXTURE_3D;
2192 default:
2193 return TGSI_TEXTURE_CUBE;
2194 }
2195 }
2196
2197 static const char *
sm1_sampler_type_name(BYTE sampler_type)2198 sm1_sampler_type_name(BYTE sampler_type)
2199 {
2200 switch (sampler_type) {
2201 case NINED3DSTT_1D: return "1D";
2202 case NINED3DSTT_2D: return "2D";
2203 case NINED3DSTT_VOLUME: return "VOLUME";
2204 case NINED3DSTT_CUBE: return "CUBE";
2205 default:
2206 return "(D3DSTT_?)";
2207 }
2208 }
2209
2210 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2211 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2212 {
2213 switch (sem->Name) {
2214 case TGSI_SEMANTIC_POSITION:
2215 case TGSI_SEMANTIC_NORMAL:
2216 return TGSI_INTERPOLATE_LINEAR;
2217 case TGSI_SEMANTIC_BCOLOR:
2218 case TGSI_SEMANTIC_COLOR:
2219 return TGSI_INTERPOLATE_COLOR;
2220 case TGSI_SEMANTIC_FOG:
2221 case TGSI_SEMANTIC_GENERIC:
2222 case TGSI_SEMANTIC_TEXCOORD:
2223 case TGSI_SEMANTIC_CLIPDIST:
2224 case TGSI_SEMANTIC_CLIPVERTEX:
2225 return TGSI_INTERPOLATE_PERSPECTIVE;
2226 case TGSI_SEMANTIC_EDGEFLAG:
2227 case TGSI_SEMANTIC_FACE:
2228 case TGSI_SEMANTIC_INSTANCEID:
2229 case TGSI_SEMANTIC_PCOORD:
2230 case TGSI_SEMANTIC_PRIMID:
2231 case TGSI_SEMANTIC_PSIZE:
2232 case TGSI_SEMANTIC_VERTEXID:
2233 return TGSI_INTERPOLATE_CONSTANT;
2234 default:
2235 assert(0);
2236 return TGSI_INTERPOLATE_CONSTANT;
2237 }
2238 }
2239
DECL_SPECIAL(DCL)2240 DECL_SPECIAL(DCL)
2241 {
2242 struct ureg_program *ureg = tx->ureg;
2243 boolean is_input;
2244 boolean is_sampler;
2245 struct tgsi_declaration_semantic tgsi;
2246 struct sm1_semantic sem;
2247 sm1_read_semantic(tx, &sem);
2248
2249 is_input = sem.reg.file == D3DSPR_INPUT;
2250 is_sampler =
2251 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2252
2253 DUMP("DCL ");
2254 sm1_dump_dst_param(&sem.reg);
2255 if (is_sampler)
2256 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2257 else
2258 if (tx->version.major >= 3)
2259 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2260 else
2261 if (sem.usage | sem.usage_idx)
2262 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2263 else
2264 DUMP("\n");
2265
2266 if (is_sampler) {
2267 const unsigned m = 1 << sem.reg.idx;
2268 ureg_DECL_sampler(ureg, sem.reg.idx);
2269 tx->info->sampler_mask |= m;
2270 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2271 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2272 d3dstt_to_tgsi_tex(sem.sampler_type);
2273 return D3D_OK;
2274 }
2275
2276 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2277 if (IS_VS) {
2278 if (is_input) {
2279 /* linkage outside of shader with vertex declaration */
2280 ureg_DECL_vs_input(ureg, sem.reg.idx);
2281 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2282 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2283 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2284 /* NOTE: preserving order in case of indirect access */
2285 } else
2286 if (tx->version.major >= 3) {
2287 /* SM2 output semantic determined by file */
2288 assert(sem.reg.mask != 0);
2289 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2290 tx->info->position_t = TRUE;
2291 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2292 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2293 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2294 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2295 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2296 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2297 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2298 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2299 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2300 }
2301
2302 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2303 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2304 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2305 }
2306 }
2307 } else {
2308 if (is_input && tx->version.major >= 3) {
2309 unsigned interp_location = 0;
2310 /* SM3 only, SM2 input semantic determined by file */
2311 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2312 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2313 /* PositionT and tessfactor forbidden */
2314 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2315 return D3DERR_INVALIDCALL;
2316
2317 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2318 /* Position0 is forbidden (likely because vPos already does that) */
2319 if (sem.usage == D3DDECLUSAGE_POSITION)
2320 return D3DERR_INVALIDCALL;
2321 /* Following code is for depth */
2322 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2323 return D3D_OK;
2324 }
2325
2326 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2327 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2328 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2329
2330 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2331 ureg, tgsi.Name, tgsi.Index,
2332 nine_tgsi_to_interp_mode(&tgsi),
2333 interp_location, 0, 1);
2334 } else
2335 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2336 /* FragColor or FragDepth */
2337 assert(sem.reg.mask != 0);
2338 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2339 0, 1);
2340 }
2341 }
2342 return D3D_OK;
2343 }
2344
DECL_SPECIAL(DEF)2345 DECL_SPECIAL(DEF)
2346 {
2347 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2348 return D3D_OK;
2349 }
2350
DECL_SPECIAL(DEFB)2351 DECL_SPECIAL(DEFB)
2352 {
2353 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2354 return D3D_OK;
2355 }
2356
DECL_SPECIAL(DEFI)2357 DECL_SPECIAL(DEFI)
2358 {
2359 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2360 return D3D_OK;
2361 }
2362
DECL_SPECIAL(POW)2363 DECL_SPECIAL(POW)
2364 {
2365 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2366 struct ureg_src src[2] = {
2367 tx_src_param(tx, &tx->insn.src[0]),
2368 tx_src_param(tx, &tx->insn.src[1])
2369 };
2370 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2371 return D3D_OK;
2372 }
2373
/* Test results on Win 10:
 * NV (NVIDIA GeForce GT 635M)
 * AMD (AMD Radeon HD 7730M)
 * INTEL (Intel(R) HD Graphics 4000)
 * PS2 and PS3:
 * RCP and RSQ can generate inf on NV and AMD.
 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
 * NV: log not clamped
 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
 * All devices have 0*anything = 0
 *
 * INTEL VS2 and VS3: same behaviour.
 * Some differences between VS2 and VS3 for constants defined with inf/NaN.
 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
 * VS2 seems to clamp to zero (may be test failure).
 * AMD VS2: unknown, VS3: very likely behaviour of PS3
 * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
 * "Very likely" because the test was less extensive.
 *
 * Thus all clamping can be removed for shaders 2 and 3,
 * as long as 0*anything = 0.
 * Else clamps to enforce 0*anything = 0 (anything being then
 * neither inf nor NaN, the user being unlikely to pass them
 * as constant).
 * The status for VS1 and PS1 is unknown.
 */
2402
DECL_SPECIAL(RCP)2403 DECL_SPECIAL(RCP)
2404 {
2405 struct ureg_program *ureg = tx->ureg;
2406 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2407 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2408 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2409 ureg_RCP(ureg, tmp, src);
2410 if (!tx->mul_zero_wins) {
2411 /* FLT_MAX has issues with Rayman */
2412 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2413 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2414 }
2415 return D3D_OK;
2416 }
2417
DECL_SPECIAL(RSQ)2418 DECL_SPECIAL(RSQ)
2419 {
2420 struct ureg_program *ureg = tx->ureg;
2421 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2422 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2423 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2424 ureg_RSQ(ureg, tmp, ureg_abs(src));
2425 if (!tx->mul_zero_wins)
2426 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2427 return D3D_OK;
2428 }
2429
DECL_SPECIAL(LOG)2430 DECL_SPECIAL(LOG)
2431 {
2432 struct ureg_program *ureg = tx->ureg;
2433 struct ureg_dst tmp = tx_scratch_scalar(tx);
2434 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2435 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2436 ureg_LG2(ureg, tmp, ureg_abs(src));
2437 if (tx->mul_zero_wins) {
2438 ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2439 } else {
2440 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2441 }
2442 return D3D_OK;
2443 }
2444
DECL_SPECIAL(LIT)2445 DECL_SPECIAL(LIT)
2446 {
2447 struct ureg_program *ureg = tx->ureg;
2448 struct ureg_dst tmp = tx_scratch(tx);
2449 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2450 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2451 ureg_LIT(ureg, tmp, src);
2452 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2453 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2454 * it 0^0 if src.w=0, which value is driver dependent. */
2455 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2456 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2457 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2458 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2459 return D3D_OK;
2460 }
2461
DECL_SPECIAL(NRM)2462 DECL_SPECIAL(NRM)
2463 {
2464 struct ureg_program *ureg = tx->ureg;
2465 struct ureg_dst tmp = tx_scratch_scalar(tx);
2466 struct ureg_src nrm = tx_src_scalar(tmp);
2467 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2468 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2469 ureg_DP3(ureg, tmp, src, src);
2470 ureg_RSQ(ureg, tmp, nrm);
2471 if (!tx->mul_zero_wins)
2472 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2473 ureg_MUL(ureg, dst, src, nrm);
2474 return D3D_OK;
2475 }
2476
DECL_SPECIAL(DP2ADD)2477 DECL_SPECIAL(DP2ADD)
2478 {
2479 struct ureg_dst tmp = tx_scratch_scalar(tx);
2480 struct ureg_src dp2 = tx_src_scalar(tmp);
2481 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2482 struct ureg_src src[3];
2483 int i;
2484 for (i = 0; i < 3; ++i)
2485 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2486 assert_replicate_swizzle(&src[2]);
2487
2488 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2489 ureg_ADD(tx->ureg, dst, src[2], dp2);
2490
2491 return D3D_OK;
2492 }
2493
DECL_SPECIAL(TEXCOORD)2494 DECL_SPECIAL(TEXCOORD)
2495 {
2496 struct ureg_program *ureg = tx->ureg;
2497 const unsigned s = tx->insn.dst[0].idx;
2498 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2499
2500 tx_texcoord_alloc(tx, s);
2501 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2502 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2503
2504 return D3D_OK;
2505 }
2506
DECL_SPECIAL(TEXCOORD_ps14)2507 DECL_SPECIAL(TEXCOORD_ps14)
2508 {
2509 struct ureg_program *ureg = tx->ureg;
2510 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2511 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2512
2513 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2514
2515 ureg_MOV(ureg, dst, src);
2516
2517 return D3D_OK;
2518 }
2519
DECL_SPECIAL(TEXKILL)2520 DECL_SPECIAL(TEXKILL)
2521 {
2522 struct ureg_src reg;
2523
2524 if (tx->version.major > 1 || tx->version.minor > 3) {
2525 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2526 } else {
2527 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2528 reg = tx->regs.vT[tx->insn.dst[0].idx];
2529 }
2530 if (tx->version.major < 2)
2531 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2532 ureg_KILL_IF(tx->ureg, reg);
2533
2534 return D3D_OK;
2535 }
2536
DECL_SPECIAL(TEXBEM)2537 DECL_SPECIAL(TEXBEM)
2538 {
2539 struct ureg_program *ureg = tx->ureg;
2540 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2541 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2542 struct ureg_dst tmp, tmp2, texcoord;
2543 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2544 struct ureg_src bumpenvlscale, bumpenvloffset;
2545 const int m = tx->insn.dst[0].idx;
2546
2547 assert(tx->version.major == 1);
2548
2549 sample = ureg_DECL_sampler(ureg, m);
2550 tx->info->sampler_mask |= 1 << m;
2551
2552 tx_texcoord_alloc(tx, m);
2553
2554 tmp = tx_scratch(tx);
2555 tmp2 = tx_scratch(tx);
2556 texcoord = tx_scratch(tx);
2557 /*
2558 * Bump-env-matrix:
2559 * 00 is X
2560 * 01 is Y
2561 * 10 is Z
2562 * 11 is W
2563 */
2564 c8m = nine_float_constant_src(tx, 8+m);
2565 c16m2 = nine_float_constant_src(tx, 8+8+m/2);
2566
2567 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2568 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2569 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2570 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2571
2572 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2573 if (m % 2 == 0) {
2574 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2575 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2576 } else {
2577 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2578 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2579 }
2580
2581 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2582
2583 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2584 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2585 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2586 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2587 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2588 NINE_APPLY_SWIZZLE(src, Y),
2589 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2590
2591 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2592 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2593 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2594 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2595 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2596 NINE_APPLY_SWIZZLE(src, Y),
2597 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2598
2599 /* Now the texture coordinates are in tmp.xy */
2600
2601 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2602 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2603 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2604 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2605 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2606 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2607 bumpenvlscale, bumpenvloffset);
2608 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2609 }
2610
2611 tx->info->bumpenvmat_needed = 1;
2612
2613 return D3D_OK;
2614 }
2615
DECL_SPECIAL(TEXREG2AR)2616 DECL_SPECIAL(TEXREG2AR)
2617 {
2618 struct ureg_program *ureg = tx->ureg;
2619 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2620 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2621 struct ureg_src sample;
2622 const int m = tx->insn.dst[0].idx;
2623 ASSERTED const int n = tx->insn.src[0].idx;
2624 assert(m >= 0 && m > n);
2625
2626 sample = ureg_DECL_sampler(ureg, m);
2627 tx->info->sampler_mask |= 1 << m;
2628 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2629
2630 return D3D_OK;
2631 }
2632
DECL_SPECIAL(TEXREG2GB)2633 DECL_SPECIAL(TEXREG2GB)
2634 {
2635 struct ureg_program *ureg = tx->ureg;
2636 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2637 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2638 struct ureg_src sample;
2639 const int m = tx->insn.dst[0].idx;
2640 ASSERTED const int n = tx->insn.src[0].idx;
2641 assert(m >= 0 && m > n);
2642
2643 sample = ureg_DECL_sampler(ureg, m);
2644 tx->info->sampler_mask |= 1 << m;
2645 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2646
2647 return D3D_OK;
2648 }
2649
DECL_SPECIAL(TEXM3x2PAD)2650 DECL_SPECIAL(TEXM3x2PAD)
2651 {
2652 return D3D_OK; /* this is just padding */
2653 }
2654
DECL_SPECIAL(TEXM3x2TEX)2655 DECL_SPECIAL(TEXM3x2TEX)
2656 {
2657 struct ureg_program *ureg = tx->ureg;
2658 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2659 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2660 struct ureg_src sample;
2661 const int m = tx->insn.dst[0].idx - 1;
2662 ASSERTED const int n = tx->insn.src[0].idx;
2663 assert(m >= 0 && m > n);
2664
2665 tx_texcoord_alloc(tx, m);
2666 tx_texcoord_alloc(tx, m+1);
2667
2668 /* performs the matrix multiplication */
2669 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2670 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2671
2672 sample = ureg_DECL_sampler(ureg, m + 1);
2673 tx->info->sampler_mask |= 1 << (m + 1);
2674 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2675
2676 return D3D_OK;
2677 }
2678
DECL_SPECIAL(TEXM3x3PAD)2679 DECL_SPECIAL(TEXM3x3PAD)
2680 {
2681 return D3D_OK; /* this is just padding */
2682 }
2683
DECL_SPECIAL(TEXM3x3SPEC)2684 DECL_SPECIAL(TEXM3x3SPEC)
2685 {
2686 struct ureg_program *ureg = tx->ureg;
2687 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2688 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2689 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2690 struct ureg_src sample;
2691 struct ureg_dst tmp;
2692 const int m = tx->insn.dst[0].idx - 2;
2693 ASSERTED const int n = tx->insn.src[0].idx;
2694 assert(m >= 0 && m > n);
2695
2696 tx_texcoord_alloc(tx, m);
2697 tx_texcoord_alloc(tx, m+1);
2698 tx_texcoord_alloc(tx, m+2);
2699
2700 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2701 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2702 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2703
2704 sample = ureg_DECL_sampler(ureg, m + 2);
2705 tx->info->sampler_mask |= 1 << (m + 2);
2706 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2707
2708 /* At this step, dst = N = (u', w', z').
2709 * We want dst to be the texture sampled at (u'', w'', z''), with
2710 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2711 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2712 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2713 /* at this step tmp.x = 1/N.N */
2714 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2715 /* at this step tmp.y = N.E */
2716 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2717 /* at this step tmp.x = N.E/N.N */
2718 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2719 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2720 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2721 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2722 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2723
2724 return D3D_OK;
2725 }
2726
DECL_SPECIAL(TEXREG2RGB)2727 DECL_SPECIAL(TEXREG2RGB)
2728 {
2729 struct ureg_program *ureg = tx->ureg;
2730 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2731 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2732 struct ureg_src sample;
2733 const int m = tx->insn.dst[0].idx;
2734 ASSERTED const int n = tx->insn.src[0].idx;
2735 assert(m >= 0 && m > n);
2736
2737 sample = ureg_DECL_sampler(ureg, m);
2738 tx->info->sampler_mask |= 1 << m;
2739 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2740
2741 return D3D_OK;
2742 }
2743
DECL_SPECIAL(TEXDP3TEX)2744 DECL_SPECIAL(TEXDP3TEX)
2745 {
2746 struct ureg_program *ureg = tx->ureg;
2747 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2748 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2749 struct ureg_dst tmp;
2750 struct ureg_src sample;
2751 const int m = tx->insn.dst[0].idx;
2752 ASSERTED const int n = tx->insn.src[0].idx;
2753 assert(m >= 0 && m > n);
2754
2755 tx_texcoord_alloc(tx, m);
2756
2757 tmp = tx_scratch(tx);
2758 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2759 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2760
2761 sample = ureg_DECL_sampler(ureg, m);
2762 tx->info->sampler_mask |= 1 << m;
2763 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2764
2765 return D3D_OK;
2766 }
2767
DECL_SPECIAL(TEXM3x2DEPTH)2768 DECL_SPECIAL(TEXM3x2DEPTH)
2769 {
2770 struct ureg_program *ureg = tx->ureg;
2771 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2772 struct ureg_dst tmp;
2773 const int m = tx->insn.dst[0].idx - 1;
2774 ASSERTED const int n = tx->insn.src[0].idx;
2775 assert(m >= 0 && m > n);
2776
2777 tx_texcoord_alloc(tx, m);
2778 tx_texcoord_alloc(tx, m+1);
2779
2780 tmp = tx_scratch(tx);
2781
2782 /* performs the matrix multiplication */
2783 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2784 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2785
2786 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2787 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2788 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2789 /* res = 'w' == 0 ? 1.0 : z/w */
2790 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2791 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2792 /* replace the depth for depth testing with the result */
2793 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2794 TGSI_WRITEMASK_Z, 0, 1);
2795 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2796 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2797 return D3D_OK;
2798 }
2799
DECL_SPECIAL(TEXDP3)2800 DECL_SPECIAL(TEXDP3)
2801 {
2802 struct ureg_program *ureg = tx->ureg;
2803 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2804 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2805 const int m = tx->insn.dst[0].idx;
2806 ASSERTED const int n = tx->insn.src[0].idx;
2807 assert(m >= 0 && m > n);
2808
2809 tx_texcoord_alloc(tx, m);
2810
2811 ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2812
2813 return D3D_OK;
2814 }
2815
DECL_SPECIAL(TEXM3x3)2816 DECL_SPECIAL(TEXM3x3)
2817 {
2818 struct ureg_program *ureg = tx->ureg;
2819 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2820 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2821 struct ureg_src sample;
2822 struct ureg_dst E, tmp;
2823 const int m = tx->insn.dst[0].idx - 2;
2824 ASSERTED const int n = tx->insn.src[0].idx;
2825 assert(m >= 0 && m > n);
2826
2827 tx_texcoord_alloc(tx, m);
2828 tx_texcoord_alloc(tx, m+1);
2829 tx_texcoord_alloc(tx, m+2);
2830
2831 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2832 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2833 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2834
2835 switch (tx->insn.opcode) {
2836 case D3DSIO_TEXM3x3:
2837 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2838 break;
2839 case D3DSIO_TEXM3x3TEX:
2840 sample = ureg_DECL_sampler(ureg, m + 2);
2841 tx->info->sampler_mask |= 1 << (m + 2);
2842 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2843 break;
2844 case D3DSIO_TEXM3x3VSPEC:
2845 sample = ureg_DECL_sampler(ureg, m + 2);
2846 tx->info->sampler_mask |= 1 << (m + 2);
2847 E = tx_scratch(tx);
2848 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2849 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2850 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2851 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2852 /* At this step, dst = N = (u', w', z').
2853 * We want dst to be the texture sampled at (u'', w'', z''), with
2854 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2855 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2856 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2857 /* at this step tmp.x = 1/N.N */
2858 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2859 /* at this step tmp.y = N.E */
2860 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2861 /* at this step tmp.x = N.E/N.N */
2862 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2863 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2864 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2865 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2866 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2867 break;
2868 default:
2869 return D3DERR_INVALIDCALL;
2870 }
2871 return D3D_OK;
2872 }
2873
DECL_SPECIAL(TEXDEPTH)2874 DECL_SPECIAL(TEXDEPTH)
2875 {
2876 struct ureg_program *ureg = tx->ureg;
2877 struct ureg_dst r5;
2878 struct ureg_src r5r, r5g;
2879
2880 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2881
2882 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2883 * r5 won't be used afterward, thus we can use r5.ba */
2884 r5 = tx->regs.r[5];
2885 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2886 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2887
2888 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2889 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2890 /* r5.r = r/g */
2891 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2892 r5r, ureg_imm1f(ureg, 1.0f));
2893 /* replace the depth for depth testing with the result */
2894 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2895 TGSI_WRITEMASK_Z, 0, 1);
2896 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2897
2898 return D3D_OK;
2899 }
2900
DECL_SPECIAL(BEM)2901 DECL_SPECIAL(BEM)
2902 {
2903 struct ureg_program *ureg = tx->ureg;
2904 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2905 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2906 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2907 struct ureg_src m00, m01, m10, m11, c8m;
2908 const int m = tx->insn.dst[0].idx;
2909 struct ureg_dst tmp = tx_scratch(tx);
2910 /*
2911 * Bump-env-matrix:
2912 * 00 is X
2913 * 01 is Y
2914 * 10 is Z
2915 * 11 is W
2916 */
2917 c8m = nine_float_constant_src(tx, 8+m);
2918 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2919 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2920 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2921 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2922 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2923 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2924 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2925 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2926 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2927 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2928
2929 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2930 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2931 NINE_APPLY_SWIZZLE(src1, X), src0);
2932 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2933 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2934 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2935 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2936
2937 tx->info->bumpenvmat_needed = 1;
2938
2939 return D3D_OK;
2940 }
2941
DECL_SPECIAL(TEXLD)2942 DECL_SPECIAL(TEXLD)
2943 {
2944 struct ureg_program *ureg = tx->ureg;
2945 unsigned target;
2946 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2947 struct ureg_src src[2] = {
2948 tx_src_param(tx, &tx->insn.src[0]),
2949 tx_src_param(tx, &tx->insn.src[1])
2950 };
2951 assert(tx->insn.src[1].idx >= 0 &&
2952 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2953 target = tx->sampler_targets[tx->insn.src[1].idx];
2954
2955 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
2956 return D3D_OK;
2957
2958 switch (tx->insn.flags) {
2959 case 0:
2960 ureg_TEX(ureg, dst, target, src[0], src[1]);
2961 break;
2962 case NINED3DSI_TEXLD_PROJECT:
2963 ureg_TXP(ureg, dst, target, src[0], src[1]);
2964 break;
2965 case NINED3DSI_TEXLD_BIAS:
2966 ureg_TXB(ureg, dst, target, src[0], src[1]);
2967 break;
2968 default:
2969 assert(0);
2970 return D3DERR_INVALIDCALL;
2971 }
2972 return D3D_OK;
2973 }
2974
DECL_SPECIAL(TEXLD_14)2975 DECL_SPECIAL(TEXLD_14)
2976 {
2977 struct ureg_program *ureg = tx->ureg;
2978 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2979 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2980 const unsigned s = tx->insn.dst[0].idx;
2981 const unsigned t = ps1x_sampler_type(tx->info, s);
2982
2983 tx->info->sampler_mask |= 1 << s;
2984 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2985
2986 return D3D_OK;
2987 }
2988
DECL_SPECIAL(TEX)2989 DECL_SPECIAL(TEX)
2990 {
2991 struct ureg_program *ureg = tx->ureg;
2992 const unsigned s = tx->insn.dst[0].idx;
2993 const unsigned t = ps1x_sampler_type(tx->info, s);
2994 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2995 struct ureg_src src[2];
2996
2997 tx_texcoord_alloc(tx, s);
2998
2999 src[0] = tx->regs.vT[s];
3000 src[1] = ureg_DECL_sampler(ureg, s);
3001 tx->info->sampler_mask |= 1 << s;
3002
3003 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3004
3005 return D3D_OK;
3006 }
3007
DECL_SPECIAL(TEXLDD)3008 DECL_SPECIAL(TEXLDD)
3009 {
3010 unsigned target;
3011 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3012 struct ureg_src src[4] = {
3013 tx_src_param(tx, &tx->insn.src[0]),
3014 tx_src_param(tx, &tx->insn.src[1]),
3015 tx_src_param(tx, &tx->insn.src[2]),
3016 tx_src_param(tx, &tx->insn.src[3])
3017 };
3018 assert(tx->insn.src[1].idx >= 0 &&
3019 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3020 target = tx->sampler_targets[tx->insn.src[1].idx];
3021
3022 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3023 return D3D_OK;
3024
3025 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3026 return D3D_OK;
3027 }
3028
DECL_SPECIAL(TEXLDL)3029 DECL_SPECIAL(TEXLDL)
3030 {
3031 unsigned target;
3032 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3033 struct ureg_src src[2] = {
3034 tx_src_param(tx, &tx->insn.src[0]),
3035 tx_src_param(tx, &tx->insn.src[1])
3036 };
3037 assert(tx->insn.src[1].idx >= 0 &&
3038 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3039 target = tx->sampler_targets[tx->insn.src[1].idx];
3040
3041 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3042 return D3D_OK;
3043
3044 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3045 return D3D_OK;
3046 }
3047
DECL_SPECIAL(SETP)3048 DECL_SPECIAL(SETP)
3049 {
3050 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3051 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052 struct ureg_src src[2] = {
3053 tx_src_param(tx, &tx->insn.src[0]),
3054 tx_src_param(tx, &tx->insn.src[1])
3055 };
3056 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3057 return D3D_OK;
3058 }
3059
DECL_SPECIAL(BREAKP)3060 DECL_SPECIAL(BREAKP)
3061 {
3062 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3063 ureg_IF(tx->ureg, src, tx_cond(tx));
3064 ureg_BRK(tx->ureg);
3065 tx_endcond(tx);
3066 ureg_ENDIF(tx->ureg);
3067 return D3D_OK;
3068 }
3069
DECL_SPECIAL(PHASE)3070 DECL_SPECIAL(PHASE)
3071 {
3072 return D3D_OK; /* we don't care about phase */
3073 }
3074
DECL_SPECIAL(COMMENT)3075 DECL_SPECIAL(COMMENT)
3076 {
3077 return D3D_OK; /* nothing to do */
3078 }
3079
3080
3081 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
3082 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
3083
3084 static const struct sm1_op_info inst_table[] =
3085 {
3086 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3087 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3088 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3089 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3090 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3091 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3092 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3093 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3094 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3095 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3096 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3097 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3098 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3099 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3100 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3101 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3102 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3103 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3104 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3105 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3106
3107 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3108 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3109 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3110 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3111 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3112
3113 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3114 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3115 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3116 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3117 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3118 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3119
3120 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3121
3122 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3123 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3124 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3125 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3126 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3127
3128 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3129 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3130
3131 /* More flow control */
3132 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3133 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3134 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3135 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3136 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3137 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3138 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3139 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3140 /* we don't write to the address register, but a normal register (copied
3141 * when needed to the address register), thus we don't use ARR */
3142 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3143
3144 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3145 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3146
3147 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3148 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3149 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3150 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3151 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3152 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3153 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3154 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3155 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3156 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3157 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3158 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3159 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3160 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3161 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3162 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3163
3164 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3165 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3166 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3167 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3168
3169 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3170
3171 /* More tex stuff */
3172 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3173 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3174 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3175 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3176 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3177 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3178
3179 /* Misc */
3180 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3181 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3182 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3183 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3184 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3185 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3186 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3187 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3188 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3189 };
3190
3191 static const struct sm1_op_info inst_phase =
3192 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3193
3194 static const struct sm1_op_info inst_comment =
3195 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3196
3197 static void
create_op_info_map(struct shader_translator * tx)3198 create_op_info_map(struct shader_translator *tx)
3199 {
3200 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3201 unsigned i;
3202
3203 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3204 tx->op_info_map[i] = -1;
3205
3206 if (tx->processor == PIPE_SHADER_VERTEX) {
3207 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3208 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3209 if (inst_table[i].vert_version.min <= version &&
3210 inst_table[i].vert_version.max >= version)
3211 tx->op_info_map[inst_table[i].sio] = i;
3212 }
3213 } else {
3214 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3215 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3216 if (inst_table[i].frag_version.min <= version &&
3217 inst_table[i].frag_version.max >= version)
3218 tx->op_info_map[inst_table[i].sio] = i;
3219 }
3220 }
3221 }
3222
3223 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3224 NineTranslateInstruction_Generic(struct shader_translator *tx)
3225 {
3226 struct ureg_dst dst[1];
3227 struct ureg_src src[4];
3228 unsigned i;
3229
3230 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3231 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3232 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3233 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3234
3235 ureg_insn(tx->ureg, tx->insn.info->opcode,
3236 dst, tx->insn.ndst,
3237 src, tx->insn.nsrc, 0);
3238 return D3D_OK;
3239 }
3240
3241 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3242 TOKEN_PEEK(struct shader_translator *tx)
3243 {
3244 return *(tx->parse);
3245 }
3246
3247 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3248 TOKEN_NEXT(struct shader_translator *tx)
3249 {
3250 return *(tx->parse)++;
3251 }
3252
3253 static inline void
TOKEN_JUMP(struct shader_translator * tx)3254 TOKEN_JUMP(struct shader_translator *tx)
3255 {
3256 if (tx->parse_next && tx->parse != tx->parse_next) {
3257 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3258 tx->parse = tx->parse_next;
3259 }
3260 }
3261
3262 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3263 sm1_parse_eof(struct shader_translator *tx)
3264 {
3265 return TOKEN_PEEK(tx) == NINED3DSP_END;
3266 }
3267
3268 static void
sm1_read_version(struct shader_translator * tx)3269 sm1_read_version(struct shader_translator *tx)
3270 {
3271 const DWORD tok = TOKEN_NEXT(tx);
3272
3273 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3274 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3275
3276 switch (tok >> 16) {
3277 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3278 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3279 default:
3280 DBG("Invalid shader type: %x\n", tok);
3281 tx->processor = ~0;
3282 break;
3283 }
3284 }
3285
3286 /* This is just to check if we parsed the instruction properly. */
3287 static void
sm1_parse_get_skip(struct shader_translator * tx)3288 sm1_parse_get_skip(struct shader_translator *tx)
3289 {
3290 const DWORD tok = TOKEN_PEEK(tx);
3291
3292 if (tx->version.major >= 2) {
3293 tx->parse_next = tx->parse + 1 /* this */ +
3294 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3295 } else {
3296 tx->parse_next = NULL; /* TODO: determine from param count */
3297 }
3298 }
3299
3300 static void
sm1_print_comment(const char * comment,UINT size)3301 sm1_print_comment(const char *comment, UINT size)
3302 {
3303 if (!size)
3304 return;
3305 /* TODO */
3306 }
3307
3308 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3309 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3310 {
3311 DWORD tok = TOKEN_PEEK(tx);
3312
3313 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3314 {
3315 const char *comment = "";
3316 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3317 tx->parse += size + 1;
3318
3319 if (print)
3320 sm1_print_comment(comment, size);
3321
3322 tok = TOKEN_PEEK(tx);
3323 }
3324 }
3325
3326 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3327 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3328 {
3329 *reg = TOKEN_NEXT(tx);
3330
3331 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3332 {
3333 if (tx->version.major < 2)
3334 *rel = (1 << 31) |
3335 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3336 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3337 D3DSP_NOSWIZZLE;
3338 else
3339 *rel = TOKEN_NEXT(tx);
3340 }
3341 }
3342
3343 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3344 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3345 {
3346 int8_t shift;
3347 dst->file =
3348 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3349 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3350 dst->type = TGSI_RETURN_TYPE_FLOAT;
3351 dst->idx = tok & D3DSP_REGNUM_MASK;
3352 dst->rel = NULL;
3353 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3354 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3355 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3356 dst->shift = (shift & 0x7) - (shift & 0x8);
3357 }
3358
3359 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3360 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3361 {
3362 src->file =
3363 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3364 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3365 src->type = TGSI_RETURN_TYPE_FLOAT;
3366 src->idx = tok & D3DSP_REGNUM_MASK;
3367 src->rel = NULL;
3368 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3369 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3370
3371 switch (src->file) {
3372 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3373 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3374 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3375 default:
3376 break;
3377 }
3378 }
3379
3380 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3381 sm1_parse_immediate(struct shader_translator *tx,
3382 struct sm1_src_param *imm)
3383 {
3384 imm->file = NINED3DSPR_IMMEDIATE;
3385 imm->idx = INT_MIN;
3386 imm->rel = NULL;
3387 imm->swizzle = NINED3DSP_NOSWIZZLE;
3388 imm->mod = 0;
3389 switch (tx->insn.opcode) {
3390 case D3DSIO_DEF:
3391 imm->type = NINED3DSPTYPE_FLOAT4;
3392 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3393 tx->parse += 4;
3394 break;
3395 case D3DSIO_DEFI:
3396 imm->type = NINED3DSPTYPE_INT4;
3397 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3398 tx->parse += 4;
3399 break;
3400 case D3DSIO_DEFB:
3401 imm->type = NINED3DSPTYPE_BOOL;
3402 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3403 tx->parse += 1;
3404 break;
3405 default:
3406 assert(0);
3407 break;
3408 }
3409 }
3410
3411 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3412 sm1_read_dst_param(struct shader_translator *tx,
3413 struct sm1_dst_param *dst,
3414 struct sm1_src_param *rel)
3415 {
3416 DWORD tok_dst, tok_rel = 0;
3417
3418 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3419 sm1_parse_dst_param(dst, tok_dst);
3420 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3421 sm1_parse_src_param(rel, tok_rel);
3422 dst->rel = rel;
3423 }
3424 }
3425
3426 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3427 sm1_read_src_param(struct shader_translator *tx,
3428 struct sm1_src_param *src,
3429 struct sm1_src_param *rel)
3430 {
3431 DWORD tok_src, tok_rel = 0;
3432
3433 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3434 sm1_parse_src_param(src, tok_src);
3435 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3436 assert(rel);
3437 sm1_parse_src_param(rel, tok_rel);
3438 src->rel = rel;
3439 }
3440 }
3441
3442 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3443 sm1_read_semantic(struct shader_translator *tx,
3444 struct sm1_semantic *sem)
3445 {
3446 const DWORD tok_usg = TOKEN_NEXT(tx);
3447 const DWORD tok_dst = TOKEN_NEXT(tx);
3448
3449 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3450 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3451 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3452
3453 sm1_parse_dst_param(&sem->reg, tok_dst);
3454 }
3455
3456 static void
sm1_parse_instruction(struct shader_translator * tx)3457 sm1_parse_instruction(struct shader_translator *tx)
3458 {
3459 struct sm1_instruction *insn = &tx->insn;
3460 HRESULT hr;
3461 DWORD tok;
3462 const struct sm1_op_info *info = NULL;
3463 unsigned i;
3464
3465 sm1_parse_comments(tx, TRUE);
3466 sm1_parse_get_skip(tx);
3467
3468 tok = TOKEN_NEXT(tx);
3469
3470 insn->opcode = tok & D3DSI_OPCODE_MASK;
3471 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3472 insn->coissue = !!(tok & D3DSI_COISSUE);
3473 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3474
3475 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3476 int k = tx->op_info_map[insn->opcode];
3477 if (k >= 0) {
3478 assert(k < ARRAY_SIZE(inst_table));
3479 info = &inst_table[k];
3480 }
3481 } else {
3482 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3483 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3484 }
3485 if (!info) {
3486 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3487 TOKEN_JUMP(tx);
3488 return;
3489 }
3490 insn->info = info;
3491 insn->ndst = info->ndst;
3492 insn->nsrc = info->nsrc;
3493
3494 /* check version */
3495 {
3496 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3497 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3498 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3499 if (ver < min || ver > max) {
3500 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3501 min, ver, max);
3502 return;
3503 }
3504 }
3505
3506 for (i = 0; i < insn->ndst; ++i)
3507 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3508 if (insn->predicated)
3509 sm1_read_src_param(tx, &insn->pred, NULL);
3510 for (i = 0; i < insn->nsrc; ++i)
3511 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3512
3513 /* parse here so we can dump them before processing */
3514 if (insn->opcode == D3DSIO_DEF ||
3515 insn->opcode == D3DSIO_DEFI ||
3516 insn->opcode == D3DSIO_DEFB)
3517 sm1_parse_immediate(tx, &tx->insn.src[0]);
3518
3519 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3520 sm1_instruction_check(insn);
3521
3522 if (insn->predicated) {
3523 tx->predicated_activated = true;
3524 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3525 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3526 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3527 }
3528 }
3529
3530 if (info->handler)
3531 hr = info->handler(tx);
3532 else
3533 hr = NineTranslateInstruction_Generic(tx);
3534 tx_apply_dst0_modifiers(tx);
3535
3536 if (insn->predicated) {
3537 tx->predicated_activated = false;
3538 /* TODO: predicate might be allowed on outputs,
3539 * which cannot be src. Workaround it. */
3540 ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3541 ureg_negate(tx_src_param(tx, &insn->pred)),
3542 ureg_src(tx->regs.predicate_tmp),
3543 ureg_src(tx->regs.predicate_dst));
3544 }
3545
3546 if (hr != D3D_OK)
3547 tx->failure = TRUE;
3548 tx->num_scratch = 0; /* reset */
3549
3550 TOKEN_JUMP(tx);
3551 }
3552
/* Query helpers. Both expect a `screen` variable in scope; GET_SHADER_CAP
 * additionally expects `info` and queries the cap for info->type. */
#define GET_CAP(cap) \
    screen->get_param(screen, PIPE_CAP_##cap)
#define GET_SHADER_CAP(cap) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##cap)
3557
3558 static HRESULT
tx_ctor(struct shader_translator * tx,struct pipe_screen * screen,struct nine_shader_info * info)3559 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3560 {
3561 unsigned i;
3562
3563 memset(tx, 0, sizeof(*tx));
3564
3565 tx->info = info;
3566
3567 tx->byte_code = info->byte_code;
3568 tx->parse = info->byte_code;
3569
3570 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3571 info->input_map[i] = NINE_DECLUSAGE_NONE;
3572 info->num_inputs = 0;
3573
3574 info->position_t = FALSE;
3575 info->point_size = FALSE;
3576
3577 memset(tx->slots_used, 0, sizeof(tx->slots_used));
3578 memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3579 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3580
3581 tx->info->const_float_slots = 0;
3582 tx->info->const_int_slots = 0;
3583 tx->info->const_bool_slots = 0;
3584
3585 info->sampler_mask = 0x0;
3586 info->rt_mask = 0x0;
3587
3588 info->lconstf.data = NULL;
3589 info->lconstf.ranges = NULL;
3590
3591 info->bumpenvmat_needed = 0;
3592
3593 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3594 tx->regs.rL[i] = ureg_dst_undef();
3595 }
3596 tx->regs.address = ureg_dst_undef();
3597 tx->regs.a0 = ureg_dst_undef();
3598 tx->regs.p = ureg_dst_undef();
3599 tx->regs.oDepth = ureg_dst_undef();
3600 tx->regs.vPos = ureg_src_undef();
3601 tx->regs.vFace = ureg_src_undef();
3602 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3603 tx->regs.o[i] = ureg_dst_undef();
3604 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3605 tx->regs.oCol[i] = ureg_dst_undef();
3606 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3607 tx->regs.vC[i] = ureg_src_undef();
3608 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3609 tx->regs.vT[i] = ureg_src_undef();
3610
3611 sm1_read_version(tx);
3612
3613 info->version = (tx->version.major << 4) | tx->version.minor;
3614
3615 tx->num_outputs = 0;
3616
3617 create_op_info_map(tx);
3618
3619 tx->ureg = ureg_create(info->type);
3620 if (!tx->ureg) {
3621 return E_OUTOFMEMORY;
3622 }
3623
3624 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3625 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3626 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3627 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3628 tx->texcoord_sn = tx->want_texcoord ?
3629 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3630 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3631 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3632
3633 if (IS_VS) {
3634 tx->num_constf_allowed = NINE_MAX_CONST_F;
3635 } else if (tx->version.major < 2) {/* IS_PS v1 */
3636 tx->num_constf_allowed = 8;
3637 } else if (tx->version.major == 2) {/* IS_PS v2 */
3638 tx->num_constf_allowed = 32;
3639 } else {/* IS_PS v3 */
3640 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3641 }
3642
3643 if (tx->version.major < 2) {
3644 tx->num_consti_allowed = 0;
3645 tx->num_constb_allowed = 0;
3646 } else {
3647 tx->num_consti_allowed = NINE_MAX_CONST_I;
3648 tx->num_constb_allowed = NINE_MAX_CONST_B;
3649 }
3650
3651 if (info->swvp_on) {
3652 /* TODO: The values tx->version.major == 1 */
3653 tx->num_constf_allowed = 8192;
3654 tx->num_consti_allowed = 2048;
3655 tx->num_constb_allowed = 2048;
3656 }
3657
3658 /* VS must always write position. Declare it here to make it the 1st output.
3659 * (Some drivers like nv50 are buggy and rely on that.)
3660 */
3661 if (IS_VS) {
3662 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3663 } else {
3664 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3665 if (!tx->shift_wpos)
3666 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3667 }
3668
3669 tx->mul_zero_wins = GET_CAP(TGSI_MUL_ZERO_WINS);
3670 if (tx->mul_zero_wins)
3671 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3672
3673 /* Add additional definition of constants */
3674 if (info->add_constants_defs.c_combination) {
3675 unsigned i;
3676
3677 assert(info->add_constants_defs.int_const_added);
3678 assert(info->add_constants_defs.bool_const_added);
3679 /* We only add constants that are used by the shader
3680 * and that are not defined in the shader */
3681 for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3682 if ((*info->add_constants_defs.int_const_added)[i]) {
3683 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3684 info->add_constants_defs.c_combination->const_i[i][0],
3685 info->add_constants_defs.c_combination->const_i[i][1],
3686 info->add_constants_defs.c_combination->const_i[i][2],
3687 info->add_constants_defs.c_combination->const_i[i][3]);
3688 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3689 }
3690 }
3691 for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3692 if ((*info->add_constants_defs.bool_const_added)[i]) {
3693 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3694 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3695 }
3696 }
3697 }
3698 return D3D_OK;
3699 }
3700
3701 static void
tx_dtor(struct shader_translator * tx)3702 tx_dtor(struct shader_translator *tx)
3703 {
3704 if (tx->slot_map)
3705 FREE(tx->slot_map);
3706 if (tx->num_inst_labels)
3707 FREE(tx->inst_labels);
3708 FREE(tx->lconstf);
3709 FREE(tx->regs.r);
3710 FREE(tx);
3711 }
3712
3713 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3714 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3715 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3716 shader_add_vs_viewport_transform(struct shader_translator *tx)
3717 {
3718 struct ureg_program *ureg = tx->ureg;
3719 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3720 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3721 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3722
3723 c0 = ureg_src_dimension(c0, 4);
3724 c1 = ureg_src_dimension(c1, 4);
3725 /* TODO: find out when we need to apply the viewport transformation or not.
3726 * Likely will be XYZ vs XYZRHW in vdecl_out
3727 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3728 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3729 */
3730 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3731 }
3732
3733 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3734 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3735 {
3736 struct ureg_program *ureg = tx->ureg;
3737 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3738 struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3739 struct ureg_src fog_vs, fog_color;
3740 struct ureg_dst fog_factor, depth;
3741
3742 if (!tx->info->fog_enable) {
3743 ureg_MOV(ureg, oCol0, src_col);
3744 return;
3745 }
3746
3747 if (tx->info->fog_mode != D3DFOG_NONE) {
3748 depth = tx_scratch_scalar(tx);
3749 /* Depth used for fog is perspective interpolated */
3750 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3751 ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3752 }
3753
3754 fog_color = nine_float_constant_src(tx, 32);
3755 fog_params = nine_float_constant_src(tx, 33);
3756 fog_factor = tx_scratch_scalar(tx);
3757
3758 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3759 fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3760 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3761 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3762 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3763 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3764 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3765 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3766 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3767 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3768 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3769 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3770 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3771 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3772 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3773 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3774 } else {
3775 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3776 TGSI_INTERPOLATE_PERSPECTIVE),
3777 TGSI_SWIZZLE_X);
3778 ureg_MOV(ureg, fog_factor, fog_vs);
3779 }
3780
3781 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3782 tx_src_scalar(fog_factor), src_col, fog_color);
3783 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3784 }
3785
parse_shader(struct shader_translator * tx)3786 static void parse_shader(struct shader_translator *tx)
3787 {
3788 struct nine_shader_info *info = tx->info;
3789
3790 while (!sm1_parse_eof(tx) && !tx->failure)
3791 sm1_parse_instruction(tx);
3792 tx->parse++; /* for byte_size */
3793
3794 if (tx->failure)
3795 return;
3796
3797 if (IS_PS && tx->version.major < 3) {
3798 if (tx->version.major < 2) {
3799 assert(tx->num_temp); /* there must be color output */
3800 info->rt_mask |= 0x1;
3801 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3802 } else {
3803 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3804 }
3805 }
3806
3807 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3808 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3809 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3810 }
3811
3812 if (info->position_t)
3813 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3814
3815 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3816 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3817 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3818 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3819 info->point_size = TRUE;
3820 }
3821
3822 if (info->process_vertices)
3823 shader_add_vs_viewport_transform(tx);
3824
3825 ureg_END(tx->ureg);
3826 }
3827
3828 #define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0)
3829 #define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1)
3830 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
3831 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
3832 #define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
3833 #define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3834
3835 static const struct debug_named_value nine_shader_debug_options[] = {
3836 { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." },
3837 { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." },
3838 { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3839 { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3840 { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3841 { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3842 DEBUG_NAMED_VALUE_END /* must be last */
3843 };
3844
3845 static inline boolean
nine_shader_get_debug_flag(uint64_t flag)3846 nine_shader_get_debug_flag(uint64_t flag)
3847 {
3848 static uint64_t flags = 0;
3849 static boolean first_run = TRUE;
3850
3851 if (unlikely(first_run)) {
3852 first_run = FALSE;
3853 flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3854
3855 // Check old TGSI dump envvar too
3856 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3857 flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3858 }
3859 }
3860
3861 return !!(flags & flag);
3862 }
3863
3864 static void
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state * state,const struct tgsi_token * tgsi_tokens,struct pipe_screen * screen)3865 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3866 struct pipe_screen *screen)
3867 {
3868 struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3869
3870 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3871 nir_print_shader(nir, stdout);
3872 }
3873
3874 state->type = PIPE_SHADER_IR_NIR;
3875 state->tokens = NULL;
3876 state->ir.nir = nir;
3877 memset(&state->stream_output, 0, sizeof(state->stream_output));
3878 }
3879
3880 static void *
nine_ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)3881 nine_ureg_create_shader(struct ureg_program *ureg,
3882 struct pipe_context *pipe,
3883 const struct pipe_stream_output_info *so)
3884 {
3885 struct pipe_shader_state state;
3886 const struct tgsi_token *tgsi_tokens;
3887 struct pipe_screen *screen = pipe->screen;
3888
3889 tgsi_tokens = ureg_finalize(ureg);
3890 if (!tgsi_tokens)
3891 return NULL;
3892
3893 assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3894 enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
3895
3896 int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR);
3897 bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR);
3898 bool use_nir = prefer_nir ||
3899 ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) ||
3900 ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS));
3901
3902 /* Allow user to override preferred IR, this is very useful for debugging */
3903 if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
3904 use_nir = false;
3905 if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
3906 use_nir = false;
3907
3908 DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n",
3909 shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
3910 prefer_nir ? "NIR" : "TGSI",
3911 use_nir ? "NIR" : "TGSI");
3912
3913 if (use_nir) {
3914 nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
3915 } else {
3916 pipe_shader_state_from_tgsi(&state, tgsi_tokens);
3917 }
3918
3919 assert(state.tokens || state.ir.nir);
3920
3921 if (so)
3922 state.stream_output = *so;
3923
3924 switch (shader_type) {
3925 case PIPE_SHADER_VERTEX:
3926 return pipe->create_vs_state(pipe, &state);
3927 case PIPE_SHADER_FRAGMENT:
3928 return pipe->create_fs_state(pipe, &state);
3929 default:
3930 unreachable("unsupported shader type");
3931 }
3932 }
3933
3934
/**
 * Create a shader object from @p and destroy @p afterwards, whether or not
 * the creation succeeded.
 *
 * @param p     ureg program; no longer valid once this function returns
 * @param pipe  context used to create the shader state
 * @param so    optional stream output description, may be NULL
 * @return the value returned by nine_ureg_create_shader() (NULL on failure)
 */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program *p,
                                       struct pipe_context *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso;

    cso = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);

    return cso;
}
3944
3945 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3946 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3947 {
3948 struct shader_translator *tx;
3949 HRESULT hr = D3D_OK;
3950 const unsigned processor = info->type;
3951 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3952 unsigned *const_ranges = NULL;
3953
3954 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3955
3956 tx = MALLOC_STRUCT(shader_translator);
3957 if (!tx)
3958 return E_OUTOFMEMORY;
3959
3960 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
3961 hr = E_OUTOFMEMORY;
3962 goto out;
3963 }
3964
3965 assert(IS_VS || !info->swvp_on);
3966
3967 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3968 hr = D3DERR_INVALIDCALL;
3969 DBG("Unsupported shader version: %u.%u !\n",
3970 tx->version.major, tx->version.minor);
3971 goto out;
3972 }
3973 if (tx->processor != processor) {
3974 hr = D3DERR_INVALIDCALL;
3975 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3976 goto out;
3977 }
3978 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3979 tx->version.major, tx->version.minor);
3980
3981 parse_shader(tx);
3982
3983 if (tx->failure) {
3984 /* For VS shaders, we print the warning later,
3985 * we first try with swvp. */
3986 if (IS_PS)
3987 ERR("Encountered buggy shader\n");
3988 ureg_destroy(tx->ureg);
3989 hr = D3DERR_INVALIDCALL;
3990 goto out;
3991 }
3992
3993 /* Recompile after compacting constant slots if possible */
3994 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
3995 unsigned *slot_map;
3996 unsigned c;
3997 int i, j, num_ranges, prev;
3998
3999 DBG("Recompiling shader for constant compaction\n");
4000 ureg_destroy(tx->ureg);
4001
4002 if (tx->num_inst_labels)
4003 FREE(tx->inst_labels);
4004 FREE(tx->lconstf);
4005 FREE(tx->regs.r);
4006
4007 num_ranges = 0;
4008 prev = -2;
4009 for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4010 if (tx->slots_used[i]) {
4011 if (prev != i - 1)
4012 num_ranges++;
4013 prev = i;
4014 }
4015 }
4016 slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
4017 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4018 if (!slot_map || !const_ranges) {
4019 hr = E_OUTOFMEMORY;
4020 goto out;
4021 }
4022 c = 0;
4023 j = -1;
4024 prev = -2;
4025 for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
4026 if (tx->slots_used[i]) {
4027 if (prev != i - 1)
4028 j++;
4029 /* Initialize first slot of the range */
4030 if (!const_ranges[2*j+1])
4031 const_ranges[2*j] = i;
4032 const_ranges[2*j+1]++;
4033 prev = i;
4034 slot_map[i] = c++;
4035 }
4036 }
4037
4038 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4039 hr = E_OUTOFMEMORY;
4040 goto out;
4041 }
4042 tx->slot_map = slot_map;
4043 parse_shader(tx);
4044 assert(!tx->failure);
4045 #if !defined(NDEBUG)
4046 i = 0;
4047 j = 0;
4048 while (const_ranges[i*2+1] != 0) {
4049 j += const_ranges[i*2+1];
4050 i++;
4051 }
4052 assert(j == tx->num_slots);
4053 #endif
4054 }
4055
4056 /* record local constants */
4057 if (tx->num_lconstf && tx->indirect_const_access) {
4058 struct nine_range *ranges;
4059 float *data;
4060 int *indices;
4061 unsigned i, k, n;
4062
4063 hr = E_OUTOFMEMORY;
4064
4065 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4066 if (!data)
4067 goto out;
4068 info->lconstf.data = data;
4069
4070 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4071 if (!indices)
4072 goto out;
4073
4074 /* lazy sort, num_lconstf should be small */
4075 for (n = 0; n < tx->num_lconstf; ++n) {
4076 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4077 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4078 k = i;
4079 }
4080 indices[n] = tx->lconstf[k].idx;
4081 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4082 tx->lconstf[k].idx = INT_MAX;
4083 }
4084
4085 /* count ranges */
4086 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4087 if (indices[i] != indices[i - 1] + 1)
4088 ++n;
4089 ranges = MALLOC(n * sizeof(ranges[0]));
4090 if (!ranges) {
4091 FREE(indices);
4092 goto out;
4093 }
4094 info->lconstf.ranges = ranges;
4095
4096 k = 0;
4097 ranges[k].bgn = indices[0];
4098 for (i = 1; i < tx->num_lconstf; ++i) {
4099 if (indices[i] != indices[i - 1] + 1) {
4100 ranges[k].next = &ranges[k + 1];
4101 ranges[k].end = indices[i - 1] + 1;
4102 ++k;
4103 ranges[k].bgn = indices[i];
4104 }
4105 }
4106 ranges[k].end = indices[i - 1] + 1;
4107 ranges[k].next = NULL;
4108 assert(n == (k + 1));
4109
4110 FREE(indices);
4111 hr = D3D_OK;
4112 }
4113
4114 /* r500 */
4115 if (info->const_float_slots > device->max_vs_const_f &&
4116 (info->const_int_slots || info->const_bool_slots) &&
4117 !info->swvp_on)
4118 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4119
4120
4121 if (tx->indirect_const_access) { /* vs only */
4122 info->const_float_slots = device->max_vs_const_f;
4123 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4124 }
4125
4126 if (!info->swvp_on) {
4127 info->const_used_size = sizeof(float[4]) * tx->num_slots;
4128 if (tx->num_slots)
4129 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4130 } else {
4131 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4132 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4133 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4134 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4135 }
4136
4137 if (info->process_vertices)
4138 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4139
4140 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4141 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4142 tgsi_dump(toks, 0);
4143 ureg_free_tokens(toks);
4144 }
4145
4146 if (info->process_vertices) {
4147 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4148 tx->output_info,
4149 tx->num_outputs,
4150 &(info->so));
4151 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4152 } else
4153 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4154 if (!info->cso) {
4155 hr = D3DERR_DRIVERINTERNALERROR;
4156 FREE(info->lconstf.data);
4157 FREE(info->lconstf.ranges);
4158 goto out;
4159 }
4160
4161 info->const_ranges = const_ranges;
4162 const_ranges = NULL;
4163 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4164 out:
4165 if (const_ranges)
4166 FREE(const_ranges);
4167 tx_dtor(tx);
4168 return hr;
4169 }
4170