1 /**********************************************************
2  * Copyright 2008-2009 VMware, Inc.  All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **********************************************************/
25 
26 
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30 
31 #include "svga_tgsi_emit.h"
32 
33 
34 /**
35  * Translate TGSI semantic info into SVGA3d semantic info.
36  * This is called for VS outputs and PS inputs only.
37  */
38 static boolean
translate_vs_ps_semantic(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned * usage,unsigned * idx)39 translate_vs_ps_semantic(struct svga_shader_emitter *emit,
40                          struct tgsi_declaration_semantic semantic,
41                          unsigned *usage,
42                          unsigned *idx)
43 {
44    switch (semantic.Name) {
45    case TGSI_SEMANTIC_POSITION:
46       *idx = semantic.Index;
47       *usage = SVGA3D_DECLUSAGE_POSITION;
48       break;
49    case TGSI_SEMANTIC_COLOR:
50       *idx = semantic.Index;
51       *usage = SVGA3D_DECLUSAGE_COLOR;
52       break;
53    case TGSI_SEMANTIC_BCOLOR:
54       *idx = semantic.Index + 2; /* sharing with COLOR */
55       *usage = SVGA3D_DECLUSAGE_COLOR;
56       break;
57    case TGSI_SEMANTIC_FOG:
58       *idx = 0;
59       assert(semantic.Index == 0);
60       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
61       break;
62    case TGSI_SEMANTIC_PSIZE:
63       *idx = semantic.Index;
64       *usage = SVGA3D_DECLUSAGE_PSIZE;
65       break;
66    case TGSI_SEMANTIC_GENERIC:
67       *idx = svga_remap_generic_index(emit->key.generic_remap_table,
68                                       semantic.Index);
69       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
70       break;
71    case TGSI_SEMANTIC_NORMAL:
72       *idx = semantic.Index;
73       *usage = SVGA3D_DECLUSAGE_NORMAL;
74       break;
75    case TGSI_SEMANTIC_CLIPDIST:
76    case TGSI_SEMANTIC_CLIPVERTEX:
77       /* XXX at this time we don't support clip distance or clip vertices */
78       debug_warn_once("unsupported clip distance/vertex attribute\n");
79       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
80       *idx = 0;
81       return TRUE;
82    default:
83       assert(0);
84       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
85       *idx = 0;
86       return FALSE;
87    }
88 
89    return TRUE;
90 }
91 
92 
93 /**
94  * Emit a PS input (or VS depth/fog output) register declaration.
95  * For example, if usage = SVGA3D_DECLUSAGE_TEXCOORD, reg.num = 1, and
96  * index = 3, we'll emit "dcl_texcoord3 v1".
97  */
98 static boolean
emit_decl(struct svga_shader_emitter * emit,SVGA3dShaderDestToken reg,unsigned usage,unsigned index)99 emit_decl(struct svga_shader_emitter *emit,
100           SVGA3dShaderDestToken reg,
101           unsigned usage,
102           unsigned index)
103 {
104    SVGA3DOpDclArgs dcl;
105    SVGA3dShaderInstToken opcode;
106 
107    /* check values against bitfield sizes */
108    assert(index < 16);
109    assert(usage <= SVGA3D_DECLUSAGE_MAX);
110 
111    opcode = inst_token(SVGA3DOP_DCL);
112    dcl.values[0] = 0;
113    dcl.values[1] = 0;
114 
115    dcl.dst = reg;
116    dcl.usage = usage;
117    dcl.index = index;
118    dcl.values[0] |= 1<<31;
119 
120    return (emit_instruction(emit, opcode) &&
121            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
122 }
123 
124 
125 /**
126  * Emit declaration for PS front/back-face input register.
127  */
128 static boolean
emit_vface_decl(struct svga_shader_emitter * emit)129 emit_vface_decl(struct svga_shader_emitter *emit)
130 {
131    if (!emit->emitted_vface) {
132       SVGA3dShaderDestToken reg =
133          dst_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
134 
135       if (!emit_decl(emit, reg, 0, 0))
136          return FALSE;
137 
138       emit->emitted_vface = TRUE;
139    }
140    return TRUE;
141 }
142 
143 
144 /**
145  * Emit PS input register to pass depth/fog coordinates.
146  * Note that this always goes into texcoord[0].
147  */
148 static boolean
ps30_input_emit_depth_fog(struct svga_shader_emitter * emit,struct src_register * out)149 ps30_input_emit_depth_fog(struct svga_shader_emitter *emit,
150                           struct src_register *out)
151 {
152    struct src_register reg;
153 
154    if (emit->emitted_depth_fog) {
155       *out = emit->ps_depth_fog;
156       return TRUE;
157    }
158 
159    if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
160       return FALSE;
161 
162    reg = src_register(SVGA3DREG_INPUT,
163                        emit->ps30_input_count++);
164 
165    *out = emit->ps_depth_fog = reg;
166 
167    emit->emitted_depth_fog = TRUE;
168 
169    return emit_decl(emit, dst(reg), SVGA3D_DECLUSAGE_TEXCOORD, 0);
170 }
171 
172 
173 /**
174  * Process a PS input declaration.
175  * We'll emit a declaration like "dcl_texcoord1 v2"
176  */
177 static boolean
ps30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)178 ps30_input(struct svga_shader_emitter *emit,
179            struct tgsi_declaration_semantic semantic,
180            unsigned idx)
181 {
182    unsigned usage, index;
183    SVGA3dShaderDestToken reg;
184 
185    if (semantic.Name == TGSI_SEMANTIC_POSITION) {
186 
187       emit->ps_true_pos = src_register(SVGA3DREG_MISCTYPE,
188                                         SVGA3DMISCREG_POSITION);
189       emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
190                                                           TGSI_SWIZZLE_Y,
191                                                           TGSI_SWIZZLE_Y,
192                                                           TGSI_SWIZZLE_Y);
193       reg = writemask(dst(emit->ps_true_pos),
194                        TGSI_WRITEMASK_XY);
195       emit->ps_reads_pos = TRUE;
196 
197       if (emit->info.reads_z) {
198          emit->ps_temp_pos = dst_register(SVGA3DREG_TEMP,
199                                            emit->nr_hw_temp);
200 
201          emit->input_map[idx] = src_register(SVGA3DREG_TEMP,
202                                               emit->nr_hw_temp);
203          emit->nr_hw_temp++;
204 
205          if (!ps30_input_emit_depth_fog(emit, &emit->ps_depth_pos))
206             return FALSE;
207 
208          emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,
209                                                               TGSI_SWIZZLE_Z,
210                                                               TGSI_SWIZZLE_Z,
211                                                               TGSI_SWIZZLE_W);
212       }
213       else {
214          emit->input_map[idx] = emit->ps_true_pos;
215       }
216 
217       return emit_decl(emit, reg, 0, 0);
218    }
219    else if (emit->key.fs.light_twoside &&
220             (semantic.Name == TGSI_SEMANTIC_COLOR)) {
221 
222       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
223          return FALSE;
224 
225       emit->internal_color_idx[emit->internal_color_count] = idx;
226       emit->input_map[idx] =
227          src_register(SVGA3DREG_INPUT, emit->ps30_input_count);
228       emit->ps30_input_count++;
229       emit->internal_color_count++;
230 
231       reg = dst(emit->input_map[idx]);
232 
233       if (!emit_decl(emit, reg, usage, index))
234          return FALSE;
235 
236       semantic.Name = TGSI_SEMANTIC_BCOLOR;
237       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
238          return FALSE;
239 
240       if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
241          return FALSE;
242 
243       reg = dst_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
244 
245       if (!emit_decl(emit, reg, usage, index))
246          return FALSE;
247 
248       if (!emit_vface_decl(emit))
249          return FALSE;
250 
251       return TRUE;
252    }
253    else if (semantic.Name == TGSI_SEMANTIC_FACE) {
254       if (!emit_vface_decl(emit))
255          return FALSE;
256       emit->emit_frontface = TRUE;
257       emit->internal_frontface_idx = idx;
258       return TRUE;
259    }
260    else if (semantic.Name == TGSI_SEMANTIC_FOG) {
261 
262       assert(semantic.Index == 0);
263 
264       if (!ps30_input_emit_depth_fog(emit, &emit->input_map[idx]))
265          return FALSE;
266 
267       emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
268                                                              TGSI_SWIZZLE_X,
269                                                              TGSI_SWIZZLE_X,
270                                                              TGSI_SWIZZLE_X);
271       return TRUE;
272    }
273    else {
274 
275       if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
276          return FALSE;
277 
278       if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
279          return FALSE;
280 
281       emit->input_map[idx] =
282          src_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
283 
284       reg = dst(emit->input_map[idx]);
285 
286       if (!emit_decl(emit, reg, usage, index))
287          return FALSE;
288 
289       if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
290           emit->key.sprite_origin_lower_left &&
291           index >= 1 &&
292           emit->key.sprite_coord_enable & (1 << semantic.Index)) {
293          /* This is a sprite texture coord with lower-left origin.
294           * We need to invert the texture T coordinate since the SVGA3D
295           * device only supports an upper-left origin.
296           */
297          unsigned unit = index - 1;
298 
299          emit->inverted_texcoords |= (1 << unit);
300 
301          /* save original texcoord reg */
302          emit->ps_true_texcoord[unit] = emit->input_map[idx];
303 
304          /* this temp register will be the results of the MAD instruction */
305          emit->ps_inverted_texcoord[unit] =
306             src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
307          emit->nr_hw_temp++;
308 
309          emit->ps_inverted_texcoord_input[unit] = idx;
310 
311          /* replace input_map entry with the temp register */
312          emit->input_map[idx] = emit->ps_inverted_texcoord[unit];
313       }
314 
315       return TRUE;
316    }
317 
318 }
319 
320 
321 /**
322  * Process a PS output declaration.
323  * Note that we don't actually emit a SVGA3DOpDcl for PS outputs.
324  * \idx  register index, such as OUT[2] (not semantic index)
325  */
326 static boolean
ps30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)327 ps30_output(struct svga_shader_emitter *emit,
328             struct tgsi_declaration_semantic semantic,
329             unsigned idx)
330 {
331    switch (semantic.Name) {
332    case TGSI_SEMANTIC_COLOR:
333       if (emit->unit == PIPE_SHADER_FRAGMENT) {
334          if (emit->key.fs.white_fragments) {
335             /* Used for XOR logicop mode */
336             emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
337                                                   emit->nr_hw_temp++);
338             emit->temp_color_output[idx] = emit->output_map[idx];
339             emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT,
340                                                         semantic.Index);
341          }
342          else if (emit->key.fs.write_color0_to_n_cbufs) {
343             /* We'll write color output [0] to all render targets.
344              * Prepare all the output registers here, but only when the
345              * semantic.Index == 0 so we don't do this more than once.
346              */
347             if (semantic.Index == 0) {
348                unsigned i;
349                for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
350                   emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
351                                                      emit->nr_hw_temp++);
352                   emit->temp_color_output[i] = emit->output_map[idx+i];
353                   emit->true_color_output[i] = dst_register(SVGA3DREG_COLOROUT,
354                                                             i);
355                }
356             }
357          }
358          else {
359             emit->output_map[idx] =
360                dst_register(SVGA3DREG_COLOROUT, semantic.Index);
361          }
362       }
363       else {
364          emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT,
365                                                semantic.Index);
366       }
367       break;
368    case TGSI_SEMANTIC_POSITION:
369       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
370                                             emit->nr_hw_temp++);
371       emit->temp_pos = emit->output_map[idx];
372       emit->true_pos = dst_register(SVGA3DREG_DEPTHOUT,
373                                      semantic.Index);
374       break;
375    default:
376       assert(0);
377       /* A wild stab in the dark. */
378       emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 0);
379       break;
380    }
381 
382    return TRUE;
383 }
384 
385 
386 /**
387  * Declare a VS input register.
388  * We still make up the input semantics the same as in 2.0
389  */
390 static boolean
vs30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)391 vs30_input(struct svga_shader_emitter *emit,
392            struct tgsi_declaration_semantic semantic,
393            unsigned idx)
394 {
395    SVGA3DOpDclArgs dcl;
396    SVGA3dShaderInstToken opcode;
397    unsigned usage, index;
398 
399    opcode = inst_token(SVGA3DOP_DCL);
400    dcl.values[0] = 0;
401    dcl.values[1] = 0;
402 
403    emit->input_map[idx] = src_register(SVGA3DREG_INPUT, idx);
404    dcl.dst = dst_register(SVGA3DREG_INPUT, idx);
405 
406    assert(dcl.dst.reserved0);
407 
408    svga_generate_vdecl_semantics(idx, &usage, &index);
409 
410    dcl.usage = usage;
411    dcl.index = index;
412    dcl.values[0] |= 1<<31;
413 
414    return (emit_instruction(emit, opcode) &&
415            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
416 }
417 
418 
419 /**
420  * Declare VS output for holding depth/fog.
421  */
422 static boolean
vs30_output_emit_depth_fog(struct svga_shader_emitter * emit,SVGA3dShaderDestToken * out)423 vs30_output_emit_depth_fog(struct svga_shader_emitter *emit,
424                            SVGA3dShaderDestToken *out)
425 {
426    SVGA3dShaderDestToken reg;
427 
428    if (emit->emitted_depth_fog) {
429       *out = emit->vs_depth_fog;
430       return TRUE;
431    }
432 
433    reg = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
434 
435    *out = emit->vs_depth_fog = reg;
436 
437    emit->emitted_depth_fog = TRUE;
438 
439    return emit_decl(emit, reg, SVGA3D_DECLUSAGE_TEXCOORD, 0);
440 }
441 
442 
443 /**
444  * Declare a VS output.
445  * VS3.0 outputs have proper declarations and semantic info for
446  * matching against PS inputs.
447  */
448 static boolean
vs30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)449 vs30_output(struct svga_shader_emitter *emit,
450             struct tgsi_declaration_semantic semantic,
451             unsigned idx)
452 {
453    SVGA3DOpDclArgs dcl;
454    SVGA3dShaderInstToken opcode;
455    unsigned usage, index;
456 
457    opcode = inst_token(SVGA3DOP_DCL);
458    dcl.values[0] = 0;
459    dcl.values[1] = 0;
460 
461    if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
462       return FALSE;
463 
464    if (emit->vs30_output_count >= SVGA3D_OUTPUTREG_MAX)
465       return FALSE;
466 
467    dcl.dst = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
468    dcl.usage = usage;
469    dcl.index = index;
470    dcl.values[0] |= 1<<31;
471 
472    if (semantic.Name == TGSI_SEMANTIC_POSITION) {
473       assert(idx == 0);
474       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
475                                             emit->nr_hw_temp++);
476       emit->temp_pos = emit->output_map[idx];
477       emit->true_pos = dcl.dst;
478 
479       /* Grab an extra output for the depth output */
480       if (!vs30_output_emit_depth_fog(emit, &emit->depth_pos))
481          return FALSE;
482 
483    }
484    else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
485       emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
486                                             emit->nr_hw_temp++);
487       emit->temp_psiz = emit->output_map[idx];
488 
489       /* This has the effect of not declaring psiz (below) and not
490        * emitting the final MOV to true_psiz in the postamble.
491        */
492       if (!emit->key.vs.allow_psiz)
493          return TRUE;
494 
495       emit->true_psiz = dcl.dst;
496    }
497    else if (semantic.Name == TGSI_SEMANTIC_FOG) {
498       /*
499        * Fog is shared with depth.
500        * So we need to decrement out_count since emit_depth_fog will increment it.
501        */
502       emit->vs30_output_count--;
503 
504       if (!vs30_output_emit_depth_fog(emit, &emit->output_map[idx]))
505          return FALSE;
506 
507       return TRUE;
508    }
509    else {
510       emit->output_map[idx] = dcl.dst;
511    }
512 
513    return (emit_instruction(emit, opcode) &&
514            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
515 }
516 
517 
518 /** Translate PIPE_TEXTURE_x to SVGA3DSAMP_x */
519 static ubyte
svga_tgsi_sampler_type(const struct svga_shader_emitter * emit,int idx)520 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
521 {
522    switch (emit->sampler_target[idx]) {
523    case TGSI_TEXTURE_1D:
524       return SVGA3DSAMP_2D;
525    case TGSI_TEXTURE_2D:
526    case TGSI_TEXTURE_RECT:
527       return SVGA3DSAMP_2D;
528    case TGSI_TEXTURE_SHADOW2D:
529       return SVGA3DSAMP_2D_SHADOW;
530    case TGSI_TEXTURE_3D:
531       return SVGA3DSAMP_VOLUME;
532    case TGSI_TEXTURE_CUBE:
533       return SVGA3DSAMP_CUBE;
534    }
535 
536    return SVGA3DSAMP_UNKNOWN;
537 }
538 
539 
540 static boolean
ps30_sampler(struct svga_shader_emitter * emit,unsigned idx)541 ps30_sampler(struct svga_shader_emitter *emit,
542               unsigned idx)
543 {
544    SVGA3DOpDclArgs dcl;
545    SVGA3dShaderInstToken opcode;
546 
547    opcode = inst_token(SVGA3DOP_DCL);
548    dcl.values[0] = 0;
549    dcl.values[1] = 0;
550 
551    dcl.dst = dst_register(SVGA3DREG_SAMPLER, idx);
552    dcl.type = svga_tgsi_sampler_type(emit, idx);
553    dcl.values[0] |= 1<<31;
554 
555    return (emit_instruction(emit, opcode) &&
556            svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
557 }
558 
559 
560 boolean
svga_shader_emit_samplers_decl(struct svga_shader_emitter * emit)561 svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit)
562 {
563    unsigned i;
564 
565    for (i = 0; i < emit->num_samplers; i++) {
566       if (!ps30_sampler(emit, i))
567          return FALSE;
568    }
569    return TRUE;
570 }
571 
572 
573 boolean
svga_translate_decl_sm30(struct svga_shader_emitter * emit,const struct tgsi_full_declaration * decl)574 svga_translate_decl_sm30(struct svga_shader_emitter *emit,
575                          const struct tgsi_full_declaration *decl)
576 {
577    unsigned first = decl->Range.First;
578    unsigned last = decl->Range.Last;
579    unsigned idx;
580 
581    for (idx = first; idx <= last; idx++) {
582       boolean ok = TRUE;
583 
584       switch (decl->Declaration.File) {
585       case TGSI_FILE_SAMPLER:
586          assert (emit->unit == PIPE_SHADER_FRAGMENT);
587          /* just keep track of the number of samplers here.
588           * Will emit the declaration in the helpers function.
589           */
590          emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
591          break;
592 
593       case TGSI_FILE_INPUT:
594          if (emit->unit == PIPE_SHADER_VERTEX)
595             ok = vs30_input(emit, decl->Semantic, idx);
596          else
597             ok = ps30_input(emit, decl->Semantic, idx);
598          break;
599 
600       case TGSI_FILE_OUTPUT:
601          if (emit->unit == PIPE_SHADER_VERTEX)
602             ok = vs30_output(emit, decl->Semantic, idx);
603          else
604             ok = ps30_output(emit, decl->Semantic, idx);
605          break;
606 
607       case TGSI_FILE_SAMPLER_VIEW:
608          {
609             unsigned unit = decl->Range.First;
610             assert(decl->Range.First == decl->Range.Last);
611             emit->sampler_target[unit] = decl->SamplerView.Resource;
612          }
613          break;
614 
615       default:
616          /* don't need to declare other vars */
617          ok = TRUE;
618       }
619 
620       if (!ok)
621          return FALSE;
622    }
623 
624    return TRUE;
625 }
626